From 7ee13677c8f119746ee07057540d57f65b9c3fd8 Mon Sep 17 00:00:00 2001 From: meissner Date: Fri, 13 Nov 2015 20:02:56 +0000 Subject: [gcc] 2015-11-13 Michael Meissner * config/rs6000/constraints.md (we constraint): New constraint for 64-bit power9 vector support. (wL constraint): New constraint for the element in a vector that can be addressed by the MFVSRLD instruction. * config/rs6000/rs6000-protos.h (convert_float128_to_int): Add declaration. (convert_int_to_float128): Likewise. (rs6000_generate_compare): Add support for ISA 3.0 (power9) hardware support for IEEE 128-bit floating point. (rs6000_expand_float128_convert): Likewise. (convert_float128_to_int): Likewise. (convert_int_to_float128): Likewise. * config/rs6000/rs6000.md (UNSPEC_ROUND_TO_ODD): New unspecs for ISA 3.0 hardware IEEE 128-bit floating point. (UNSPEC_IEEE128_MOVE): Likewise. (UNSPEC_IEEE128_CONVERT): Likewise. (FMA_F): Add support for IEEE 128-bit floating point hardware support. (Ff): Add support for DImode. (Fv): Likewise. (any_fix code iterator): New and updated iterators for IEEE 128-bit floating point hardware support. (any_float code iterator): Likewise. (s code attribute): Likewise. (su code attribute): Likewise. (az code attribute): Likewise. (uns code attribute): Likewise. (neg2, FLOAT128 iterator): Add support for IEEE 128-bit floating point hardware support. (abs2, FLOAT128 iterator): Likewise. (add3, IEEE128 iterator): New insns for IEEE 128-bit floating point hardware. (sub3, IEEE128 iterator): Likewise. (mul3, IEEE128 iterator): Likewise. (div3, IEEE128 iterator): Likewise. (copysign3, IEEE128 iterator): Likewise. (sqrt2, IEEE128 iterator): Likewise. (neg2, IEEE128 iterator): Likewise. (abs2, IEEE128 iterator): Likewise. (nabs2, IEEE128 iterator): Likewise. (fma4_hw, IEEE128 iterator): Likewise. (fms4_hw, IEEE128 iterator): Likewise. (nfma4_hw, IEEE128 iterator): Likewise. (nfms4_hw, IEEE128 iterator): Likewise. (extend2_hw): Likewise. (truncdf2_hw, IEEE128 iterator): Likewise. (truncsf2_hw, IEEE128 iterator): Likewise. (fix_fixuns code attribute): Likewise. (float_floatuns code attribute): Likewise. (fix_si2_hw): Likewise. (fix_di2_hw): Likewise. (float_si2_hw): Likewise. (float_di2_hw): Likewise. (xscvqpwz_): Likewise. (xscvqpdz_): Likewise. (xscvdqp_df2_odd): Likewise. (cmp_h): Likewise. (128-bit GPR splitters): Don't split a 128-bit move that is a direct move between GPR and vector registers using ISA 3.0 direct move instructions. (mul3): Add support for the ISA 3.0 integer multiply-add instruction. * config/rs6000/rs6000.c (rs6000_debug_reg_global): Add ISA 3.0 debugging. (rs6000_init_hard_regno_mode_ok): If ISA 3.0 and 64-bit, enable we constraint. Disable the VSX<->GPR direct move helpers if we have the MFVSRLD and MTVSRDD instructions. (rs6000_secondary_reload_simple_move): Add support for doing vector direct moves directly without additional scratch registers if we have ISA 3.0 instructions. (rs6000_secondary_reload_direct_move): Update comments. (rs6000_output_move_128bit): Add support for ISA 3.0 vector instructions. * config/rs6000/vsx.md (vsx_mov): Add support for ISA 3.0 direct move instructions. (vsx_movti_64bit): Likewise. (vsx_extract_): Likewise. * config/rs6000/rs6000.h (VECTOR_ELEMENT_MFVSRLD_64BIT): New macros for ISA 3.0 direct move instructions. (TARGET_DIRECT_MOVE_128): Likewise. (TARGET_MADDLD): Add support for the ISA 3.0 integer multiply-add instruction. * doc/md.texi (RS/6000 constraints): Document we, wF, wG, wL constraints. 
Update wa documentation to say not to use %x on instructions that only take Altivec registers. [gcc/testsuite] 2015-11-13 Michael Meissner * gcc.target/powerpc/float128-hw.c: New test for IEEE 128-bit hardware floating point support. * gcc.target/powerpc/direct-move-vector.c: New test for 128-bit vector direct move instructions. * gcc.target/powerpc/maddld.c: New test. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@230342 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 99 +++++ gcc/config/rs6000/constraints.md | 9 +- gcc/config/rs6000/rs6000-protos.h | 2 + gcc/config/rs6000/rs6000.c | 279 +++++++++++++- gcc/config/rs6000/rs6000.h | 8 + gcc/config/rs6000/rs6000.md | 411 ++++++++++++++++++++- gcc/config/rs6000/vsx.md | 28 +- gcc/doc/md.texi | 29 ++ gcc/testsuite/ChangeLog | 10 + .../gcc.target/powerpc/direct-move-vector.c | 33 ++ gcc/testsuite/gcc.target/powerpc/float128-hw.c | 18 + gcc/testsuite/gcc.target/powerpc/maddld.c | 20 + 12 files changed, 901 insertions(+), 45 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/direct-move-vector.c create mode 100644 gcc/testsuite/gcc.target/powerpc/float128-hw.c create mode 100644 gcc/testsuite/gcc.target/powerpc/maddld.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a170b6015fa..5cd3c7e1887 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,102 @@ +2015-11-13 Michael Meissner + + * config/rs6000/constraints.md (we constraint): New constraint for + 64-bit power9 vector support. + (wL constraint): New constraint for the element in a vector that + can be addressed by the MFVSRLD instruction. + + * config/rs6000/rs6000-protos.h (convert_float128_to_int): Add + declaration. + (convert_int_to_float128): Likewise. + (rs6000_generate_compare): Add support for ISA 3.0 (power9) + hardware support for IEEE 128-bit floating point. + (rs6000_expand_float128_convert): Likewise. + (convert_float128_to_int): Likewise. + (convert_int_to_float128): Likewise. + + * config/rs6000/rs6000.md (UNSPEC_ROUND_TO_ODD): New unspecs for + ISA 3.0 hardware IEEE 128-bit floating point. + (UNSPEC_IEEE128_MOVE): Likewise. + (UNSPEC_IEEE128_CONVERT): Likewise. + (FMA_F): Add support for IEEE 128-bit floating point hardware + support. + (Ff): Add support for DImode. + (Fv): Likewise. + (any_fix code iterator): New and updated iterators for IEEE + 128-bit floating point hardware support. + (any_float code iterator): Likewise. + (s code attribute): Likewise. + (su code attribute): Likewise. + (az code attribute): Likewise. + (uns code attribute): Likewise. + (neg2, FLOAT128 iterator): Add support for IEEE 128-bit + floating point hardware support. + (abs2, FLOAT128 iterator): Likewise. + (add3, IEEE128 iterator): New insns for IEEE 128-bit + floating point hardware. + (sub3, IEEE128 iterator): Likewise. + (mul3, IEEE128 iterator): Likewise. + (div3, IEEE128 iterator): Likewise. + (copysign3, IEEE128 iterator): Likewise. + (sqrt2, IEEE128 iterator): Likewise. + (neg2, IEEE128 iterator): Likewise. + (abs2, IEEE128 iterator): Likewise. + (nabs2, IEEE128 iterator): Likewise. + (fma4_hw, IEEE128 iterator): Likewise. + (fms4_hw, IEEE128 iterator): Likewise. + (nfma4_hw, IEEE128 iterator): Likewise. + (nfms4_hw, IEEE128 iterator): Likewise. + (extend2_hw): Likewise. + (truncdf2_hw, IEEE128 iterator): Likewise. + (truncsf2_hw, IEEE128 iterator): Likewise. + (fix_fixuns code attribute): Likewise. + (float_floatuns code attribute): Likewise. + (fix_si2_hw): Likewise. + (fix_di2_hw): Likewise. + (float_si2_hw): Likewise. + (float_di2_hw): Likewise. 
+ (xscvqpwz_): Likewise. + (xscvqpdz_): Likewise. + (xscvdqp_df2_odd): Likewise. + (cmp_h): Likewise. + (128-bit GPR splitters): Don't split a 128-bit move that is a + direct move between GPR and vector registers using ISA 3.0 direct + move instructions. + (mul3): Add support for the ISA 3.0 integer + multiply-add instruction. + + * config/rs6000/rs6000.c (rs6000_debug_reg_global): Add ISA 3.0 + debugging. + (rs6000_init_hard_regno_mode_ok): If ISA 3.0 and 64-bit, enable we + constraint. Disable the VSX<->GPR direct move helpers if we have + the MFVSRLD and MTVSRDD instructions. + (rs6000_secondary_reload_simple_move): Add support for doing + vector direct moves directly without additional scratch registers + if we have ISA 3.0 instructions. + (rs6000_secondary_reload_direct_move): Update comments. + (rs6000_output_move_128bit): Add support for ISA 3.0 vector + instructions. + + * config/rs6000/vsx.md (vsx_mov): Add support for ISA 3.0 + direct move instructions. + (vsx_movti_64bit): Likewise. + (vsx_extract_): Likewise. + + * config/rs6000/rs6000.h (VECTOR_ELEMENT_MFVSRLD_64BIT): New + macros for ISA 3.0 direct move instructions. + (TARGET_DIRECT_MOVE_128): Likewise. + (TARGET_MADDLD): Add support for the ISA 3.0 integer multiply-add + instruction. + + * doc/md.texi (RS/6000 constraints): Document we, wF, wG, wL + constraints. Update wa documentation to say not to use %x on + instructions that only take Altivec registers. + 2015-11-13 David Malcolm * Makefile.in (OBJS): Add gcc-rich-location.o. diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md index 7c69b6c9e1d..e4129453736 100644 --- a/gcc/config/rs6000/constraints.md +++ b/gcc/config/rs6000/constraints.md @@ -64,7 +64,8 @@ (define_register_constraint "wd" "rs6000_constraints[RS6000_CONSTRAINT_wd]" "VSX vector register to hold vector double data or NO_REGS.") -;; we is not currently used +(define_register_constraint "we" "rs6000_constraints[RS6000_CONSTRAINT_we]" + "VSX register if the -mpower9-vector -m64 options were used or NO_REGS.") (define_register_constraint "wf" "rs6000_constraints[RS6000_CONSTRAINT_wf]" "VSX vector register to hold vector float data or NO_REGS.") @@ -147,6 +148,12 @@ "Memory operand suitable for TOC fusion memory references" (match_operand 0 "toc_fusion_mem_wrapped")) +(define_constraint "wL" + "Int constant that is the element number mfvsrld accesses in a vector." 
+ (and (match_code "const_int") + (and (match_test "TARGET_DIRECT_MOVE_128") + (match_test "(ival == VECTOR_ELEMENT_MFVSRLD_64BIT)")))) + ;; Lq/stq validates the address for load/store quad (define_memory_constraint "wQ" "Memory operand suitable for the load/store quad instructions" diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 9a149b9e29a..05007654787 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -55,6 +55,8 @@ extern const char *rs6000_output_move_128bit (rtx *); extern bool rs6000_move_128bit_ok_p (rtx []); extern bool rs6000_split_128bit_ok_p (rtx []); extern void rs6000_expand_float128_convert (rtx, rtx, bool); +extern void convert_float128_to_int (rtx *, enum rtx_code); +extern void convert_int_to_float128 (rtx *, enum rtx_code); extern void rs6000_expand_vector_init (rtx, rtx); extern void paired_expand_vector_init (rtx, rtx); extern void rs6000_expand_vector_set (rtx, rtx, int); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 7b6aca9e813..3e02d5cfb5c 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -2575,6 +2575,10 @@ rs6000_debug_reg_global (void) if (TARGET_VSX) fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element", (int)VECTOR_ELEMENT_SCALAR_64BIT); + + if (TARGET_DIRECT_MOVE_128) + fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element", + (int)VECTOR_ELEMENT_MFVSRLD_64BIT); } @@ -2986,6 +2990,10 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */ } + /* Support for new direct moves. */ + if (TARGET_DIRECT_MOVE_128) + rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS; + /* Set up the reload helper and direct move functions. */ if (TARGET_VSX || TARGET_ALTIVEC) { @@ -3034,7 +3042,7 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load; } - if (TARGET_DIRECT_MOVE) + if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128) { reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti; reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti; @@ -18081,6 +18089,11 @@ rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type, || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE))) return true; + else if (TARGET_DIRECT_MOVE_128 && size == 16 + && ((to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE) + || (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE))) + return true; + else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE) || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE))) @@ -18094,7 +18107,7 @@ rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type, return false; } -/* Power8 helper function for rs6000_secondary_reload, handle all of the +/* Direct move helper function for rs6000_secondary_reload, handle all of the special direct moves that involve allocating an extra register, return the insn code of the helper function if there is such a function or CODE_FOR_nothing if not. */ @@ -18116,8 +18129,8 @@ rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type, if (size == 16) { /* Handle moving 128-bit values from GPRs to VSX point registers on - power8 when running in 64-bit mode using XXPERMDI to glue the two - 64-bit values back together. */ + ISA 2.07 (power8, power9) when running in 64-bit mode using + XXPERMDI to glue the two 64-bit values back together. 
*/ if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE) { cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */ @@ -18125,7 +18138,7 @@ rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type, } /* Handle moving 128-bit values from VSX point registers to GPRs on - power8 when running in 64-bit mode using XXPERMDI to get access to the + ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the bottom 64-bit value. */ else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) { @@ -18150,11 +18163,32 @@ rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type, } } - else if (size == 8) + if (TARGET_POWERPC64 && size == 16) + { + /* Handle moving 128-bit values from GPRs to VSX point registers on + ISA 2.07 when running in 64-bit mode using XXPERMDI to glue the two + 64-bit values back together. */ + if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE) + { + cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */ + icode = reg_addr[mode].reload_vsx_gpr; + } + + /* Handle moving 128-bit values from VSX point registers to GPRs on + ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the + bottom 64-bit value. */ + else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) + { + cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */ + icode = reg_addr[mode].reload_gpr_vsx; + } + } + + else if (!TARGET_POWERPC64 && size == 8) { /* Handle moving 64-bit values from GPRs to floating point registers on - power8 when running in 32-bit mode using FMRGOW to glue the two 32-bit - values back together. Altivec register classes must be handled + ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two + 32-bit values back together. Altivec register classes must be handled specially since a different instruction is used, and the secondary reload support requires a single instruction class in the scratch register constraint. However, right now TFmode is not allowed in @@ -18181,7 +18215,7 @@ rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type, /* Return whether a move between two register classes can be done either directly (simple move) or via a pattern that uses a single extra temporary - (using power8's direct move in this case. */ + (using ISA 2.07's direct move in this case. */ static bool rs6000_secondary_reload_move (enum rs6000_reg_type to_type, @@ -19220,6 +19254,11 @@ rs6000_output_move_128bit (rtx operands[]) if (src_gpr_p) return "#"; + if (TARGET_DIRECT_MOVE_128 && src_vsx_p) + return (WORDS_BIG_ENDIAN + ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1" + : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1"); + else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p) return "#"; } @@ -19229,6 +19268,11 @@ rs6000_output_move_128bit (rtx operands[]) if (src_vsx_p) return "xxlor %x0,%x1,%x1"; + else if (TARGET_DIRECT_MOVE_128 && src_gpr_p) + return (WORDS_BIG_ENDIAN + ? "mtvsrdd %x0,%1,%L1" + : "mtvsrdd %x0,%L1,%1"); + else if (TARGET_DIRECT_MOVE && src_gpr_p) return "#"; } @@ -20490,11 +20534,12 @@ rs6000_generate_compare (rtx cmp, machine_mode mode) emit_insn (cmp); } - /* IEEE 128-bit support in VSX registers. The comparison functions - (__cmpokf2 and __cmpukf2) returns 0..15 that is laid out the same way as - the PowerPC CR register would for a normal floating point comparison from - the fcmpo and fcmpu instructions. */ - else if (FLOAT128_IEEE_P (mode)) + /* IEEE 128-bit support in VSX registers. 
If we do not have IEEE 128-bit + hardware, the comparison functions (__cmpokf2 and __cmpukf2) returns 0..15 + that is laid out the same way as the PowerPC CR register would for a + normal floating point comparison from the fcmpo and fcmpu + instructions. */ + else if (!TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)) { rtx and_reg = gen_reg_rtx (SImode); rtx dest = gen_reg_rtx (SImode); @@ -20633,7 +20678,7 @@ rs6000_generate_compare (rtx cmp, machine_mode mode) /* Some kinds of FP comparisons need an OR operation; under flag_finite_math_only we don't bother. */ if (FLOAT_MODE_P (mode) - && !FLOAT128_IEEE_P (mode) + && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW) && !flag_finite_math_only && !(TARGET_HARD_FLOAT && !TARGET_FPRS) && (code == LE || code == GE @@ -20726,6 +20771,56 @@ rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p) bool do_move = false; rtx libfunc = NULL_RTX; rtx dest2; + typedef rtx (*rtx_2func_t) (rtx, rtx); + rtx_2func_t hw_convert = (rtx_2func_t)0; + size_t kf_or_tf; + + struct hw_conv_t { + rtx_2func_t from_df; + rtx_2func_t from_sf; + rtx_2func_t from_si_sign; + rtx_2func_t from_si_uns; + rtx_2func_t from_di_sign; + rtx_2func_t from_di_uns; + rtx_2func_t to_df; + rtx_2func_t to_sf; + rtx_2func_t to_si_sign; + rtx_2func_t to_si_uns; + rtx_2func_t to_di_sign; + rtx_2func_t to_di_uns; + } hw_conversions[2] = { + /* convertions to/from KFmode */ + { + gen_extenddfkf2_hw, /* KFmode <- DFmode. */ + gen_extendsfkf2_hw, /* KFmode <- SFmode. */ + gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */ + gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */ + gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */ + gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */ + gen_trunckfdf2_hw, /* DFmode <- KFmode. */ + gen_trunckfsf2_hw, /* SFmode <- KFmode. */ + gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */ + gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */ + gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */ + gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */ + }, + + /* convertions to/from TFmode */ + { + gen_extenddftf2_hw, /* TFmode <- DFmode. */ + gen_extendsftf2_hw, /* TFmode <- SFmode. */ + gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */ + gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */ + gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */ + gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */ + gen_trunctfdf2_hw, /* DFmode <- TFmode. */ + gen_trunctfsf2_hw, /* SFmode <- TFmode. */ + gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */ + gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */ + gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */ + gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */ + }, + }; if (dest_mode == src_mode) gcc_unreachable (); @@ -20745,14 +20840,23 @@ rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p) /* Convert to IEEE 128-bit floating point. 
*/ if (FLOAT128_IEEE_P (dest_mode)) { + if (dest_mode == KFmode) + kf_or_tf = 0; + else if (dest_mode == TFmode) + kf_or_tf = 1; + else + gcc_unreachable (); + switch (src_mode) { case DFmode: cvt = sext_optab; + hw_convert = hw_conversions[kf_or_tf].from_df; break; case SFmode: cvt = sext_optab; + hw_convert = hw_conversions[kf_or_tf].from_sf; break; case KFmode: @@ -20765,8 +20869,29 @@ rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p) break; case SImode: + if (unsigned_p) + { + cvt = ufloat_optab; + hw_convert = hw_conversions[kf_or_tf].from_si_uns; + } + else + { + cvt = sfloat_optab; + hw_convert = hw_conversions[kf_or_tf].from_si_sign; + } + break; + case DImode: - cvt = (unsigned_p) ? ufloat_optab : sfloat_optab; + if (unsigned_p) + { + cvt = ufloat_optab; + hw_convert = hw_conversions[kf_or_tf].from_di_uns; + } + else + { + cvt = sfloat_optab; + hw_convert = hw_conversions[kf_or_tf].from_di_sign; + } break; default: @@ -20777,14 +20902,23 @@ rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p) /* Convert from IEEE 128-bit floating point. */ else if (FLOAT128_IEEE_P (src_mode)) { + if (src_mode == KFmode) + kf_or_tf = 0; + else if (src_mode == TFmode) + kf_or_tf = 1; + else + gcc_unreachable (); + switch (dest_mode) { case DFmode: cvt = trunc_optab; + hw_convert = hw_conversions[kf_or_tf].to_df; break; case SFmode: cvt = trunc_optab; + hw_convert = hw_conversions[kf_or_tf].to_sf; break; case KFmode: @@ -20797,8 +20931,29 @@ rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p) break; case SImode: + if (unsigned_p) + { + cvt = ufix_optab; + hw_convert = hw_conversions[kf_or_tf].to_si_uns; + } + else + { + cvt = sfix_optab; + hw_convert = hw_conversions[kf_or_tf].to_si_sign; + } + break; + case DImode: - cvt = (unsigned_p) ? ufix_optab : sfix_optab; + if (unsigned_p) + { + cvt = ufix_optab; + hw_convert = hw_conversions[kf_or_tf].to_di_uns; + } + else + { + cvt = sfix_optab; + hw_convert = hw_conversions[kf_or_tf].to_di_sign; + } break; default: @@ -20817,6 +20972,10 @@ rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p) if (do_move) emit_move_insn (dest, gen_lowpart (dest_mode, src)); + /* Handle conversion if we have hardware support. */ + else if (TARGET_FLOAT128_HW && hw_convert) + emit_insn ((hw_convert) (dest, src)); + /* Call an external function to do the conversion. */ else if (cvt != unknown_optab) { @@ -20837,6 +20996,92 @@ rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p) return; } +/* Split a conversion from __float128 to an integer type into separate insns. + OPERANDS points to the destination, source, and V2DI temporary + register. CODE is either FIX or UNSIGNED_FIX. */ + +void +convert_float128_to_int (rtx *operands, enum rtx_code code) +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp = operands[2]; + rtx cvt; + rtvec cvt_vec; + rtx cvt_unspec; + rtvec move_vec; + rtx move_unspec; + + if (GET_CODE (tmp) == SCRATCH) + tmp = gen_reg_rtx (V2DImode); + + if (MEM_P (dest)) + dest = rs6000_address_for_fpconvert (dest); + + /* Generate the actual convert insn of the form: + (set (tmp) (unspec:V2DI [(fix:SI (reg:KF))] UNSPEC_IEEE128_CONVERT)). */ + cvt = gen_rtx_fmt_e (code, GET_MODE (dest), src); + cvt_vec = gen_rtvec (1, cvt); + cvt_unspec = gen_rtx_UNSPEC (V2DImode, cvt_vec, UNSPEC_IEEE128_CONVERT); + emit_insn (gen_rtx_SET (tmp, cvt_unspec)); + + /* Generate the move insn of the form: + (set (dest:SI) (unspec:SI [(tmp:V2DI))] UNSPEC_IEEE128_MOVE)). 
*/ + move_vec = gen_rtvec (1, tmp); + move_unspec = gen_rtx_UNSPEC (GET_MODE (dest), move_vec, UNSPEC_IEEE128_MOVE); + emit_insn (gen_rtx_SET (dest, move_unspec)); +} + +/* Split a conversion from an integer type to __float128 into separate insns. + OPERANDS points to the destination, source, and V2DI temporary + register. CODE is either FLOAT or UNSIGNED_FLOAT. */ + +void +convert_int_to_float128 (rtx *operands, enum rtx_code code) +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp = operands[2]; + rtx cvt; + rtvec cvt_vec; + rtx cvt_unspec; + rtvec move_vec; + rtx move_unspec; + rtx unsigned_flag; + + if (GET_CODE (tmp) == SCRATCH) + tmp = gen_reg_rtx (V2DImode); + + if (MEM_P (src)) + src = rs6000_address_for_fpconvert (src); + + /* Generate the move of the integer into the Altivec register of the form: + (set (tmp:V2DI) (unspec:V2DI [(src:SI) + (const_int 0)] UNSPEC_IEEE128_MOVE)). + + or: + (set (tmp:V2DI) (unspec:V2DI [(src:DI)] UNSPEC_IEEE128_MOVE)). */ + + if (GET_MODE (src) == SImode) + { + unsigned_flag = (code == UNSIGNED_FLOAT) ? const1_rtx : const0_rtx; + move_vec = gen_rtvec (2, src, unsigned_flag); + } + else + move_vec = gen_rtvec (1, src); + + move_unspec = gen_rtx_UNSPEC (V2DImode, move_vec, UNSPEC_IEEE128_MOVE); + emit_insn (gen_rtx_SET (tmp, move_unspec)); + + /* Generate the actual convert insn of the form: + (set (dest:KF) (float:KF (unspec:DI [(tmp:V2DI)] + UNSPEC_IEEE128_CONVERT))). */ + cvt_vec = gen_rtvec (1, tmp); + cvt_unspec = gen_rtx_UNSPEC (DImode, cvt_vec, UNSPEC_IEEE128_CONVERT); + cvt = gen_rtx_fmt_e (code, GET_MODE (dest), cvt_unspec); + emit_insn (gen_rtx_SET (dest, cvt)); +} + /* Emit the RTL for an sISEL pattern. */ diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 4ac4f3134d5..8c606ab0aea 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -516,6 +516,10 @@ extern int rs6000_vector_align[]; with scalar instructions. */ #define VECTOR_ELEMENT_SCALAR_64BIT ((BYTES_BIG_ENDIAN) ? 0 : 1) +/* Element number of the 64-bit value in a 128-bit vector that can be accessed + with the ISA 3.0 MFVSRLD instructions. */ +#define VECTOR_ELEMENT_MFVSRLD_64BIT ((BYTES_BIG_ENDIAN) ? 1 : 0) + /* Alignment options for fields in structures for sub-targets following AIX-like ABI. ALIGN_POWER word-aligns FP doubles (default AIX ABI). @@ -567,10 +571,13 @@ extern int rs6000_vector_align[]; #define TARGET_FCTIWUZ TARGET_POPCNTD #define TARGET_CTZ TARGET_MODULO #define TARGET_EXTSWSLI (TARGET_MODULO && TARGET_POWERPC64) +#define TARGET_MADDLD (TARGET_MODULO && TARGET_POWERPC64) #define TARGET_XSCVDPSPN (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR) #define TARGET_XSCVSPDPN (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR) #define TARGET_VADDUQM (TARGET_P8_VECTOR && TARGET_POWERPC64) +#define TARGET_DIRECT_MOVE_128 (TARGET_P9_VECTOR && TARGET_DIRECT_MOVE \ + && TARGET_POWERPC64) /* Byte/char syncs were added as phased in for ISA 2.06B, but are not present in power7, so conditionalize them on p8 features. TImode syncs need quad @@ -1517,6 +1524,7 @@ enum r6000_reg_class_enum { RS6000_CONSTRAINT_v, /* Altivec registers */ RS6000_CONSTRAINT_wa, /* Any VSX register */ RS6000_CONSTRAINT_wd, /* VSX register for V2DF */ + RS6000_CONSTRAINT_we, /* VSX register if ISA 3.0 vector. */ RS6000_CONSTRAINT_wf, /* VSX register for V4SF */ RS6000_CONSTRAINT_wg, /* FPR register for -mmfpgpr */ RS6000_CONSTRAINT_wh, /* FPR register for direct moves. 
*/
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 49c5c98f7d5..1fdc7bbda42 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -143,6 +143,9 @@
   UNSPEC_STACK_CHECK
   UNSPEC_FUSION_P9
   UNSPEC_FUSION_ADDIS
+  UNSPEC_ROUND_TO_ODD
+  UNSPEC_IEEE128_MOVE
+  UNSPEC_IEEE128_CONVERT
  ])

 ;;
@@ -381,6 +384,8 @@
   (V2SF "TARGET_PAIRED_FLOAT")
   (V4SF "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)")
   (V2DF "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V2DFmode)")
+  (KF "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (KFmode)")
+  (TF "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (TFmode)")
  ])

 ; Floating point move iterators to combine binary and decimal moves
@@ -485,10 +490,10 @@
 (define_mode_attr Fvsx [(SF "sp") (DF "dp")])

 ; SF/DF constraint for arithmetic on traditional floating point registers
-(define_mode_attr Ff [(SF "f") (DF "d")])
+(define_mode_attr Ff [(SF "f") (DF "d") (DI "d")])

 ; SF/DF constraint for arithmetic on VSX registers
-(define_mode_attr Fv [(SF "wy") (DF "ws")])
+(define_mode_attr Fv [(SF "wy") (DF "ws") (DI "wi")])

 ; SF/DF constraint for arithmetic on altivec registers
 (define_mode_attr Fa [(SF "wu") (DF "wv")])
@@ -510,9 +515,31 @@
 (define_code_iterator iorxor [ior xor])

 ; Signed/unsigned variants of ops.
-(define_code_iterator any_extend [sign_extend zero_extend])
-(define_code_attr u [(sign_extend "") (zero_extend "u")])
-(define_code_attr su [(sign_extend "s") (zero_extend "u")])
+(define_code_iterator any_extend [sign_extend zero_extend])
+(define_code_iterator any_fix [fix unsigned_fix])
+(define_code_iterator any_float [float unsigned_float])
+
+(define_code_attr u [(sign_extend "")
+		     (zero_extend "u")])
+
+(define_code_attr su [(sign_extend "s")
+		      (zero_extend "u")
+		      (fix "s")
+		      (unsigned_fix "u")
+		      (float "s")
+		      (unsigned_float "u")])
+
+(define_code_attr az [(sign_extend "a")
+		      (zero_extend "z")
+		      (fix "a")
+		      (unsigned_fix "z")
+		      (float "a")
+		      (unsigned_float "z")])
+
+(define_code_attr uns [(fix "")
+		       (unsigned_fix "uns")
+		       (float "")
+		       (unsigned_float "uns")])

 ; Various instructions that come in SI and DI forms.
 ; A generic w/d attribute, for things like cmpw/cmpd.
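[Editorial aside: a hedged illustration, not part of the patch. The any_fix/any_float iterators and the su/uns attributes above let a single machine-description pattern cover both the signed and the unsigned conversion. A minimal C sketch of code that would reach the two variants, assuming a power9 target with IEEE 128-bit hardware enabled (flags along the lines of -mcpu=power9 -mfloat128, names assumed); the mnemonics in the comments come from the xscvqp<su>dz template later in this patch and are expectations, not verified compiler output:

    /* Signed conversion: the "fix" variant, expected to emit xscvqpsdz.  */
    long long f128_to_ll (__float128 x) { return (long long) x; }

    /* Unsigned conversion: the "unsigned_fix" variant, expected to emit
       xscvqpudz, selected via the <su>/<uns> code attributes.  */
    unsigned long long f128_to_ull (__float128 x) { return (unsigned long long) x; }
]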
@@ -2815,6 +2842,14 @@ DONE; }) +(define_insn "*maddld4" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (plus:DI (mult:DI (match_operand:DI 1 "gpc_reg_operand" "r") + (match_operand:DI 2 "gpc_reg_operand" "r")) + (match_operand:DI 3 "gpc_reg_operand" "r")))] + "TARGET_MADDLD" + "maddld %0,%1,%2,%3" + [(set_attr "type" "mul")]) (define_insn "udiv3" [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") @@ -7003,7 +7038,16 @@ { if (FLOAT128_IEEE_P (mode)) { - if (TARGET_FLOAT128) + if (TARGET_FLOAT128_HW) + { + if (mode == TFmode) + emit_insn (gen_negtf2_hw (operands[0], operands[1])); + else if (mode == KFmode) + emit_insn (gen_negkf2_hw (operands[0], operands[1])); + else + gcc_unreachable (); + } + else if (TARGET_FLOAT128) { if (mode == TFmode) emit_insn (gen_ieee_128bit_vsx_negtf2 (operands[0], operands[1])); @@ -7053,7 +7097,17 @@ if (FLOAT128_IEEE_P (mode)) { - if (TARGET_FLOAT128) + if (TARGET_FLOAT128_HW) + { + if (mode == TFmode) + emit_insn (gen_abstf2_hw (operands[0], operands[1])); + else if (mode == KFmode) + emit_insn (gen_abskf2_hw (operands[0], operands[1])); + else + FAIL; + DONE; + } + else if (TARGET_FLOAT128) { if (mode == TFmode) emit_insn (gen_ieee_128bit_vsx_abstf2 (operands[0], operands[1])); @@ -7140,7 +7194,7 @@ [(set (match_operand:IEEE128 0 "register_operand" "=wa") (neg:IEEE128 (match_operand:IEEE128 1 "register_operand" "wa"))) (clobber (match_scratch:V16QI 2 "=v"))] - "TARGET_FLOAT128" + "TARGET_FLOAT128 && !TARGET_FLOAT128_HW" "#" "&& 1" [(parallel [(set (match_dup 0) @@ -7160,7 +7214,7 @@ [(set (match_operand:IEEE128 0 "register_operand" "=wa") (neg:IEEE128 (match_operand:IEEE128 1 "register_operand" "wa"))) (use (match_operand:V16QI 2 "register_operand" "=v"))] - "TARGET_FLOAT128" + "TARGET_FLOAT128 && !TARGET_FLOAT128_HW" "xxlxor %x0,%x1,%x2" [(set_attr "type" "vecsimple")]) @@ -7169,7 +7223,7 @@ [(set (match_operand:IEEE128 0 "register_operand" "=wa") (abs:IEEE128 (match_operand:IEEE128 1 "register_operand" "wa"))) (clobber (match_scratch:V16QI 2 "=v"))] - "TARGET_FLOAT128 && FLOAT128_IEEE_P (mode)" + "TARGET_FLOAT128 && !TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)" "#" "&& 1" [(parallel [(set (match_dup 0) @@ -7189,7 +7243,7 @@ [(set (match_operand:IEEE128 0 "register_operand" "=wa") (abs:IEEE128 (match_operand:IEEE128 1 "register_operand" "wa"))) (use (match_operand:V16QI 2 "register_operand" "=v"))] - "TARGET_FLOAT128" + "TARGET_FLOAT128 && !TARGET_FLOAT128_HW" "xxlandc %x0,%x1,%x2" [(set_attr "type" "vecsimple")]) @@ -7200,7 +7254,7 @@ (abs:IEEE128 (match_operand:IEEE128 1 "register_operand" "wa")))) (clobber (match_scratch:V16QI 2 "=v"))] - "TARGET_FLOAT128 && FLOAT128_IEEE_P (mode)" + "TARGET_FLOAT128 && !TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)" "#" "&& 1" [(parallel [(set (match_dup 0) @@ -7222,7 +7276,7 @@ (abs:IEEE128 (match_operand:IEEE128 1 "register_operand" "wa")))) (use (match_operand:V16QI 2 "register_operand" "=v"))] - "TARGET_FLOAT128" + "TARGET_FLOAT128 && !TARGET_FLOAT128_HW" "xxlor %x0,%x1,%x2" [(set_attr "type" "vecsimple")]) @@ -7480,7 +7534,10 @@ (match_operand:FMOVE128_GPR 1 "input_operand" ""))] "reload_completed && (int_reg_operand (operands[0], mode) - || int_reg_operand (operands[1], mode))" + || int_reg_operand (operands[1], mode)) + && (!TARGET_DIRECT_MOVE_128 + || (!vsx_register_operand (operands[0], mode) + && !vsx_register_operand (operands[1], mode)))" [(pc)] { rs6000_split_multireg_move (operands[0], operands[1]); DONE; }) @@ -12998,6 +13055,332 @@ +;; ISA 2.08 IEEE 128-bit floating point support. 
+ +(define_insn "add3" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (plus:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "v") + (match_operand:IEEE128 2 "altivec_register_operand" "v")))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)" + "xsaddqp %0,%1,%2" + [(set_attr "type" "vecfloat")]) + +(define_insn "sub3" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (minus:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "v") + (match_operand:IEEE128 2 "altivec_register_operand" "v")))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)" + "xssubqp %0,%1,%2" + [(set_attr "type" "vecfloat")]) + +(define_insn "mul3" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (mult:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "v") + (match_operand:IEEE128 2 "altivec_register_operand" "v")))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)" + "xsmulqp %0,%1,%2" + [(set_attr "type" "vecfloat")]) + +(define_insn "div3" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (div:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "v") + (match_operand:IEEE128 2 "altivec_register_operand" "v")))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)" + "xsdivqp %0,%1,%2" + [(set_attr "type" "vecdiv")]) + +(define_insn "sqrt2" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (sqrt:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "v")))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)" + "xssqrtqp %0,%1" + [(set_attr "type" "vecdiv")]) + +(define_insn "copysign3" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (unspec:IEEE128 + [(match_operand:IEEE128 1 "altivec_register_operand" "v") + (match_operand:IEEE128 2 "altivec_register_operand" "v")] + UNSPEC_COPYSIGN))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)" + "xscpsgnqp %0,%2,%1" + [(set_attr "type" "vecsimple")]) + +(define_insn "neg2_hw" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (neg:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "v")))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)" + "xsnegqp %0,%1" + [(set_attr "type" "vecfloat")]) + + +(define_insn "abs2_hw" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (abs:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "v")))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)" + "xsabsqp %0,%1" + [(set_attr "type" "vecfloat")]) + + +(define_insn "*nabs2_hw" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (neg:IEEE128 + (abs:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "v"))))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)" + "xsnabsqp %0,%1" + [(set_attr "type" "vecfloat")]) + +;; Initially don't worry about doing fusion +(define_insn "*fma4_hw" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (fma:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "%v") + (match_operand:IEEE128 2 "altivec_register_operand" "v") + (match_operand:IEEE128 3 "altivec_register_operand" "0")))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)" + "xsmaddqp %0,%1,%2" + [(set_attr "type" "vecfloat")]) + +(define_insn "*fms4_hw" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (fma:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "%v") + (match_operand:IEEE128 2 "altivec_register_operand" "v") + (neg:IEEE128 + (match_operand:IEEE128 3 "altivec_register_operand" "0"))))] + "TARGET_FLOAT128_HW && 
FLOAT128_IEEE_P (mode)" + "xsmsubqp %0,%1,%2" + [(set_attr "type" "vecfloat")]) + +(define_insn "*nfma4_hw" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (neg:IEEE128 + (fma:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "%v") + (match_operand:IEEE128 2 "altivec_register_operand" "v") + (match_operand:IEEE128 3 "altivec_register_operand" "0"))))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)" + "xsnmaddqp %0,%1,%2" + [(set_attr "type" "vecfloat")]) + +(define_insn "*nfms4_hw" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (neg:IEEE128 + (fma:IEEE128 + (match_operand:IEEE128 1 "altivec_register_operand" "%v") + (match_operand:IEEE128 2 "altivec_register_operand" "v") + (neg:IEEE128 + (match_operand:IEEE128 3 "altivec_register_operand" "0")))))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)" + "xsnmsubqp %0,%1,%2" + [(set_attr "type" "vecfloat")]) + +(define_insn "extend2_hw" + [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") + (float_extend:IEEE128 + (match_operand:SFDF 1 "altivec_register_operand" "v")))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)" + "xscvdpqp %0,%1" + [(set_attr "type" "vecfloat")]) + +(define_insn "truncdf2_hw" + [(set (match_operand:DF 0 "altivec_register_operand" "=v") + (float_truncate:DF + (match_operand:IEEE128 1 "altivec_register_operand" "v")))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)" + "xscvqpdp %0,%1" + [(set_attr "type" "vecfloat")]) + +;; There is no KFmode -> SFmode instruction. Preserve the accuracy by doing +;; the KFmode -> DFmode conversion using round to odd rather than the normal +;; conversion +(define_insn_and_split "truncsf2_hw" + [(set (match_operand:SF 0 "vsx_register_operand" "=wy") + (float_truncate:SF + (match_operand:IEEE128 1 "altivec_register_operand" "v"))) + (clobber (match_scratch:DF 2 "=v"))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)" + "#" + "&& 1" + [(set (match_dup 2) + (unspec:DF [(match_dup 1)] UNSPEC_ROUND_TO_ODD)) + (set (match_dup 0) + (float_truncate:SF (match_dup 2)))] +{ + if (GET_CODE (operands[2]) == SCRATCH) + operands[2] = gen_reg_rtx (DFmode); +} + [(set_attr "type" "vecfloat") + (set_attr "length" "8")]) + +;; At present SImode is not allowed in VSX registers at all, and DImode is only +;; allowed in the traditional floating point registers. Use V2DImode so that +;; we can get a value in an Altivec register. 
+
+(define_insn_and_split "fix<uns>_<mode>si2_hw"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,Z")
+	(any_fix:SI (match_operand:IEEE128 1 "altivec_register_operand" "v,v")))
+   (clobber (match_scratch:V2DI 2 "=v,v"))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "#"
+  "&& 1"
+  [(pc)]
+{
+  convert_float128_to_int (operands, <CODE>);
+  DONE;
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "mftgpr,fpstore")])
+
+(define_insn_and_split "fix<uns>_<mode>di2_hw"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=wr,wi,Z")
+	(any_fix:DI (match_operand:IEEE128 1 "altivec_register_operand" "v,v,v")))
+   (clobber (match_scratch:V2DI 2 "=v,v,v"))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "#"
+  "&& 1"
+  [(pc)]
+{
+  convert_float128_to_int (operands, <CODE>);
+  DONE;
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "mftgpr,vecsimple,fpstore")])
+
+(define_insn_and_split "float<uns>_<mode>si2_hw"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v,v")
+	(any_float:IEEE128 (match_operand:SI 1 "nonimmediate_operand" "r,Z")))
+   (clobber (match_scratch:V2DI 2 "=v,v"))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "#"
+  "&& 1"
+  [(pc)]
+{
+  convert_int_to_float128 (operands, <CODE>);
+  DONE;
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "vecfloat")])
+
+(define_insn_and_split "float<uns>_<mode>di2_hw"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v,v,v")
+	(any_float:IEEE128 (match_operand:DI 1 "nonimmediate_operand" "wi,wr,Z")))
+   (clobber (match_scratch:V2DI 2 "=v,v,v"))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "#"
+  "&& 1"
+  [(pc)]
+{
+  convert_int_to_float128 (operands, <CODE>);
+  DONE;
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "vecfloat")])
+
+;; Integer conversion instructions, using V2DImode to get an Altivec register
+(define_insn "*xscvqp<su>wz_<mode>"
+  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
+	(unspec:V2DI
+	 [(any_fix:SI
+	   (match_operand:IEEE128 1 "altivec_register_operand" "v"))]
+	 UNSPEC_IEEE128_CONVERT))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "xscvqp<su>wz %0,%1"
+  [(set_attr "type" "vecfloat")])
+
+(define_insn "*xscvqp<su>dz_<mode>"
+  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
+	(unspec:V2DI
+	 [(any_fix:DI
+	   (match_operand:IEEE128 1 "altivec_register_operand" "v"))]
+	 UNSPEC_IEEE128_CONVERT))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "xscvqp<su>dz %0,%1"
+  [(set_attr "type" "vecfloat")])
+
+(define_insn "*xscv<su>dqp_<mode>"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+	(any_float:IEEE128
+	 (unspec:DI [(match_operand:V2DI 1 "altivec_register_operand" "v")]
+		    UNSPEC_IEEE128_CONVERT)))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "xscv<su>dqp %0,%1"
+  [(set_attr "type" "vecfloat")])
+
+(define_insn "*ieee128_mfvsrd"
+  [(set (match_operand:DI 0 "reg_or_indexed_operand" "=wr,Z,wi")
+	(unspec:DI [(match_operand:V2DI 1 "altivec_register_operand" "v,v,v")]
+		   UNSPEC_IEEE128_MOVE))]
+  "TARGET_FLOAT128_HW && TARGET_POWERPC64"
+  "@
+   mfvsrd %0,%x1
+   stxsdx %x1,%y0
+   xxlor %x0,%x1,%x1"
+  [(set_attr "type" "mftgpr,fpstore,vecsimple")])
+
+(define_insn "*ieee128_mfvsrwz"
+  [(set (match_operand:SI 0 "reg_or_indexed_operand" "=r,Z")
+	(unspec:SI [(match_operand:V2DI 1 "altivec_register_operand" "v,v")]
+		   UNSPEC_IEEE128_MOVE))]
+  "TARGET_FLOAT128_HW"
+  "@
+   mfvsrwz %0,%x1
+   stxsiwx %x1,%y0"
+  [(set_attr "type" "mftgpr,fpstore")])
+
+;; 0 says do sign-extension, 1 says zero-extension
+(define_insn "*ieee128_mtvsrw"
+  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v,v,v,v")
+	(unspec:V2DI [(match_operand:SI 1 "nonimmediate_operand"
"r,Z,r,Z") + (match_operand:SI 2 "const_0_to_1_operand" "O,O,n,n")] + UNSPEC_IEEE128_MOVE))] + "TARGET_FLOAT128_HW" + "@ + mtvsrwa %x0,%1 + lxsiwax %x0,%y1 + mtvsrwz %x0,%1 + lxsiwzx %x0,%y1" + [(set_attr "type" "mffgpr,fpload,mffgpr,fpload")]) + + +(define_insn "*ieee128_mtvsrd" + [(set (match_operand:V2DI 0 "altivec_register_operand" "=v,v,v") + (unspec:V2DI [(match_operand:DI 1 "nonimmediate_operand" "wr,Z,wi")] + UNSPEC_IEEE128_MOVE))] + "TARGET_FLOAT128_HW" + "@ + mtvsrd %x0,%1 + lxsdx %x0,%y1 + xxlor %x0,%x1,%x1" + [(set_attr "type" "mffgpr,fpload,vecsimple")]) + +;; IEEE 128-bit instructions with round to odd semantics +(define_insn "*truncdf2_odd" + [(set (match_operand:DF 0 "vsx_register_operand" "=v") + (unspec:DF [(match_operand:IEEE128 1 "altivec_register_operand" "v")] + UNSPEC_ROUND_TO_ODD))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)" + "xscvqpdpo %0,%1" + [(set_attr "type" "vecfloat")]) + +;; IEEE 128-bit comparisons +(define_insn "*cmp_hw" + [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") + (compare:CCFP (match_operand:IEEE128 1 "altivec_register_operand" "v") + (match_operand:IEEE128 2 "altivec_register_operand" "v")))] + "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode)" + "xscmpuqp %0,%1,%2" + [(set_attr "type" "fpcompare")]) + + (include "sync.md") (include "vector.md") diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 0e110ae2f7f..596b11d01ab 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -760,31 +760,31 @@ "") (define_insn "*vsx_mov" - [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,,,?Z,?,?,wQ,?&r,??Y,??r,??r,,?,*r,v,wZ, v") - (match_operand:VSX_M 1 "input_operand" ",Z,,,Z,,r,wQ,r,Y,r,j,j,j,W,v,wZ"))] + [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,,,?Z,?,?,r,we,wQ,?&r,??Y,??r,??r,,?,*r,v,wZ,v") + (match_operand:VSX_M 1 "input_operand" ",Z,,,Z,,we,b,r,wQ,r,Y,r,j,j,j,W,v,wZ"))] "VECTOR_MEM_VSX_P (mode) && (register_operand (operands[0], mode) || register_operand (operands[1], mode))" { return rs6000_output_move_128bit (operands); } - [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,load,store,store,load, *,vecsimple,vecsimple,*, *,vecstore,vecload") - (set_attr "length" "4,4,4,4,4,4,12,12,12,12,16,4,4,*,16,4,4")]) + [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,mffgpr,mftgpr,load,store,store,load, *,vecsimple,vecsimple,*, *,vecstore,vecload") + (set_attr "length" "4,4,4,4,4,4,8,4,12,12,12,12,16,4,4,*,16,4,4")]) ;; Unlike other VSX moves, allow the GPRs even for reloading, since a normal ;; use of TImode is for unions. 
However for plain data movement, slightly
 ;; favor the vector loads
 (define_insn "*vsx_movti_64bit"
-  [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v,v,wZ,wQ,&r,Y,r,r,?r")
-	(match_operand:TI 1 "input_operand" "wa,Z,wa,O,W,wZ,v,r,wQ,r,Y,r,n"))]
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,r,we,v,v,wZ,wQ,&r,Y,r,r,?r")
+	(match_operand:TI 1 "input_operand" "wa,Z,wa,O,we,b,W,wZ,v,r,wQ,r,Y,r,n"))]
   "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
    && (register_operand (operands[0], TImode)
        || register_operand (operands[1], TImode))"
 {
   return rs6000_output_move_128bit (operands);
 }
-  [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store,load,store,load,*,*")
-   (set_attr "length" "4,4,4,4,16,4,4,8,8,8,8,8,8")])
+  [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,mffgpr,mftgpr,vecsimple,vecstore,vecload,store,load,store,load,*,*")
+   (set_attr "length" "4,4,4,4,8,4,16,4,4,8,8,8,8,8,8")])

 (define_insn "*vsx_movti_32bit"
   [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,Q,Y,????r,????r,????r,r")
@@ -1909,11 +1909,11 @@
 ;; Optimize cases were we can do a simple or direct move.
 ;; Or see if we can avoid doing the move at all
 (define_insn "*vsx_extract_<mode>_internal1"
-  [(set (match_operand:<VS_scalar> 0 "register_operand" "=d,,r")
+  [(set (match_operand:<VS_scalar> 0 "register_operand" "=d,,r,r")
	(vec_select:<VS_scalar>
-	 (match_operand:VSX_D 1 "register_operand" "d,,")
+	 (match_operand:VSX_D 1 "register_operand" "d,,,")
	 (parallel
-	  [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
+	  [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD,wL")])))]
   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
 {
   int op0_regno = REGNO (operands[0]);
@@ -1923,14 +1923,16 @@
     return "nop";

   if (INT_REGNO_P (op0_regno))
-    return "mfvsrd %0,%x1";
+    return ((INTVAL (operands[2]) == VECTOR_ELEMENT_MFVSRLD_64BIT)
+	    ? "mfvsrld %0,%x1"
+	    : "mfvsrd %0,%x1");

   if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
     return "fmr %0,%1";

   return "xxlor %x0,%x1,%x1";
 }
-  [(set_attr "type" "fp,vecsimple,mftgpr")
+  [(set_attr "type" "fp,vecsimple,mftgpr,mftgpr")
    (set_attr "length" "4")])

 (define_insn "*vsx_extract_<mode>_internal2"
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 7fdc9353bfb..80a1f64a61f 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3121,9 +3121,28 @@ asm ("xvadddp %0,%1,%2" : "=wa" (v1) : "wa" (v2), "wa" (v3));

 is not correct.

+If an instruction only takes Altivec registers, you do not want to use
+@code{%x}.
+
+@smallexample
+asm ("xsaddqp %0,%1,%2" : "=v" (v1) : "v" (v2), "v" (v3));
+@end smallexample
+
+is correct because the @code{xsaddqp} instruction only takes Altivec
+registers, while:
+
+@smallexample
+asm ("xsaddqp %x0,%x1,%x2" : "=v" (v1) : "v" (v2), "v" (v3));
+@end smallexample
+
+is incorrect.
+
 @item wd
 VSX vector register to hold vector double data or NO_REGS.

+@item we
+VSX register if the -mpower9-vector -m64 options were used or NO_REGS.
+
 @item wf
 VSX vector register to hold vector float data or NO_REGS.

@@ -3187,6 +3206,16 @@ Floating point register if the LFIWZX instruction is enabled or NO_REGS.
 @item wD
 Int constant that is the element number of the 64-bit scalar in a vector.

+@item wF
+Memory operand suitable for power9 fusion load/stores.
+
+@item wG
+Memory operand suitable for TOC fusion memory references.
+
+@item wL
+Int constant that is the element number that the MFVSRLD instruction
+targets.
+
 @item wQ
 A memory address that will work with the @code{lq} and @code{stq}
 instructions.
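[Editorial aside: a hedged illustration, not part of the patch. The float128-hw.c test added below exercises the new quad-precision arithmetic but not the conversion patterns. C along these lines, compiled for power9 with IEEE 128-bit hardware enabled (assumed flags, roughly -mcpu=power9 -mfloat128), would be expected to hit the extend/trunc patterns above; the mnemonics are taken from those insn templates, __float128 is assumed to map to KFmode, and none of this is verified compiler output:

    __float128 widen   (double d)      { return d; }  /* expected: xscvdpqp  */
    double     narrow  (__float128 q)  { return q; }  /* expected: xscvqpdp  */
    float      narrowf (__float128 q)  { return q; }  /* expected: xscvqpdpo (round to odd),
                                                         then a DF->SF rounding  */
]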
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 519a9e99835..deb1a712db2 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,13 @@ +2015-11-13 Michael Meissner + + * gcc.target/powerpc/float128-hw.c: New test for IEEE 128-bit + hardware floating point support. + + * gcc.target/powerpc/direct-move-vector.c: New test for 128-bit + vector direct move instructions. + + * gcc.target/powerpc/maddld.c: New test. + 2015-11-13 Uros Bizjak * gcc.dg/pr68306.c (dg-additional-options): Add i?86-*-* target. diff --git a/gcc/testsuite/gcc.target/powerpc/direct-move-vector.c b/gcc/testsuite/gcc.target/powerpc/direct-move-vector.c new file mode 100644 index 00000000000..1e8504ec66c --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/direct-move-vector.c @@ -0,0 +1,33 @@ +/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-options "-mcpu=power9 -O2" } */ + +/* Check code generation for direct move for long types. */ + +void +test (vector double *p) +{ + vector double v1 = *p; + vector double v2; + vector double v3; + vector double v4; + + /* Force memory -> FPR load. */ + __asm__ (" # reg %x0" : "+d" (v1)); + + /* force VSX -> GPR direct move. */ + v2 = v1; + __asm__ (" # reg %0" : "+r" (v2)); + + /* Force GPR -> Altivec direct move. */ + v3 = v2; + __asm__ (" # reg %x0" : "+v" (v3)); + *p = v3; +} + +/* { dg-final { scan-assembler "mfvsrd" } } */ +/* { dg-final { scan-assembler "mfvsrld" } } */ +/* { dg-final { scan-assembler "mtvsrdd" } } */ + + diff --git a/gcc/testsuite/gcc.target/powerpc/float128-hw.c b/gcc/testsuite/gcc.target/powerpc/float128-hw.c new file mode 100644 index 00000000000..71a0c24a2f2 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/float128-hw.c @@ -0,0 +1,18 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-require-effective-target powerpc_float128_hw_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-options "-mcpu=power9 -O2" } */ + +__float128 f128_add (__float128 a, __float128 b) { return a+b; } +__float128 f128_sub (__float128 a, __float128 b) { return a-b; } +__float128 f128_mul (__float128 a, __float128 b) { return a*b; } +__float128 f128_div (__float128 a, __float128 b) { return a/b; } +__float128 f128_fma (__float128 a, __float128 b, __float128 c) { return (a*b)+c; } +long f128_cmove (__float128 a, __float128 b, long c, long d) { return (a == b) ? 
c : d; }
+
+/* { dg-final { scan-assembler "xsaddqp" } } */
+/* { dg-final { scan-assembler "xssubqp" } } */
+/* { dg-final { scan-assembler "xsmulqp" } } */
+/* { dg-final { scan-assembler "xsdivqp" } } */
+/* { dg-final { scan-assembler "xsmaddqp" } } */
+/* { dg-final { scan-assembler "xscmpuqp" } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/maddld.c b/gcc/testsuite/gcc.target/powerpc/maddld.c
new file mode 100644
index 00000000000..c2b0c172080
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/maddld.c
@@ -0,0 +1,20 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-require-effective-target powerpc_p9modulo_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -O2" } */
+
+long
+s_madd (long a, long b, long c)
+{
+  return (a * b) + c;
+}
+
+unsigned long
+u_madd (unsigned long a, unsigned long b, unsigned long c)
+{
+  return (a * b) + c;
+}
+
+/* { dg-final { scan-assembler-times "maddld " 2 } } */
+/* { dg-final { scan-assembler-not "mulld " } } */
+/* { dg-final { scan-assembler-not "add " } } */
--
cgit v1.2.1
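[Editorial aside: a hedged illustration, not part of the patch. The direct-move-vector.c test above pins register classes with asm constraints to force the new 128-bit GPR<->VSX direct moves (mtvsrdd, mfvsrd/mfvsrld). Ordinary code that builds or splits a vector through 64-bit scalars can reach the same patterns; whether it actually does depends on register allocation, so this is only a sketch under assumed flags (-mcpu=power9 -m64):

    #include <altivec.h>

    /* Construct a V2DI vector from two GPR values; on ISA 3.0 this is
       expected to become a single mtvsrdd instead of two direct moves
       plus an xxpermdi.  */
    vector long long
    pack (long long a, long long b)
    {
      return (vector long long) { a, b };
    }

    /* Pull both doublewords back out; expected to use mfvsrd together
       with the new mfvsrld (element choice governed by wD/wL).  */
    long long
    sum_halves (vector long long v)
    {
      return v[0] + v[1];
    }
]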