Diffstat (limited to 'gcc/config/rs6000/vsx.md')
-rw-r--r-- | gcc/config/rs6000/vsx.md | 145
1 file changed, 136 insertions, 9 deletions
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 18f3e86e29f..2c74a8ebbe2 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -288,6 +288,16 @@
      (V8HI  "v")
      (V4SI  "wa")])
 
+;; Mode iterator for binary floating types other than double to
+;; optimize converting to that floating point type from an extract
+;; of an integer type
+(define_mode_iterator VSX_EXTRACT_FL [SF
+                                      (IF "FLOAT128_2REG_P (IFmode)")
+                                      (KF "TARGET_FLOAT128_HW")
+                                      (TF "FLOAT128_2REG_P (TFmode)
+                                           || (FLOAT128_IEEE_P (TFmode)
+                                               && TARGET_FLOAT128_HW)")])
+
 ;; Iterator for the 2 short vector types to do a splat from an integer
 (define_mode_iterator VSX_SPLAT_I [V16QI V8HI])
 
@@ -1907,6 +1917,7 @@
   [(set_attr "type" "vecdouble")])
 
 ;; Convert from 32-bit to 64-bit types
+;; Provide both vector and scalar targets
 (define_insn "vsx_xvcvsxwdp"
   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
        (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
@@ -1915,6 +1926,14 @@
   "xvcvsxwdp %x0,%x1"
   [(set_attr "type" "vecdouble")])
 
+(define_insn "vsx_xvcvsxwdp_df"
+  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
+       (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
+                  UNSPEC_VSX_CVSXWDP))]
+  "TARGET_VSX"
+  "xvcvsxwdp %x0,%x1"
+  [(set_attr "type" "vecdouble")])
+
 (define_insn "vsx_xvcvuxwdp"
   [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
        (unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
@@ -1923,6 +1942,14 @@
   "xvcvuxwdp %x0,%x1"
   [(set_attr "type" "vecdouble")])
 
+(define_insn "vsx_xvcvuxwdp_df"
+  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
+       (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
+                  UNSPEC_VSX_CVUXWDP))]
+  "TARGET_VSX"
+  "xvcvuxwdp %x0,%x1"
+  [(set_attr "type" "vecdouble")])
+
 (define_insn "vsx_xvcvspsxds"
   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
        (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
@@ -2559,11 +2586,10 @@
           (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")]))))]
   "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
 {
-  int element = INTVAL (operands[2]);
+  /* Note, the element number has already been adjusted for endianness, so we
+     don't have to adjust it here.  */
   int unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
-  int offset = ((VECTOR_ELT_ORDER_BIG)
-               ? unit_size * element
-               : unit_size * (GET_MODE_NUNITS (<MODE>mode) - 1 - element));
+  HOST_WIDE_INT offset = unit_size * INTVAL (operands[2]);
 
   operands[2] = GEN_INT (offset);
   if (unit_size == 4)
@@ -2574,11 +2600,11 @@
   [(set_attr "type" "vecsimple")])
 
 (define_insn_and_split "*vsx_extract_si"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,Z,Z,wJwK")
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wHwI,Z")
        (vec_select:SI
-        (match_operand:V4SI 1 "gpc_reg_operand" "v,wJwK,v,v")
-        (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
-   (clobber (match_scratch:V4SI 3 "=v,wJwK,v,v"))]
+        (match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv")
+        (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
+   (clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))]
   "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
   "#"
   "&& reload_completed"
@@ -2628,7 +2654,7 @@
   DONE;
 }
-  [(set_attr "type" "mftgpr,fpstore,fpstore,vecsimple")
+  [(set_attr "type" "mftgpr,vecperm,fpstore")
   (set_attr "length" "8")])
 
 (define_insn_and_split "*vsx_extract_<mode>_p8"
@@ -2714,6 +2740,107 @@
   DONE;
 })
 
+;; VSX_EXTRACT optimizations
+;; Optimize double d = (double) vec_extract (vi, <n>)
+;; Get the element into the top position and use XVCVSXWDP/XVCVUXWDP
+(define_insn_and_split "*vsx_extract_si_<uns>float_df"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=ws")
+       (any_float:DF
+        (vec_select:SI
+         (match_operand:V4SI 1 "gpc_reg_operand" "v")
+         (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
+   (clobber (match_scratch:V4SI 3 "=v"))]
+  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx dest = operands[0];
+  rtx src = operands[1];
+  rtx element = operands[2];
+  rtx v4si_tmp = operands[3];
+  int value;
+
+  if (!VECTOR_ELT_ORDER_BIG)
+    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
+
+  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
+     instruction.  */
+  value = INTVAL (element);
+  if (value != 0)
+    {
+      if (GET_CODE (v4si_tmp) == SCRATCH)
+       v4si_tmp = gen_reg_rtx (V4SImode);
+      emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
+    }
+  else
+    v4si_tmp = src;
+
+  emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
+  DONE;
+})
+
+;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
+;; where <type> is a floating point type supported by the hardware that is
+;; not double.  First convert the value to double, and then to the desired
+;; type.
+(define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
+  [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=ww")
+       (any_float:VSX_EXTRACT_FL
+        (vec_select:SI
+         (match_operand:V4SI 1 "gpc_reg_operand" "v")
+         (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
+   (clobber (match_scratch:V4SI 3 "=v"))
+   (clobber (match_scratch:DF 4 "=ws"))]
+  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  rtx dest = operands[0];
+  rtx src = operands[1];
+  rtx element = operands[2];
+  rtx v4si_tmp = operands[3];
+  rtx df_tmp = operands[4];
+  int value;
+
+  if (!VECTOR_ELT_ORDER_BIG)
+    element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
+
+  /* If the value is in the correct position, we can avoid doing the VSPLT<x>
+     instruction.  */
+  value = INTVAL (element);
+  if (value != 0)
+    {
+      if (GET_CODE (v4si_tmp) == SCRATCH)
+       v4si_tmp = gen_reg_rtx (V4SImode);
+      emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
+    }
+  else
+    v4si_tmp = src;
+
+  if (GET_CODE (df_tmp) == SCRATCH)
+    df_tmp = gen_reg_rtx (DFmode);
+
+  emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
+
+  if (<MODE>mode == SFmode)
+    emit_insn (gen_truncdfsf2 (dest, df_tmp));
+  else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
+    emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
+  else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
+          && TARGET_FLOAT128_HW)
+    emit_insn (gen_extenddftf2_hw (dest, df_tmp));
+  else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
+    emit_insn (gen_extenddfif2 (dest, df_tmp));
+  else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
+    emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
+  else
+    gcc_unreachable ();
+
+  DONE;
+})
+
 ;; Expanders for builtins
 (define_expand "vsx_mergel_<mode>"
   [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
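
For context, here is a minimal C sketch of the source-level constructs the two new splitters target. The function names and compiler flags are illustrative assumptions, not part of the patch:

/* Illustrative only: the kinds of conversions the new
   *vsx_extract_si_<uns>float_df and *vsx_extract_si_<uns>float_<mode>
   splitters are meant to catch, e.g. when built with something like
   -mcpu=power9 -O2.  */
#include <altivec.h>

double
extract_to_double (vector int vi)
{
  /* Can now split to a vspltw (moving the selected element into the
     convertible lane) followed by xvcvsxwdp, rather than bouncing the
     element through a GPR and back into a VSX register.  */
  return (double) vec_extract (vi, 3);
}

float
extract_to_float (vector unsigned int vu)
{
  /* The VSX_EXTRACT_FL path converts to double first (xvcvuxwdp into
     the DF scratch) and then narrows via truncdfsf2.  */
  return (float) vec_extract (vu, 1);
}

Note that when the endian-adjusted element index is already 0, the split skips the vspltw entirely: both preparation bodies check value != 0 before emitting it.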