Diffstat (limited to 'gcc/config/rs6000/vsx.md')
 gcc/config/rs6000/vsx.md | 145
 1 file changed, 136 insertions(+), 9 deletions(-)
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 18f3e86e29f..2c74a8ebbe2 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -288,6 +288,16 @@
(V8HI "v")
(V4SI "wa")])
+;; Mode iterator for binary floating types other than double, used to
+;; optimize conversions to those floating point types from an extract
+;; of an integer type
+(define_mode_iterator VSX_EXTRACT_FL [SF
+ (IF "FLOAT128_2REG_P (IFmode)")
+ (KF "TARGET_FLOAT128_HW")
+ (TF "FLOAT128_2REG_P (TFmode)
+ || (FLOAT128_IEEE_P (TFmode)
+ && TARGET_FLOAT128_HW)")])
+
;; Iterator for the 2 short vector types to do a splat from an integer
(define_mode_iterator VSX_SPLAT_I [V16QI V8HI])
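For context, a minimal C sketch of the source-level conversions the new
VSX_EXTRACT_FL iterator is meant to cover (function and variable names are
illustrative; each type's availability depends on target options, as the
iterator's conditions encode):

    #include <altivec.h>

    /* Illustrative only: each conversion needs the corresponding target
       support (e.g. -mfloat128 for __float128), mirroring the iterator
       conditions above.  */
    void
    extract_to_floats (vector int vi)
    {
      float       f  = (float)       vec_extract (vi, 0);  /* SFmode */
      __ibm128    g  = (__ibm128)    vec_extract (vi, 1);  /* IFmode */
      __float128  q  = (__float128)  vec_extract (vi, 2);  /* KFmode */
      long double ld = (long double) vec_extract (vi, 3);  /* TFmode */
      (void) f; (void) g; (void) q; (void) ld;
    }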
@@ -1907,6 +1917,7 @@
[(set_attr "type" "vecdouble")])
;; Convert from 32-bit to 64-bit types
+;; Provide both vector and scalar targets
(define_insn "vsx_xvcvsxwdp"
[(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
(unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
@@ -1915,6 +1926,14 @@
"xvcvsxwdp %x0,%x1"
[(set_attr "type" "vecdouble")])
+(define_insn "vsx_xvcvsxwdp_df"
+ [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
+ (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
+ UNSPEC_VSX_CVSXWDP))]
+ "TARGET_VSX"
+ "xvcvsxwdp %x0,%x1"
+ [(set_attr "type" "vecdouble")])
+
(define_insn "vsx_xvcvuxwdp"
[(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
(unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
@@ -1923,6 +1942,14 @@
"xvcvuxwdp %x0,%x1"
[(set_attr "type" "vecdouble")])
+(define_insn "vsx_xvcvuxwdp_df"
+ [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
+ (unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
+ UNSPEC_VSX_CVUXWDP))]
+ "TARGET_VSX"
+ "xvcvuxwdp %x0,%x1"
+ [(set_attr "type" "vecdouble")])
+
(define_insn "vsx_xvcvspsxds"
[(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
(unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
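The two new _df patterns expose the same conversion instructions with a
scalar DFmode destination, which the splitters added further down use.
A hedged C sketch of the operations they serve (names illustrative):

    #include <altivec.h>

    double
    extract_signed (vector int vsi)
    {
      /* Can map to vsx_xvcvsxwdp_df (xvcvsxwdp) once the element is
         placed in the position the instruction converts from.  */
      return (double) vec_extract (vsi, 0);
    }

    double
    extract_unsigned (vector unsigned int vui)
    {
      /* Can map to vsx_xvcvuxwdp_df (xvcvuxwdp) the same way.  */
      return (double) vec_extract (vui, 0);
    }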
@@ -2559,11 +2586,10 @@
(parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")]))))]
"VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB"
{
- int element = INTVAL (operands[2]);
+ /* Note, the element number has already been adjusted for endianness, so we
+ don't have to adjust it here. */
int unit_size = GET_MODE_UNIT_SIZE (<MODE>mode);
- int offset = ((VECTOR_ELT_ORDER_BIG)
- ? unit_size * element
- : unit_size * (GET_MODE_NUNITS (<MODE>mode) - 1 - element));
+ HOST_WIDE_INT offset = unit_size * INTVAL (operands[2]);
operands[2] = GEN_INT (offset);
if (unit_size == 4)
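A short worked example of the offset simplification in the hunk above
(values illustrative): since the element number now arrives already
adjusted for endianness, the byte offset no longer branches on
VECTOR_ELT_ORDER_BIG.

    #include <stdio.h>

    /* Hypothetical demonstration of the old vs. new offset computation
       for a V8HI extract (unit_size == 2, 8 elements, element 3).  */
    int
    main (void)
    {
      int unit_size = 2, nunits = 8, element = 3;

      /* Old: branch on target endianness.  */
      int old_be = unit_size * element;                /* 6 */
      int old_le = unit_size * (nunits - 1 - element); /* 8 */

      /* New: the caller pre-adjusts the element number for endianness,
         so a single expression suffices (on LE the caller passes 4).  */
      int new_offset = unit_size * element;

      printf ("%d %d %d\n", old_be, old_le, new_offset);
      return 0;
    }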
@@ -2574,11 +2600,11 @@
[(set_attr "type" "vecsimple")])
(define_insn_and_split "*vsx_extract_si"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,Z,Z,wJwK")
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wHwI,Z")
(vec_select:SI
- (match_operand:V4SI 1 "gpc_reg_operand" "v,wJwK,v,v")
- (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
- (clobber (match_scratch:V4SI 3 "=v,wJwK,v,v"))]
+ (match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv")
+ (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")])))
+ (clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))]
"VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
"#"
"&& reload_completed"
@@ -2628,7 +2654,7 @@
DONE;
}
- [(set_attr "type" "mftgpr,fpstore,fpstore,vecsimple")
+ [(set_attr "type" "mftgpr,vecperm,fpstore")
(set_attr "length" "8")])
(define_insn_and_split "*vsx_extract_<mode>_p8"
@@ -2714,6 +2740,107 @@
DONE;
})
+;; VSX_EXTRACT optimizations
+;; Optimize double d = (double) vec_extract (vi, <n>)
+;; Get the element into the top position and use XVCVSXWDP/XVCVUXWDP
+(define_insn_and_split "*vsx_extract_si_<uns>float_df"
+ [(set (match_operand:DF 0 "gpc_reg_operand" "=ws")
+ (any_float:DF
+ (vec_select:SI
+ (match_operand:V4SI 1 "gpc_reg_operand" "v")
+ (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
+ (clobber (match_scratch:V4SI 3 "=v"))]
+ "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx element = operands[2];
+ rtx v4si_tmp = operands[3];
+ int value;
+
+ if (!VECTOR_ELT_ORDER_BIG)
+ element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
+
+  /* If the value is already in the correct position, we can avoid
+     emitting the VSPLT<x> instruction.  */
+ value = INTVAL (element);
+ if (value != 0)
+ {
+ if (GET_CODE (v4si_tmp) == SCRATCH)
+ v4si_tmp = gen_reg_rtx (V4SImode);
+ emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
+ }
+ else
+ v4si_tmp = src;
+
+ emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp));
+ DONE;
+})
+
+;; Optimize <type> f = (<type>) vec_extract (vi, <n>)
+;; where <type> is a floating point type other than double that is supported
+;; by the hardware.  First convert the value to double, and then to the
+;; desired type.
+(define_insn_and_split "*vsx_extract_si_<uns>float_<mode>"
+ [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=ww")
+ (any_float:VSX_EXTRACT_FL
+ (vec_select:SI
+ (match_operand:V4SI 1 "gpc_reg_operand" "v")
+ (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")]))))
+ (clobber (match_scratch:V4SI 3 "=v"))
+ (clobber (match_scratch:DF 4 "=ws"))]
+ "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx element = operands[2];
+ rtx v4si_tmp = operands[3];
+ rtx df_tmp = operands[4];
+ int value;
+
+ if (!VECTOR_ELT_ORDER_BIG)
+ element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element));
+
+  /* If the value is already in the correct position, we can avoid
+     emitting the VSPLT<x> instruction.  */
+ value = INTVAL (element);
+ if (value != 0)
+ {
+ if (GET_CODE (v4si_tmp) == SCRATCH)
+ v4si_tmp = gen_reg_rtx (V4SImode);
+ emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element));
+ }
+ else
+ v4si_tmp = src;
+
+ if (GET_CODE (df_tmp) == SCRATCH)
+ df_tmp = gen_reg_rtx (DFmode);
+
+ emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp));
+
+ if (<MODE>mode == SFmode)
+ emit_insn (gen_truncdfsf2 (dest, df_tmp));
+ else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode))
+ emit_insn (gen_extenddftf2_vsx (dest, df_tmp));
+ else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode)
+ && TARGET_FLOAT128_HW)
+ emit_insn (gen_extenddftf2_hw (dest, df_tmp));
+ else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode))
+ emit_insn (gen_extenddfif2 (dest, df_tmp));
+ else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW)
+ emit_insn (gen_extenddfkf2_hw (dest, df_tmp));
+ else
+ gcc_unreachable ();
+
+ DONE;
+})
+
;; Expanders for builtins
(define_expand "vsx_mergel_<mode>"
[(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
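Taken together, a hedged sketch of what the new splitters aim for at the
source level (function name illustrative; the instruction sequence below
reflects the intent described in the comments above, not verified
compiler output):

    #include <altivec.h>

    double
    extract2_to_double (vector int vi)
    {
      /* With the patterns above, the element is splatted into the
         position XVCVSXWDP converts from (via vspltw, skipped when the
         element is already in place) and converted entirely in the
         vector unit, rather than being moved through a GPR and back
         for a scalar conversion.  */
      return (double) vec_extract (vi, 2);
    }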