Diffstat (limited to 'gcc/config/rs6000/vsx.md')
 gcc/config/rs6000/vsx.md | 289 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 289 insertions(+), 0 deletions(-)
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 141aa4237c3..4d73f0abe0a 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -21,6 +21,9 @@
;; Iterator for comparison types
(define_code_iterator CMP_TEST [eq lt gt unordered])
+;; Mode attribute for vector floate and floato conversions
+(define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])
+
;; Iterator for both scalar and vector floating point types supported by VSX
(define_mode_iterator VSX_B [DF V4SF V2DF])
@@ -331,6 +334,14 @@
UNSPEC_VSX_CVUXDSP
UNSPEC_VSX_CVSPSXDS
UNSPEC_VSX_CVSPUXDS
+ UNSPEC_VSX_CVSXWSP
+ UNSPEC_VSX_CVUXWSP
+ UNSPEC_VSX_FLOAT2
+ UNSPEC_VSX_UNS_FLOAT2
+ UNSPEC_VSX_FLOATE
+ UNSPEC_VSX_UNS_FLOATE
+ UNSPEC_VSX_FLOATO
+ UNSPEC_VSX_UNS_FLOATO
UNSPEC_VSX_TDIV
UNSPEC_VSX_TSQRT
UNSPEC_VSX_SET
@@ -1976,6 +1987,156 @@
"xvcvspuxds %x0,%x1"
[(set_attr "type" "vecdouble")])
+(define_insn "vsx_xvcvsxwsp"
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
+ (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
+ UNSPEC_VSX_CVSXWSP))]
+ "VECTOR_UNIT_VSX_P (V4SFmode)"
+ "xvcvsxwsp %x0,%x1"
+ [(set_attr "type" "vecfloat")])
+
+(define_insn "vsx_xvcvuxwsp"
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
+ (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
+ UNSPEC_VSX_CVUXWSP))]
+ "VECTOR_UNIT_VSX_P (V4SFmode)"
+ "xvcvuxwsp %x0,%x1"
+ [(set_attr "type" "vecfloat")])
+
+;; Generate float2
+;; convert two long long signed ints to float
+(define_expand "float2_v2di"
+ [(use (match_operand:V4SF 0 "register_operand" "=wa"))
+ (use (match_operand:V2DI 1 "register_operand" "wa"))
+ (use (match_operand:V2DI 2 "register_operand" "wa"))]
+ "VECTOR_UNIT_VSX_P (V4SFmode)"
+{
+ rtx rtx_src1, rtx_src2, rtx_dst;
+
+ rtx_dst = operands[0];
+ rtx_src1 = operands[1];
+ rtx_src2 = operands[2];
+
+ rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
+ DONE;
+})
+
+;; Generate uns_float2
+;; convert two long long unsigned ints to float
+(define_expand "uns_float2_v2di"
+ [(use (match_operand:V4SF 0 "register_operand" "=wa"))
+ (use (match_operand:V2DI 1 "register_operand" "wa"))
+ (use (match_operand:V2DI 2 "register_operand" "wa"))]
+ "VECTOR_UNIT_VSX_P (V4SFmode)"
+{
+ rtx rtx_src1, rtx_src2, rtx_dst;
+
+ rtx_dst = operands[0];
+ rtx_src1 = operands[1];
+ rtx_src2 = operands[2];
+
+ rs6000_generate_float2_code (false, rtx_dst, rtx_src1, rtx_src2);
+ DONE;
+})
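
[Editor's note: a sketch of what the two float2 expanders compute. The
vec_float2 builtin name is the overload this series adds outside this file,
so treat it as an assumption here; roughly, the four single-precision results
are the converted doublewords of the two inputs, in element order:

    #include <altivec.h>

    /* Sketch: result = { (float) a[0], (float) a[1],
                          (float) b[0], (float) b[1] }   (element order)  */
    vector float
    float2_signed (vector signed long long a, vector signed long long b)
    {
      return vec_float2 (a, b);      /* float2_v2di expander  */
    }

    vector float
    float2_unsigned (vector unsigned long long a, vector unsigned long long b)
    {
      return vec_float2 (a, b);      /* uns_float2_v2di expander  */
    }
]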
+
+;; Generate floate
+;; convert double or long long signed to float
+;; (Only even words are valid, BE numbering)
+(define_expand "floate<mode>"
+ [(use (match_operand:V4SF 0 "register_operand" "=wa"))
+ (use (match_operand:VSX_D 1 "register_operand" "wa"))]
+ "VECTOR_UNIT_VSX_P (V4SFmode)"
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ {
+ /* Shift left one word to put the even word in the correct location.  */
+ rtx rtx_tmp;
+ rtx rtx_val = GEN_INT (4);
+
+ rtx_tmp = gen_reg_rtx (V4SFmode);
+ emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
+ emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
+ rtx_tmp, rtx_tmp, rtx_val));
+ }
+ else
+ emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
+
+ DONE;
+})
+
+;; Generate uns_floate
+;; convert long long unsigned to float
+;; (Only even words are valid, BE numbering)
+(define_expand "unsfloatev2di"
+ [(use (match_operand:V4SF 0 "register_operand" "=wa"))
+ (use (match_operand:V2DI 1 "register_operand" "wa"))]
+ "VECTOR_UNIT_VSX_P (V4SFmode)"
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ {
+ /* Shift left one word to put the even word in the correct location.  */
+ rtx rtx_tmp;
+ rtx rtx_val = GEN_INT (4);
+
+ rtx_tmp = gen_reg_rtx (V4SFmode);
+ emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
+ emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
+ rtx_tmp, rtx_tmp, rtx_val));
+ }
+ else
+ emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
+
+ DONE;
+})
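
[Editor's note: usage sketch for the floate expanders (the floate<mode>
iteration over VSX_D yields floatev2df and floatev2di). vec_floate is the
assumed builtin overload; only the even word elements of the result, in
big-endian numbering, hold defined values:

    #include <altivec.h>

    /* Sketch: result[0] = (float) d[0], result[2] = (float) d[1]
       (BE element numbering); the odd words are undefined.  */
    vector float
    floate_double (vector double d)
    {
      return vec_floate (d);        /* floatev2df  */
    }

    vector float
    floate_unsigned (vector unsigned long long v)
    {
      return vec_floate (v);        /* unsfloatev2di  */
    }
]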
+
+;; Generate floato
+;; convert double or long long signed to float
+;; (Only odd words are valid, BE numbering)
+(define_expand "floato<mode>"
+ [(use (match_operand:V4SF 0 "register_operand" "=wa"))
+ (use (match_operand:VSX_D 1 "register_operand" "wa"))]
+ "VECTOR_UNIT_VSX_P (V4SFmode)"
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
+ else
+ {
+ /* Shift left one word to put the odd word in the correct location.  */
+ rtx rtx_tmp;
+ rtx rtx_val = GEN_INT (4);
+
+ rtx_tmp = gen_reg_rtx (V4SFmode);
+ emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
+ emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
+ rtx_tmp, rtx_tmp, rtx_val));
+ }
+ DONE;
+})
+
+;; Generate uns_floato
+;; convert long long unsigned to float
+;; (Only odd words are valid, BE numbering)
+(define_expand "unsfloatov2di"
+ [(use (match_operand:V4SF 0 "register_operand" "=wa"))
+ (use (match_operand:V2DI 1 "register_operand" "wa"))]
+ "VECTOR_UNIT_VSX_P (V4SFmode)"
+{
+ if (VECTOR_ELT_ORDER_BIG)
+ emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
+ else
+ {
+ /* Shift left one word to put the odd word in the correct location.  */
+ rtx rtx_tmp;
+ rtx rtx_val = GEN_INT (4);
+
+ rtx_tmp = gen_reg_rtx (V4SFmode);
+ emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
+ emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
+ rtx_tmp, rtx_tmp, rtx_val));
+ }
+ DONE;
+})
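
[Editor's note: the odd-word counterpart, with vec_floato again assumed as
the builtin name; only the odd word elements (BE numbering) are defined:

    #include <altivec.h>

    /* Sketch: result[1] = (float) v[0], result[3] = (float) v[1]
       (BE element numbering); the even words are undefined.  */
    vector float
    floato_signed (vector signed long long v)
    {
      return vec_floato (v);        /* floatov2di  */
    }
]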
+
;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
;; the xvrdpiz instruction does not truncate the value if the floating
;; point value is < LONG_MIN or > LONG_MAX.
@@ -3012,6 +3173,134 @@
}
[(set_attr "type" "vecperm")])
+(define_insn_and_split "vsx_set_v4sf_p9"
+ [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
+ (unspec:V4SF
+ [(match_operand:V4SF 1 "gpc_reg_operand" "0")
+ (match_operand:SF 2 "gpc_reg_operand" "ww")
+ (match_operand:QI 3 "const_0_to_3_operand" "n")]
+ UNSPEC_VSX_SET))
+ (clobber (match_scratch:SI 4 "=&wJwK"))]
+ "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
+ && TARGET_UPPER_REGS_DI && TARGET_POWERPC64"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 5)
+ (unspec:V4SF [(match_dup 2)]
+ UNSPEC_VSX_CVDPSPN))
+ (parallel [(set (match_dup 4)
+ (vec_select:SI (match_dup 6)
+ (parallel [(match_dup 7)])))
+ (clobber (scratch:SI))])
+ (set (match_dup 8)
+ (unspec:V4SI [(match_dup 8)
+ (match_dup 4)
+ (match_dup 3)]
+ UNSPEC_VSX_SET))]
+{
+ unsigned int tmp_regno = reg_or_subregno (operands[4]);
+
+ operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
+ operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
+ operands[7] = GEN_INT (VECTOR_ELT_ORDER_BIG ? 1 : 2);
+ operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
+}
+ [(set_attr "type" "vecperm")
+ (set_attr "length" "12")])
+
+;; Special case setting 0.0f to a V4SF element
+(define_insn_and_split "*vsx_set_v4sf_p9_zero"
+ [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
+ (unspec:V4SF
+ [(match_operand:V4SF 1 "gpc_reg_operand" "0")
+ (match_operand:SF 2 "zero_fp_constant" "j")
+ (match_operand:QI 3 "const_0_to_3_operand" "n")]
+ UNSPEC_VSX_SET))
+ (clobber (match_scratch:SI 4 "=&wJwK"))]
+ "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
+ && TARGET_UPPER_REGS_DI && TARGET_POWERPC64"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 4)
+ (const_int 0))
+ (set (match_dup 5)
+ (unspec:V4SI [(match_dup 5)
+ (match_dup 4)
+ (match_dup 3)]
+ UNSPEC_VSX_SET))]
+{
+ operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
+}
+ [(set_attr "type" "vecperm")
+ (set_attr "length" "8")])
+
+;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
+;; that is in the default scalar position (1 for big endian, 2 for little
+;; endian). We just need to do an xxinsertw since the element is in the
+;; correct location.
+
+(define_insn "*vsx_insert_extract_v4sf_p9"
+ [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
+ (unspec:V4SF
+ [(match_operand:V4SF 1 "gpc_reg_operand" "0")
+ (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
+ (parallel
+ [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
+ (match_operand:QI 4 "const_0_to_3_operand" "n")]
+ UNSPEC_VSX_SET))]
+ "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
+ && TARGET_UPPER_REGS_DI && TARGET_POWERPC64
+ && (INTVAL (operands[3]) == (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
+{
+ int ele = INTVAL (operands[4]);
+
+ if (!VECTOR_ELT_ORDER_BIG)
+ ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;
+
+ operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
+ return "xxinsertw %x0,%x2,%4";
+}
+ [(set_attr "type" "vecperm")])
+
+;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
+;; that is in the default scalar position (1 for big endian, 2 for little
+;; endian). Convert the insert/extract to int and avoid doing the conversion.
+
+(define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
+ [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
+ (unspec:V4SF
+ [(match_operand:V4SF 1 "gpc_reg_operand" "0")
+ (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
+ (parallel
+ [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
+ (match_operand:QI 4 "const_0_to_3_operand" "n")]
+ UNSPEC_VSX_SET))
+ (clobber (match_scratch:SI 5 "=&wJwK"))]
+ "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
+ && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
+ && TARGET_UPPER_REGS_DI && TARGET_POWERPC64
+ && (INTVAL (operands[3]) != (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
+ "#"
+ "&& 1"
+ [(parallel [(set (match_dup 5)
+ (vec_select:SI (match_dup 6)
+ (parallel [(match_dup 3)])))
+ (clobber (scratch:SI))])
+ (set (match_dup 7)
+ (unspec:V4SI [(match_dup 8)
+ (match_dup 5)
+ (match_dup 4)]
+ UNSPEC_VSX_SET))]
+{
+ if (GET_CODE (operands[5]) == SCRATCH)
+ operands[5] = gen_reg_rtx (SImode);
+
+ operands[6] = gen_lowpart (V4SImode, operands[2]);
+ operands[7] = gen_lowpart (V4SImode, operands[0]);
+ operands[8] = gen_lowpart (V4SImode, operands[1]);
+}
+ [(set_attr "type" "vecperm")])
+
;; Expanders for builtins
(define_expand "vsx_mergel_<mode>"
[(use (match_operand:VSX_D 0 "vsx_register_operand" ""))