Diffstat (limited to 'gcc/config/rs6000/vsx.md')
-rw-r--r-- | gcc/config/rs6000/vsx.md | 289
1 file changed, 289 insertions, 0 deletions
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 141aa4237c3..4d73f0abe0a 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -21,6 +21,9 @@
 ;; Iterator for comparison types
 (define_code_iterator CMP_TEST [eq lt gt unordered])
 
+;; Mode attribute for vector floate and floato conversions
+(define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])
+
 ;; Iterator for both scalar and vector floating point types supported by VSX
 (define_mode_iterator VSX_B [DF V4SF V2DF])
 
@@ -331,6 +334,14 @@
    UNSPEC_VSX_CVUXDSP
    UNSPEC_VSX_CVSPSXDS
    UNSPEC_VSX_CVSPUXDS
+   UNSPEC_VSX_CVSXWSP
+   UNSPEC_VSX_CVUXWSP
+   UNSPEC_VSX_FLOAT2
+   UNSPEC_VSX_UNS_FLOAT2
+   UNSPEC_VSX_FLOATE
+   UNSPEC_VSX_UNS_FLOATE
+   UNSPEC_VSX_FLOATO
+   UNSPEC_VSX_UNS_FLOATO
    UNSPEC_VSX_TDIV
    UNSPEC_VSX_TSQRT
    UNSPEC_VSX_SET
@@ -1976,6 +1987,156 @@
   "xvcvspuxds %x0,%x1"
   [(set_attr "type" "vecdouble")])
 
+(define_insn "vsx_xvcvsxwsp"
+  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
+        (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
+                     UNSPEC_VSX_CVSXWSP))]
+  "VECTOR_UNIT_VSX_P (V4SFmode)"
+  "xvcvsxwsp %x0,%x1"
+  [(set_attr "type" "vecfloat")])
+
+(define_insn "vsx_xvcvuxwsp"
+  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
+        (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
+                     UNSPEC_VSX_CVUXWSP))]
+  "VECTOR_UNIT_VSX_P (V4SFmode)"
+  "xvcvuxwsp %x0,%x1"
+  [(set_attr "type" "vecfloat")])
+
+;; Generate float2
+;; convert two long long signed ints to float
+(define_expand "float2_v2di"
+  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
+   (use (match_operand:V2DI 1 "register_operand" "wa"))
+   (use (match_operand:V2DI 2 "register_operand" "wa"))]
+  "VECTOR_UNIT_VSX_P (V4SFmode)"
+{
+  rtx rtx_src1, rtx_src2, rtx_dst;
+
+  rtx_dst = operands[0];
+  rtx_src1 = operands[1];
+  rtx_src2 = operands[2];
+
+  rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
+  DONE;
+})
+
+;; Generate uns_float2
+;; convert two long long unsigned ints to float
+(define_expand "uns_float2_v2di"
+  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
+   (use (match_operand:V2DI 1 "register_operand" "wa"))
+   (use (match_operand:V2DI 2 "register_operand" "wa"))]
+  "VECTOR_UNIT_VSX_P (V4SFmode)"
+{
+  rtx rtx_src1, rtx_src2, rtx_dst;
+
+  rtx_dst = operands[0];
+  rtx_src1 = operands[1];
+  rtx_src2 = operands[2];
+
+  rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
+  DONE;
+})
+
+;; Generate floate
+;; convert double or long long signed to float
+;; (Only even words are valid, BE numbering)
+(define_expand "floate<mode>"
+  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
+   (use (match_operand:VSX_D 1 "register_operand" "wa"))]
+  "VECTOR_UNIT_VSX_P (V4SFmode)"
+{
+  if (VECTOR_ELT_ORDER_BIG)
+    {
+      /* Shift left one word to put even word in correct location.  */
+      rtx rtx_tmp;
+      rtx rtx_val = GEN_INT (4);
+
+      rtx_tmp = gen_reg_rtx (V4SFmode);
+      emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
+      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
+                                          rtx_tmp, rtx_tmp, rtx_val));
+    }
+  else
+    emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
+
+  DONE;
+})
+
+;; Generate uns_floate
+;; convert long long unsigned to float
+;; (Only even words are valid, BE numbering)
+(define_expand "unsfloatev2di"
+  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
+   (use (match_operand:V2DI 1 "register_operand" "wa"))]
+  "VECTOR_UNIT_VSX_P (V4SFmode)"
+{
+  if (VECTOR_ELT_ORDER_BIG)
+    {
+      /* Shift left one word to put even word in correct location.  */
+      rtx rtx_tmp;
+      rtx rtx_val = GEN_INT (4);
+
+      rtx_tmp = gen_reg_rtx (V4SFmode);
+      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
+      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
+                                          rtx_tmp, rtx_tmp, rtx_val));
+    }
+  else
+    emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
+
+  DONE;
+})
+
+;; Generate floato
+;; convert double or long long signed to float
+;; (Only odd words are valid, BE numbering)
+(define_expand "floato<mode>"
+  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
+   (use (match_operand:VSX_D 1 "register_operand" "wa"))]
+  "VECTOR_UNIT_VSX_P (V4SFmode)"
+{
+  if (VECTOR_ELT_ORDER_BIG)
+    emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
+  else
+    {
+      /* Shift left one word to put odd word in correct location.  */
+      rtx rtx_tmp;
+      rtx rtx_val = GEN_INT (4);
+
+      rtx_tmp = gen_reg_rtx (V4SFmode);
+      emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
+      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
+                                          rtx_tmp, rtx_tmp, rtx_val));
+    }
+  DONE;
+})
+
+;; Generate uns_floato
+;; convert long long unsigned to float
+;; (Only odd words are valid, BE numbering)
+(define_expand "unsfloatov2di"
+  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
+   (use (match_operand:V2DI 1 "register_operand" "wa"))]
+  "VECTOR_UNIT_VSX_P (V4SFmode)"
+{
+  if (VECTOR_ELT_ORDER_BIG)
+    emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
+  else
+    {
+      /* Shift left one word to put odd word in correct location.  */
+      rtx rtx_tmp;
+      rtx rtx_val = GEN_INT (4);
+
+      rtx_tmp = gen_reg_rtx (V4SFmode);
+      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
+      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
+                                          rtx_tmp, rtx_tmp, rtx_val));
+    }
+  DONE;
+})
+
 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
 ;; since the xvrdpiz instruction does not truncate the value if the floating
 ;; point value is < LONG_MIN or > LONG_MAX.
@@ -3012,6 +3173,134 @@
 }
   [(set_attr "type" "vecperm")])
 
+(define_insn_and_split "vsx_set_v4sf_p9"
+  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
+        (unspec:V4SF
+         [(match_operand:V4SF 1 "gpc_reg_operand" "0")
+          (match_operand:SF 2 "gpc_reg_operand" "ww")
+          (match_operand:QI 3 "const_0_to_3_operand" "n")]
+         UNSPEC_VSX_SET))
+   (clobber (match_scratch:SI 4 "=&wJwK"))]
+  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
+   && TARGET_UPPER_REGS_DI && TARGET_POWERPC64"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 5)
+        (unspec:V4SF [(match_dup 2)]
+                     UNSPEC_VSX_CVDPSPN))
+   (parallel [(set (match_dup 4)
+                   (vec_select:SI (match_dup 6)
+                                  (parallel [(match_dup 7)])))
+              (clobber (scratch:SI))])
+   (set (match_dup 8)
+        (unspec:V4SI [(match_dup 8)
+                      (match_dup 4)
+                      (match_dup 3)]
+                     UNSPEC_VSX_SET))]
+{
+  unsigned int tmp_regno = reg_or_subregno (operands[4]);
+
+  operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
+  operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
+  operands[7] = GEN_INT (VECTOR_ELT_ORDER_BIG ? 1 : 2);
+  operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
+}
+  [(set_attr "type" "vecperm")
+   (set_attr "length" "12")])
+
+;; Special case setting 0.0f to a V4SF element
+(define_insn_and_split "*vsx_set_v4sf_p9_zero"
+  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
+        (unspec:V4SF
+         [(match_operand:V4SF 1 "gpc_reg_operand" "0")
+          (match_operand:SF 2 "zero_fp_constant" "j")
+          (match_operand:QI 3 "const_0_to_3_operand" "n")]
+         UNSPEC_VSX_SET))
+   (clobber (match_scratch:SI 4 "=&wJwK"))]
+  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
+   && TARGET_UPPER_REGS_DI && TARGET_POWERPC64"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 4)
+        (const_int 0))
+   (set (match_dup 5)
+        (unspec:V4SI [(match_dup 5)
+                      (match_dup 4)
+                      (match_dup 3)]
+                     UNSPEC_VSX_SET))]
+{
+  operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
+}
+  [(set_attr "type" "vecperm")
+   (set_attr "length" "8")])
+
+;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
+;; that is in the default scalar position (1 for big endian, 2 for little
+;; endian).  We just need to do an xxinsertw since the element is in the
+;; correct location.
+
+(define_insn "*vsx_insert_extract_v4sf_p9"
+  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
+        (unspec:V4SF
+         [(match_operand:V4SF 1 "gpc_reg_operand" "0")
+          (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
+                         (parallel
+                          [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
+          (match_operand:QI 4 "const_0_to_3_operand" "n")]
+         UNSPEC_VSX_SET))]
+  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
+   && TARGET_UPPER_REGS_DI && TARGET_POWERPC64
+   && (INTVAL (operands[3]) == (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
+{
+  int ele = INTVAL (operands[4]);
+
+  if (!VECTOR_ELT_ORDER_BIG)
+    ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;
+
+  operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
+  return "xxinsertw %x0,%x2,%4";
+}
+  [(set_attr "type" "vecperm")])
+
+;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
+;; that is in the default scalar position (1 for big endian, 2 for little
+;; endian).  Convert the insert/extract to int and avoid doing the conversion.
+
+(define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
+  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
+        (unspec:V4SF
+         [(match_operand:V4SF 1 "gpc_reg_operand" "0")
+          (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
+                         (parallel
+                          [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
+          (match_operand:QI 4 "const_0_to_3_operand" "n")]
+         UNSPEC_VSX_SET))
+   (clobber (match_scratch:SI 5 "=&wJwK"))]
+  "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
+   && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
+   && TARGET_UPPER_REGS_DI && TARGET_POWERPC64
+   && (INTVAL (operands[3]) != (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 5)
+                   (vec_select:SI (match_dup 6)
+                                  (parallel [(match_dup 3)])))
+              (clobber (scratch:SI))])
+   (set (match_dup 7)
+        (unspec:V4SI [(match_dup 8)
+                      (match_dup 5)
+                      (match_dup 4)]
+                     UNSPEC_VSX_SET))]
+{
+  if (GET_CODE (operands[5]) == SCRATCH)
+    operands[5] = gen_reg_rtx (SImode);
+
+  operands[6] = gen_lowpart (V4SImode, operands[2]);
+  operands[7] = gen_lowpart (V4SImode, operands[0]);
+  operands[8] = gen_lowpart (V4SImode, operands[1]);
+}
+  [(set_attr "type" "vecperm")])
+
 ;; Expanders for builtins
 (define_expand "vsx_mergel_<mode>"
   [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
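
For context, not part of the patch itself: the new float2_v2di, floate<mode> and floato<mode> expanders are the machine-description side of the overloaded vec_float2, vec_floate and vec_floato built-ins. The following C sketch shows how they would be exercised, assuming a VSX-enabled target such as -mcpu=power9 -mvsx; the function name combine_and_convert is illustrative only.

/* Usage sketch only, not part of the patch.  Assumes a VSX-enabled target
   (e.g. -mcpu=power9 -mvsx); combine_and_convert is a made-up name.  */
#include <altivec.h>

vector float
combine_and_convert (vector signed long long a, vector signed long long b,
                     vector double d)
{
  /* Convert the two doublewords of a and b into the four words of the
     result (float2_v2di).  */
  vector float f2 = vec_float2 (a, b);

  /* Convert only the even or odd doublewords of d; per the comments in the
     patch, the other word positions are not meaningful
     (floatev2df / floatov2df).  */
  vector float fe = vec_floate (d);
  vector float fo = vec_floato (d);

  return f2 + fe + fo;
}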
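Likewise for the second half of the patch: the *vsx_insert_extract_v4sf_p9 patterns match the vec_insert (vec_extract (...), ...) idiom on V4SF so that, per the comments in the patch, it can be handled with the word insert/extract instructions rather than going through a scalar float register. A minimal sketch under the same target assumptions:

/* Usage sketch only, not part of the patch.  With a Power9-style target this
   combination is what the new *vsx_insert_extract_v4sf_p9 patterns are
   intended to match.  */
#include <altivec.h>

vector float
copy_element (vector float v1, vector float v2)
{
  /* Take element 1 of v2 and place it into element 3 of v1.  */
  return vec_insert (vec_extract (v2, 1), v1, 3);
}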