Diffstat (limited to 'gcc/config/rs6000/vsx.md')
-rw-r--r-- | gcc/config/rs6000/vsx.md | 289
1 file changed, 289 insertions, 0 deletions
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 141aa4237c3..4d73f0abe0a 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -21,6 +21,9 @@
 ;; Iterator for comparison types
 (define_code_iterator CMP_TEST [eq lt gt unordered])
 
+;; Mode attribute for vector floate and floato conversions
+(define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])
+
 ;; Iterator for both scalar and vector floating point types supported by VSX
 (define_mode_iterator VSX_B [DF V4SF V2DF])
 
@@ -331,6 +334,14 @@
    UNSPEC_VSX_CVUXDSP
    UNSPEC_VSX_CVSPSXDS
    UNSPEC_VSX_CVSPUXDS
+   UNSPEC_VSX_CVSXWSP
+   UNSPEC_VSX_CVUXWSP
+   UNSPEC_VSX_FLOAT2
+   UNSPEC_VSX_UNS_FLOAT2
+   UNSPEC_VSX_FLOATE
+   UNSPEC_VSX_UNS_FLOATE
+   UNSPEC_VSX_FLOATO
+   UNSPEC_VSX_UNS_FLOATO
    UNSPEC_VSX_TDIV
    UNSPEC_VSX_TSQRT
    UNSPEC_VSX_SET
@@ -1976,6 +1987,156 @@
   "xvcvspuxds %x0,%x1"
   [(set_attr "type" "vecdouble")])
 
+(define_insn "vsx_xvcvsxwsp"
+  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
+        (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
+                     UNSPEC_VSX_CVSXWSP))]
+  "VECTOR_UNIT_VSX_P (V4SFmode)"
+  "xvcvsxwsp %x0,%x1"
+  [(set_attr "type" "vecfloat")])
+
+(define_insn "vsx_xvcvuxwsp"
+  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
+        (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
+                     UNSPEC_VSX_CVUXWSP))]
+  "VECTOR_UNIT_VSX_P (V4SFmode)"
+  "xvcvuxwsp %x0,%x1"
+  [(set_attr "type" "vecfloat")])
+
+;; Generate float2
+;; convert two long long signed ints to float
+(define_expand "float2_v2di"
+  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
+   (use (match_operand:V2DI 1 "register_operand" "wa"))
+   (use (match_operand:V2DI 2 "register_operand" "wa"))]
+  "VECTOR_UNIT_VSX_P (V4SFmode)"
+{
+  rtx rtx_src1, rtx_src2, rtx_dst;
+
+  rtx_dst = operands[0];
+  rtx_src1 = operands[1];
+  rtx_src2 = operands[2];
+
+  rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
+  DONE;
+})
+
+;; Generate uns_float2
+;; convert two long long unsigned ints to float
+(define_expand "uns_float2_v2di"
+  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
+   (use (match_operand:V2DI 1 "register_operand" "wa"))
+   (use (match_operand:V2DI 2 "register_operand" "wa"))]
+  "VECTOR_UNIT_VSX_P (V4SFmode)"
+{
+  rtx rtx_src1, rtx_src2, rtx_dst;
+
+  rtx_dst = operands[0];
+  rtx_src1 = operands[1];
+  rtx_src2 = operands[2];
+
+  rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
+  DONE;
+})
+
+;; Generate floate
+;; convert double or long long signed to float
+;; (Only even words are valid, BE numbering)
+(define_expand "floate<mode>"
+  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
+   (use (match_operand:VSX_D 1 "register_operand" "wa"))]
+  "VECTOR_UNIT_VSX_P (V4SFmode)"
+{
+  if (VECTOR_ELT_ORDER_BIG)
+    {
+      /* Shift left one word to put even word in correct location.  */
+      rtx rtx_tmp;
+      rtx rtx_val = GEN_INT (4);
+
+      rtx_tmp = gen_reg_rtx (V4SFmode);
+      emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
+      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
+                                          rtx_tmp, rtx_tmp, rtx_val));
+    }
+  else
+    emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
+
+  DONE;
+})
+
+;; Generate uns_floate
+;; convert long long unsigned to float
+;; (Only even words are valid, BE numbering)
+(define_expand "unsfloatev2di"
+  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
+   (use (match_operand:V2DI 1 "register_operand" "wa"))]
+  "VECTOR_UNIT_VSX_P (V4SFmode)"
+{
+  if (VECTOR_ELT_ORDER_BIG)
+    {
+      /* Shift left one word to put even word in correct location.  */
+      rtx rtx_tmp;
+      rtx rtx_val = GEN_INT (4);
+
+      rtx_tmp = gen_reg_rtx (V4SFmode);
+      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
+      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
+                                          rtx_tmp, rtx_tmp, rtx_val));
+    }
+  else
+    emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
+
+  DONE;
+})
+
+;; Generate floato
+;; convert double or long long signed to float
+;; (Only odd words are valid, BE numbering)
+(define_expand "floato<mode>"
+  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
+   (use (match_operand:VSX_D 1 "register_operand" "wa"))]
+  "VECTOR_UNIT_VSX_P (V4SFmode)"
+{
+  if (VECTOR_ELT_ORDER_BIG)
+    emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
+  else
+    {
+      /* Shift left one word to put odd word in correct location.  */
+      rtx rtx_tmp;
+      rtx rtx_val = GEN_INT (4);
+
+      rtx_tmp = gen_reg_rtx (V4SFmode);
+      emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
+      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
+                                          rtx_tmp, rtx_tmp, rtx_val));
+    }
+  DONE;
+})
+
+;; Generate uns_floato
+;; convert long long unsigned to float
+;; (Only odd words are valid, BE numbering)
+(define_expand "unsfloatov2di"
+  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
+   (use (match_operand:V2DI 1 "register_operand" "wa"))]
+  "VECTOR_UNIT_VSX_P (V4SFmode)"
+{
+  if (VECTOR_ELT_ORDER_BIG)
+    emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
+  else
+    {
+      /* Shift left one word to put odd word in correct location.  */
+      rtx rtx_tmp;
+      rtx rtx_val = GEN_INT (4);
+
+      rtx_tmp = gen_reg_rtx (V4SFmode);
+      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
+      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
+                                          rtx_tmp, rtx_tmp, rtx_val));
+    }
+  DONE;
+})
+
 ;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
 ;; since the xvrdpiz instruction does not truncate the value if the floating
 ;; point value is < LONG_MIN or > LONG_MAX.
@@ -3012,6 +3173,134 @@
 }
   [(set_attr "type" "vecperm")])
 
+(define_insn_and_split "vsx_set_v4sf_p9"
+  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
+        (unspec:V4SF
+         [(match_operand:V4SF 1 "gpc_reg_operand" "0")
+          (match_operand:SF 2 "gpc_reg_operand" "ww")
+          (match_operand:QI 3 "const_0_to_3_operand" "n")]
+         UNSPEC_VSX_SET))
+   (clobber (match_scratch:SI 4 "=&wJwK"))]
+  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
+   && TARGET_UPPER_REGS_DI && TARGET_POWERPC64"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 5)
+        (unspec:V4SF [(match_dup 2)]
+                     UNSPEC_VSX_CVDPSPN))
+   (parallel [(set (match_dup 4)
+                   (vec_select:SI (match_dup 6)
+                                  (parallel [(match_dup 7)])))
+              (clobber (scratch:SI))])
+   (set (match_dup 8)
+        (unspec:V4SI [(match_dup 8)
+                      (match_dup 4)
+                      (match_dup 3)]
+                     UNSPEC_VSX_SET))]
+{
+  unsigned int tmp_regno = reg_or_subregno (operands[4]);
+
+  operands[5] = gen_rtx_REG (V4SFmode, tmp_regno);
+  operands[6] = gen_rtx_REG (V4SImode, tmp_regno);
+  operands[7] = GEN_INT (VECTOR_ELT_ORDER_BIG ? 1 : 2);
+  operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
+}
+  [(set_attr "type" "vecperm")
+   (set_attr "length" "12")])
+
+;; Special case setting 0.0f to a V4SF element
+(define_insn_and_split "*vsx_set_v4sf_p9_zero"
+  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
+        (unspec:V4SF
+         [(match_operand:V4SF 1 "gpc_reg_operand" "0")
+          (match_operand:SF 2 "zero_fp_constant" "j")
+          (match_operand:QI 3 "const_0_to_3_operand" "n")]
+         UNSPEC_VSX_SET))
+   (clobber (match_scratch:SI 4 "=&wJwK"))]
+  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
+   && TARGET_UPPER_REGS_DI && TARGET_POWERPC64"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 4)
+        (const_int 0))
+   (set (match_dup 5)
+        (unspec:V4SI [(match_dup 5)
+                      (match_dup 4)
+                      (match_dup 3)]
+                     UNSPEC_VSX_SET))]
+{
+  operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0]));
+}
+  [(set_attr "type" "vecperm")
+   (set_attr "length" "8")])
+
+;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element
+;; that is in the default scalar position (1 for big endian, 2 for little
+;; endian).  We just need to do an xxinsertw since the element is in the
+;; correct location.
+
+(define_insn "*vsx_insert_extract_v4sf_p9"
+  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
+        (unspec:V4SF
+         [(match_operand:V4SF 1 "gpc_reg_operand" "0")
+          (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
+                         (parallel
+                          [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
+          (match_operand:QI 4 "const_0_to_3_operand" "n")]
+         UNSPEC_VSX_SET))]
+  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
+   && TARGET_UPPER_REGS_DI && TARGET_POWERPC64
+   && (INTVAL (operands[3]) == (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
+{
+  int ele = INTVAL (operands[4]);
+
+  if (!VECTOR_ELT_ORDER_BIG)
+    ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele;
+
+  operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele);
+  return "xxinsertw %x0,%x2,%4";
+}
+  [(set_attr "type" "vecperm")])
+
+;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element
+;; that is in the default scalar position (1 for big endian, 2 for little
+;; endian).  Convert the insert/extract to int and avoid doing the conversion.
+
+(define_insn_and_split "*vsx_insert_extract_v4sf_p9_2"
+  [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa")
+        (unspec:V4SF
+         [(match_operand:V4SF 1 "gpc_reg_operand" "0")
+          (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa")
+                         (parallel
+                          [(match_operand:QI 3 "const_0_to_3_operand" "n")]))
+          (match_operand:QI 4 "const_0_to_3_operand" "n")]
+         UNSPEC_VSX_SET))
+   (clobber (match_scratch:SI 5 "=&wJwK"))]
+  "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode)
+   && TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
+   && TARGET_UPPER_REGS_DI && TARGET_POWERPC64
+   && (INTVAL (operands[3]) != (VECTOR_ELT_ORDER_BIG ? 1 : 2))"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 5)
+                   (vec_select:SI (match_dup 6)
+                                  (parallel [(match_dup 3)])))
+              (clobber (scratch:SI))])
+   (set (match_dup 7)
+        (unspec:V4SI [(match_dup 8)
+                      (match_dup 5)
+                      (match_dup 4)]
+                     UNSPEC_VSX_SET))]
+{
+  if (GET_CODE (operands[5]) == SCRATCH)
+    operands[5] = gen_reg_rtx (SImode);
+
+  operands[6] = gen_lowpart (V4SImode, operands[2]);
+  operands[7] = gen_lowpart (V4SImode, operands[0]);
+  operands[8] = gen_lowpart (V4SImode, operands[1]);
+}
+  [(set_attr "type" "vecperm")])
+
 ;; Expanders for builtins
 (define_expand "vsx_mergel_<mode>"
   [(use (match_operand:VSX_D 0 "vsx_register_operand" ""))
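
For context, not part of the patch itself: the new float2_v2di, floate<mode> and floato<mode> expanders are the machine-description side of the overloaded vec_float2, vec_floate and vec_floato built-ins. The following C sketch shows how they would be exercised, assuming a VSX-enabled target such as -mcpu=power9 -mvsx; the function name combine_and_convert is illustrative only.

/* Usage sketch only, not part of the patch.  Assumes a VSX-enabled target
   (e.g. -mcpu=power9 -mvsx); combine_and_convert is a made-up name.  */
#include <altivec.h>

vector float
combine_and_convert (vector signed long long a, vector signed long long b,
                     vector double d)
{
  /* Convert the two doublewords of a and b into the four words of the
     result (float2_v2di).  */
  vector float f2 = vec_float2 (a, b);

  /* Convert only the even or odd doublewords of d; per the comments in the
     patch, the other word positions are not meaningful
     (floatev2df / floatov2df).  */
  vector float fe = vec_floate (d);
  vector float fo = vec_floato (d);

  return f2 + fe + fo;
}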
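Likewise for the second half of the patch: the *vsx_insert_extract_v4sf_p9 patterns match the vec_insert (vec_extract (...), ...) idiom on V4SF so that, per the comments in the patch, it can be handled with the word insert/extract instructions rather than going through a scalar float register. A minimal sketch under the same target assumptions:

/* Usage sketch only, not part of the patch.  With a Power9-style target this
   combination is what the new *vsx_insert_extract_v4sf_p9 patterns are
   intended to match.  */
#include <altivec.h>

vector float
copy_element (vector float v1, vector float v2)
{
  /* Take element 1 of v2 and place it into element 3 of v1.  */
  return vec_insert (vec_extract (v2, 1), v1, 3);
}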