diff options
Diffstat (limited to 'gcc/config/gcn/gcn-valu.md')
-rw-r--r-- | gcc/config/gcn/gcn-valu.md | 982 |
1 files changed, 982 insertions, 0 deletions
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md new file mode 100644 index 00000000000..894340a46c5 --- /dev/null +++ b/gcc/config/gcn/gcn-valu.md @@ -0,0 +1,982 @@ +;; Copyright (C) 2016-2017 Free Software Foundation, Inc. + +;; This file is free software; you can redistribute it and/or modify it under +;; the terms of the GNU General Public License as published by the Free +;; Software Foundation; either version 3 of the License, or (at your option) +;; any later version. + +;; This file is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +;; for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +;; ------------------------------------------------------------------------- +;; Moves +; Vector modes for one vector register +(define_mode_iterator VEC_1REG_MODE + [V64QI V64HI V64SI V64HF V64SF]) + +(define_mode_iterator VEC_1REG_INT_MODE + [V64QI V64HI V64SI]) + +; Vector modes for two vector registers +(define_mode_iterator VEC_2REG_MODE + [V64DI V64DF]) + +; All of above +(define_mode_iterator VEC_REG_MODE + [V64QI V64HI V64SI V64HF V64SF ; Single reg + V64DI V64DF]) ; Double reg + +; Modes supporting integer vector operations +(define_mode_iterator V_INT_MODE [SI V64SI]) + +; Modes we can perform flat memory operations in. +; FIXME: We can also do 96bit, 128bit and 256bit loads into multiple +; registers. Eventually add modes for this. +(define_mode_iterator VEC_FLAT_MODE + [V64QI V64HI V64SI V64HF V64SF ; Single reg + V64DI V64DF]) ; Double reg + +; Modes we can perform scalar flat memory operations in. 
+(define_mode_iterator SCALAR_FLAT_MODE + [BI QI HI HF SI SF ; single regs + DI DF V2SI V2SF ; two regs + V3SI V3SF ; three regs + TI V4SI V4SF V2DI V2DF]) ; four regs + +;; Mapping of full vector modes to shorter vectors +(define_mode_attr scalar_mode + [(V64QI "QI") (V64HI "HI") (V64SI "SI") + (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")]) + +(define_mode_attr v2_mode + [(V64QI "V2QI") (V64HI "V2HI") (V64SI "V2SI") + (V64HF "V2HF") (V64SF "V2SF") (V64DI "V2DI") (V64DF "V2DF")]) + +(define_mode_attr v4_mode + [(V64QI "V4QI") (V64HI "V4HI") (V64SI "V4SI") + (V64HF "V4HF") (V64SF "V4SF") (V64DI "V4DI") (V64DF "V4DF")]) + +(define_mode_attr v8_mode + [(V64QI "V8QI") (V64HI "V8HI") (V64SI "V8SI") + (V64HF "V8HF") (V64SF "V8SF") (V64DI "V8DI") (V64DF "V8DF")]) + +(define_mode_attr v16_mode + [(V64QI "V16QI") (V64HI "V16HI") (V64SI "V16SI") + (V64HF "V16HF") (V64SF "V16SF") (V64DI "V16DI") (V64DF "V16DF")]) + +(define_mode_attr v32_mode + [(V64QI "V32QI") (V64HI "V32HI") (V64SI "V32SI") + (V64HF "V32HF") (V64SF "V32SF") (V64DI "V32DI") (V64DF "V32DF")]) + +(define_insn "*simple_buffer_mov<mode>" + [(set (match_operand:SCALAR_FLAT_MODE 0 "nonimmediate_operand" "=RBm,v") + (match_operand:SCALAR_FLAT_MODE 1 "general_operand" "v, RBm")) + (use (match_operand:TI 2 "register_operand" "Sg, Sg")) + (use (match_operand:DI 3 "gcn_exec_reg_operand" "e, e"))] + "memory_operand (operands[0], VOIDmode) != memory_operand (operands[1], VOIDmode)" + "@ + buffer_store%s1 %1, off, %2, %A0\n\ts_waitcnt vmcnt(0) expcnt(0) + buffer_load%s0 %0, off, %2, %A1\n\ts_waitcnt vmcnt(0)" + [(set_attr "type" "flat") + (set_attr "mode" "<MODE>")]) + +;; This eternal ugliness makes LRA sort-of work. 
+(define_insn_and_split "*ugly_reload_mov<mode>" + [(set (match_operand:SCALAR_FLAT_MODE 0 "nonimmediate_operand" "=RBm,v, ^RBm,^v, SD, SD, v, v , ^v, ^v , v, Sm, RDRF,v") + (match_operand:SCALAR_FLAT_MODE 1 "general_operand" "v, RBm, v, RBm, SSB,SSn,vn, SS, vn, SS, SS, vSS,v ,RDRF")) + (use (match_operand:TI 2 "register_operand" "Sg, Sg, Sg, Sg, Sg, Sg, Sg, Sg, Sg, Sg, Sg, Sg, Sg ,Sg")) + (use (match_operand:DI 3 "gcn_exec_operand" "e ,e ,Sgn, Sgn, SSn,SSn,e ,e ,Sgn, Sgn,SSO,SSO,e ,e")) + (clobber (match_operand:DI 4 "register_operand" "=&Sg,Sg,Sg,Sg,Sg,Sg,Sg,Sg,Sg,Sg,Sg,Sg,Sg,Sg"))] + "(!immediate_operand (operands[1], VOIDmode) || register_operand (operands[0], VOIDmode)) + && (!memory_operand (operands[0], VOIDmode) || !memory_operand (operands[1], VOIDmode))" + "#" + "!memory_operand (operands[0],<MODE>mode) && !memory_operand (operands[1],<MODE>mode) + && !gcn_vgpr_register_operand (operands[0], <MODE>mode) + && !gcn_vgpr_register_operand (operands[1], <MODE>mode)" + [(set (match_dup 0) (match_dup 1))] +{} + [(set_attr "type" "flat") + (set_attr "mode" "<MODE>")]) + +(define_split + [(set (match_operand:SCALAR_FLAT_MODE 0 "nonimmediate_operand") + (match_operand:SCALAR_FLAT_MODE 1 "general_operand")) + (use (match_operand:TI 2 "register_operand")) + (use (match_operand:DI 3 "gcn_exec_operand")) + (clobber (match_scratch:DI 4 ""))] + "REG_P (operands[3]) && REGNO (operands[3]) == EXEC_REG + && (memory_operand (operands[0], VOIDmode) || memory_operand (operands[1], VOIDmode))" + [(parallel [(set (match_dup 0) (match_dup 1)) + (use (match_dup 2)) + (use (match_dup 3))])]) + +(define_split + [(set (match_operand:SCALAR_FLAT_MODE 0 "nonimmediate_operand") + (match_operand:SCALAR_FLAT_MODE 1 "general_operand")) + (use (match_operand:TI 2 "register_operand")) + (use (match_operand:DI 3 "gcn_exec_operand")) + (clobber (match_scratch:DI 4 ""))] + "REG_P (operands[3]) && REGNO (operands[3]) == EXEC_REG" + [(parallel [(set (match_dup 0) (match_dup 1)) + (use (match_dup 
3))])]) + +(define_split + [(set (match_operand:SCALAR_FLAT_MODE 0 "nonimmediate_operand") + (match_operand:SCALAR_FLAT_MODE 1 "general_operand")) + (use (match_operand:TI 2 "register_operand")) + (use (match_operand:DI 3 "gcn_exec_operand")) + (clobber (match_operand:DI 4 "register_operand"))] + "(memory_operand (operands[0], VOIDmode) || memory_operand (operands[1], VOIDmode))" + [(set (match_dup 4) (reg:DI EXEC_REG)) + (set (reg:DI EXEC_REG) (match_dup 3)) + (parallel [(set (match_dup 0) (match_dup 1)) + (use (match_dup 2)) + (use (reg:DI EXEC_REG))]) + (set (reg:DI EXEC_REG) (match_dup 4))]) + +(define_split + [(set (match_operand:SCALAR_FLAT_MODE 0 "nonimmediate_operand") + (match_operand:SCALAR_FLAT_MODE 1 "general_operand")) + (use (match_operand:TI 2 "register_operand")) + (use (match_operand:DI 3 "gcn_exec_operand")) + (clobber (match_operand:DI 4 "register_operand"))] + "" + [(set (match_dup 4) (reg:DI EXEC_REG)) + (set (reg:DI EXEC_REG) (match_dup 3)) + (parallel [(set (match_dup 0) (match_dup 1)) + (use (reg:DI EXEC_REG))]) + (set (reg:DI EXEC_REG) (match_dup 4))]) + +(define_insn "*mov<mode>" + [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "=v,RD,RF,v ,v") + (vec_merge:VEC_1REG_MODE + (match_operand:VEC_1REG_MODE 1 "gcn_load_operand" "vB,v ,v,RD,RF") + (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand" "0U,0U,0U,0U,0U") + (match_operand:DI 2 "gcn_exec_reg_operand" "e,e,e,e,e")))] + "!memory_operand (operands[0], VOIDmode) || register_operand (operands[1], VOIDmode)" + "@ + v_mov_b32\t%0, %1 + ds_write_b32\t%A0, %1%O0 + flat_store%s1\t%A0, %1 + ds_read_b32\t%0, %A1%O1 + flat_load%s0\t%0, %A1\n\ts_waitcnt\tlgkmcnt(0),vmcnt(0)" + [(set_attr "type" "vop1,dsmem,flat,dsmem,flat") + (set_attr "mode" "<MODE>")]) + +(define_insn "*mov<mode>" + [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "=&v") + (vec_merge:VEC_2REG_MODE + (match_operand:VEC_2REG_MODE 1 "gcn_alu_operand" "0vB") + (match_operand:VEC_2REG_MODE 3 
"gcn_register_or_unspec_operand" "0U") + (match_operand:DI 2 "gcn_exec_reg_operand" "e")))] + "!memory_operand (operands[0], VOIDmode) || register_operand (operands[1], VOIDmode)" + "v_mov_b32\t%L0, %L1\n\tv_mov_b32\t%H0, %H1" + [(set_attr "type" "vop1") + (set_attr "mode" "<MODE>")]) + +; TODO: Add zero/sign extending variants. + +;; ------------------------------------------------------------------------- +;; Vector lane moves +;; ------------------------------------------------------------------------- + +; v_writelane/readlane works regardless of exec flags. +; We allow source to be scratch +(define_insn "*vec_set<mode>" + [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "=v") + (vec_merge:VEC_1REG_MODE + (vec_duplicate:VEC_1REG_MODE + (match_operand:<scalar_mode> 1 "register_operand" "SS")) + (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand" "0U") + (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand" "SSB"))))] + "" + "v_writelane_b32 %0, %1, %2" + [(set_attr "type" "vop3a") + (set_attr "mode" "<scalar_mode>") + (set_attr "exec" "any")]) + +; FIXME: 64bit operations really should be splitters, but I am not sure how +; to represent vertical subregs. 
+(define_insn "*vec_set<mode>" + [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "=v") + (vec_merge:VEC_2REG_MODE + (vec_duplicate:VEC_2REG_MODE + (match_operand:<scalar_mode> 1 "register_operand" "SS")) + (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand" "0U") + (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand" "SSB"))))] + "" + "v_writelane_b32 %L0, %L1, %2\n\tv_writelane_b32 %H0, %H1, %2" + [(set_attr "type" "vop3a") + (set_attr "mode" "<scalar_mode>") + (set_attr "exec" "any")]) + +(define_expand "vec_set<mode>" + [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "") + (vec_merge:VEC_1REG_MODE + (vec_duplicate:VEC_1REG_MODE + (match_operand:<scalar_mode> 1 "register_operand" "")) + (match_dup 0) + (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand" ""))))] + "") + +(define_insn "*vec_set<mode>_1" + [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "") + (vec_merge:VEC_1REG_MODE + (vec_duplicate:VEC_1REG_MODE + (match_operand:<scalar_mode> 1 "register_operand" "")) + (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand" "") + (match_operand:SI 2 "const_int_operand")))] + "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)" +{ + operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2]))); + return "v_writelane_b32 %0, %1, %2"; +} + [(set_attr "type" "vop3a") + (set_attr "mode" "<scalar_mode>") + (set_attr "exec" "any")]) + +(define_insn "*vec_set<mode>_1" + [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "") + (vec_merge:VEC_2REG_MODE + (vec_duplicate:VEC_2REG_MODE + (match_operand:<scalar_mode> 1 "register_operand" "")) + (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand" "") + (match_operand:SI 2 "const_int_operand")))] + "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)" +{ + operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2]))); + return "v_writelane_b32 %L0, %L1, %2\nv_writelane_b32 %H0, %H1, %2"; +} + [(set_attr "type" "vop3a") + (set_attr "mode" "<scalar_mode>") + 
(set_attr "exec" "any")]) + +(define_insn "vec_duplicate<mode>" + [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "=v") + (vec_merge:VEC_1REG_MODE + (vec_duplicate:VEC_1REG_MODE + (match_operand:<scalar_mode> 1 "gcn_alu_operand" "SSB")) + (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand" "0U") + (match_operand:DI 2 "gcn_exec_reg_operand" "e")))] + "" + "v_mov_b32\t%0, %1" + [(set_attr "type" "vop3a") + (set_attr "mode" "<scalar_mode>") + (set_attr "exec" "any")]) + +(define_insn "vec_duplicate<mode>" + [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "=v") + (vec_merge:VEC_2REG_MODE + (vec_duplicate:VEC_2REG_MODE + (match_operand:<scalar_mode> 1 "register_operand" "SS")) + (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand" "0U") + (match_operand:DI 2 "gcn_exec_reg_operand" "e")))] + "" + "v_mov_b32\t%L0, %L1\n\tv_mov_b32\t%H0, %H1" + [(set_attr "type" "vop3a") + (set_attr "mode" "<scalar_mode>") + (set_attr "exec" "any")]) + +(define_insn "vec_extract<mode>" + [(set (match_operand:<scalar_mode> 0 "register_operand" "=Sm") + (vec_select:<scalar_mode> + (match_operand:VEC_1REG_MODE 1 "register_operand" "v") + (parallel [(match_operand:SI 2 "gcn_alu_operand" "SSB")])))] + "" + "v_readlane_b32 %0, %1, %2" + [(set_attr "type" "vop3a") + (set_attr "mode" "<scalar_mode>") + (set_attr "exec" "any")]) + +(define_insn "vec_extract<mode>" + [(set (match_operand:<scalar_mode> 0 "register_operand" "=Sm") + (vec_select:<scalar_mode> + (match_operand:VEC_2REG_MODE 1 "register_operand" "v") + (parallel [(match_operand:SI 2 "gcn_alu_operand" "SSB")])))] + "" + "v_readlane_b32 %L0, %L1, %L2 + v_readlane_b32 %H0, %H1, %H2" + [(set_attr "type" "vop3a") + (set_attr "mode" "<scalar_mode>") + (set_attr "exec" "any")]) + +(define_expand "vec_init<mode>" + [(match_operand:VEC_REG_MODE 0 "register_operand") + (match_operand 1)] + "" +{ + gcn_expand_vector_init (operands[0], operands[1]); + DONE; +}) + +;; 
------------------------------------------------------------------------- +;; Generic expanders for vector operations. +;; ------------------------------------------------------------------------- + +(define_mode_iterator V64SIDI [V64SI V64DI]) + +; Integer operations that produce condition code +(define_expand "<expander><mode>3" + [(parallel [ + (set (match_operand:V64SIDI 0 "register_operand" "") + (vec_merge:V64SIDI + (plus_minus:V64SIDI + (match_operand:V64SIDI 1 "register_operand" "") + (match_operand:V64SIDI 2 "gcn_alu_operand" "")) + (match_dup 4) + (match_dup 3))) + (clobber (reg:DI VCC_REG))])] + "" +{ + operands[3] = gcn_full_exec_reg (); + operands[4] = gcn_gen_undef (<MODE>mode); +}) + +(define_mode_iterator VEC_INT_MODE [V64QI V64HI V64SI V64DI]) +(define_code_iterator bitop [and ior xor]) + +(define_expand "<expander><mode>3" + [(set (match_operand:VEC_INT_MODE 0 "gcn_valu_dst_operand" "") + (vec_merge:VEC_INT_MODE + (bitop:VEC_INT_MODE + (match_operand:VEC_INT_MODE 1 "gcn_valu_src0_operand" "") + (match_operand:VEC_INT_MODE 2 "gcn_valu_src1com_operand" "")) + (match_dup 4) + (match_dup 3)))] + "" +{ + operands[3] = gcn_full_exec_reg (); + operands[4] = gcn_gen_undef (<MODE>mode); +}) + +(define_code_iterator shiftop [ashift lshiftrt ashiftrt]) +(define_expand "<expander><mode>3" + [(set (match_operand:VEC_INT_MODE 0 "register_operand" "") + (vec_merge:VEC_INT_MODE + (shiftop:VEC_INT_MODE + (match_operand:VEC_INT_MODE 1 "register_operand" "") + (match_operand:V64SI 2 "gcn_alu_operand" "")) + (match_dup 4) + (match_dup 3)))] + "" +{ + operands[3] = gcn_full_exec_reg (); + operands[4] = gcn_gen_undef (<MODE>mode); +}) + +;; ------------------------------------------------------------------------- +;; ALU special cases: Plus +;; ------------------------------------------------------------------------- + + +; Turn vector pattern into scalar variant. 
+ +(define_subst "vec_to_scalar" + [(set (match_operand:VEC_REG_MODE 0) + (vec_merge:VEC_REG_MODE + (match_operator:VEC_REG_MODE 1 "" + [(match_operand:VEC_REG_MODE 2) + (match_operand:VEC_REG_MODE 3)]) + (match_operand:VEC_REG_MODE 4) + (match_operand:DI 5))) + (clobber (reg:DI VCC_REG))] + "" + [(set (match_dup:<scalar_mode> 0) + (match_op_dup:<scalar_mode> 1 + [(match_dup:<scalar_mode> 1) (match_dup:<scalar_mode> 2)])) + (use (match_dup:<scalar_mode> 5)) + (clobber (reg:DI VCC_REG))]) + +(define_subst "vec_to_scalar" + [(set (match_operand:VEC_REG_MODE 0) + (vec_merge:VEC_REG_MODE + (match_operator:VEC_REG_MODE 1 "" + [(match_operand:VEC_REG_MODE 2) + (match_operand:VEC_REG_MODE 3)]) + (match_operand:VEC_REG_MODE 4) + (match_operand:DI 5)))] + "" + [(set (match_dup:<scalar_mode> 0) + (match_op_dup:<scalar_mode> 1 + [(match_dup:<scalar_mode> 1) (match_dup:<scalar_mode> 2)])) + (use (match_dup:<scalar_mode> 5))]) + +(define_subst_attr "vec_suffix" + "vec_to_scalar" "vector" "scalar") + +(define_insn "addv64si3_<vec_suffix>" + [(set (match_operand:V64SI 0 "register_operand" "=v") + (vec_merge:V64SI + (plus:V64SI + (match_operand:V64SI 1 "register_operand" "%v") + (match_operand:V64SI 2 "gcn_alu_operand" " vSSB")) + (match_operand:V64SI 4 "gcn_register_or_unspec_operand" "0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e"))) + (clobber (reg:DI VCC_REG))] + "" + "v_add_u32\t%0, vcc, %2, %1" + [(set_attr "type" "vop2") + (set_attr "mode" "V64SI")]) + +(define_insn "addv64si3_vector_dup" + [(set (match_operand:V64SI 0 "register_operand" "=v") + (vec_merge:V64SI + (plus:V64SI + (vec_duplicate:V64SI + (match_operand:SI 2 "gcn_alu_operand" "SSB")) + (match_operand:V64SI 1 "register_operand" "v")) + (match_operand:V64SI 4 "gcn_register_or_unspec_operand" "0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e"))) + (clobber (reg:DI VCC_REG))] + "" + "v_add_u32\t%0, vcc, %1, %2" + [(set_attr "type" "vop2") + (set_attr "mode" "SI")]) + +(define_insn "addv64si3_vector_vcc" + 
[(set (match_operand:V64SI 0 "register_operand" "=v,v") + (vec_merge:V64SI + (plus:V64SI + (match_operand:V64SI 1 "register_operand" "%v,v") + (match_operand:V64SI 2 "gcn_alu_operand" " vSSB,vSSB")) + (match_operand:V64SI 4 "gcn_register_or_unspec_operand" "0U,0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e,e"))) + (set (match_operand:DI 5 "register_operand" "=cV,Sg") + (ior:DI (and:DI (ltu:DI (plus:V64SI (match_dup 1) + (match_dup 2)) + (match_dup 1)) + (match_dup 3)) + (and:DI (not:DI (match_dup 3)) + (match_operand:DI 6 "gcn_register_or_unspec_operand" "5U,5U"))))] + "" + "v_add_u32\t%0, %5, %1, %2" + [(set_attr "type" "vop2,vop3b") + (set_attr "mode" "SI")]) + +;; Tom says that he thinks the previous value of VCC is unchanged when +;; execution lane is masked out. + +(define_insn "addv64si3_vector_vcc_dup" + [(set (match_operand:V64SI 0 "register_operand" "=v,v") + (vec_merge:V64SI + (plus:V64SI + (vec_duplicate:V64SI (match_operand:SI 2 "gcn_alu_operand" "SSB,SSB")) + (match_operand:V64SI 1 "register_operand" "v,v")) + (match_operand:V64SI 4 "gcn_register_or_unspec_operand" "0U,0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e,e"))) + (set (match_operand:DI 5 "register_operand" "=cV,Sg") + (ior:DI (and:DI (ltu:DI (plus:V64SI (vec_duplicate:V64SI (match_dup 2)) + (match_dup 1)) + (vec_duplicate:V64SI (match_dup 2))) + (match_dup 3)) + (and:DI (not:DI (match_dup 3)) + (match_operand:DI 6 "gcn_register_or_unspec_operand" "5U,5U"))))] + "" + "v_add_u32\t%0, %5, %1, %2" + [(set_attr "type" "vop2,vop3b") + (set_attr "mode" "SI")]) + +;; This pattern does not accept SGPR because VCC read already counts as a SGPR use +;; and number of SGPR operands is limited to 1. 
+(define_insn "addcv64si3_vec" + [(set (match_operand:V64SI 0 "register_operand" "=v,v") + (vec_merge:V64SI (plus:V64SI (plus:V64SI + (vec_merge:V64SI + (match_operand:V64SI 7 "gcn_vec1_operand" "A,A") + (match_operand:V64SI 8 "gcn_vec0_operand" "A,A") + (match_operand:DI 5 "register_operand" "cV,cV")) + (match_operand:V64SI 1 "gcn_alu_operand" "%vA,vA")) + (match_operand:V64SI 2 "gcn_alu_operand" "vB,vB")) + (match_operand:V64SI 4 "gcn_register_or_unspec_operand" "0U,0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e,e"))) + (set (match_operand:DI 6 "register_operand" "=cV,Sg") + (ior:DI (and:DI (ior:DI (ltu:DI (plus:V64SI (plus:V64SI + (vec_merge:V64SI + (match_dup 7) + (match_dup 8) + (match_dup 5)) + (match_dup 1)) + (match_dup 2)) + (match_dup 2)) + (ltu:DI (plus:V64SI (vec_merge:V64SI + (match_dup 7) + (match_dup 8) + (match_dup 5)) + (match_dup 1)) + (match_dup 1))) + (match_dup 3)) + (and:DI (not:DI (match_dup 3)) + (match_operand:DI 9 "gcn_register_or_unspec_operand" "6U,6U"))))] + "" + "v_addc_u32\t%0, %6, %1, %2, vcc" + [(set_attr "type" "vop2,vop3b") + (set_attr "mode" "SI")]) + + +(define_insn "subv64si3_<vec_suffix>" + [(set (match_operand:V64SI 0 "register_operand" "=v,v") + (vec_merge:V64SI + (minus:V64SI + (match_operand:V64SI 1 "gcn_alu_operand" "vSSB,v") + (match_operand:V64SI 2 "gcn_alu_operand" "v,vSSB")) + (match_operand:V64SI 4 "gcn_register_or_unspec_operand" "0U,0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e,e"))) + (clobber (reg:DI VCC_REG))] + "register_operand (operands[1], VOIDmode) || register_operand (operands[2], VOIDmode)" + "@ + v_sub_u32\t%0, vcc, %2, %1 + v_subrev_u32\t%0, vcc, %2, %1" + [(set_attr "type" "vop2") + (set_attr "mode" "SI")]) + +(define_insn "subv64si3_vector_vcc" + [(set (match_operand:V64SI 0 "register_operand" "=v,v,v,v") + (vec_merge:V64SI + (minus:V64SI + (match_operand:V64SI 1 "gcn_alu_operand" "vSSB,vSSB,v,v") + (match_operand:V64SI 2 "gcn_alu_operand" "v,v,vSSB,vSSB")) + (match_operand:V64SI 4 
"gcn_register_or_unspec_operand" "0U,0U,0U,0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e,e,e,e"))) + (set (match_operand:DI 5 "register_operand" "=cV,Sg,cV,Sg") + (ior:DI (and:DI (gtu:DI (minus:V64SI (match_dup 1) + (match_dup 2)) + (match_dup 1)) + (match_dup 3)) + (and:DI (not:DI (match_dup 3)) + (match_operand:DI 6 "gcn_register_or_unspec_operand" "5U,5U,5U,5U"))))] + "register_operand (operands[1], VOIDmode) || register_operand (operands[2], VOIDmode)" + "@ + v_sub_u32\t%0, %5, %2, %1 + v_sub_u32\t%0, %5, %2, %1 + v_subrev_u32\t%0, %5, %2, %1 + v_subrev_u32\t%0, %5, %2, %1" + [(set_attr "type" "vop2,vop3b,vop2,vop3b") + (set_attr "mode" "SI")]) + +;; This pattern does not accept SGPR because VCC read already counts +;; as a SGPR use and number of SGPR operands is limited to 1. +(define_insn "subcv64si3_vec" + [(set (match_operand:V64SI 0 "register_operand" "=v,v,v,v") + (vec_merge:V64SI (minus:V64SI (minus:V64SI + (vec_merge:V64SI + (match_operand:V64SI 7 "gcn_vec1_operand" "A,A,A,A") + (match_operand:V64SI 8 "gcn_vec0_operand" "A,A,A,A") + (match_operand:DI 5 "gcn_alu_operand" "cV,cV,cV,cV")) + (match_operand:V64SI 1 "gcn_alu_operand" "vA,vA,vB,vB")) + (match_operand:V64SI 2 "gcn_alu_operand" "vB,vB,vA,vA")) + (match_operand:V64SI 4 "gcn_register_or_unspec_operand" "0U,0U,0U,0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e,e,e,e"))) + (set (match_operand:DI 6 "register_operand" "=cV,Sg,cV,Sg") + (ior:DI (and:DI (ior:DI (gtu:DI (minus:V64SI (minus:V64SI + (vec_merge:V64SI + (match_dup 7) + (match_dup 8) + (match_dup 5)) + (match_dup 1)) + (match_dup 2)) + (match_dup 2)) + (ltu:DI (minus:V64SI (vec_merge:V64SI + (match_dup 7) + (match_dup 8) + (match_dup 5)) + (match_dup 1)) + (match_dup 1))) + (match_dup 3)) + (and:DI (not:DI (match_dup 3)) + (match_operand:DI 9 "gcn_register_or_unspec_operand" "6U,6U,6U,6U"))))] + "register_operand (operands[1], VOIDmode) || register_operand (operands[2], VOIDmode)" + "@ + v_sub_u32\t%0, %5, %2, %1 + v_sub_u32\t%0, 
%5, %2, %1 + v_subrev_u32\t%0, %5, %2, %1 + v_subrev_u32\t%0, %5, %2, %1" + [(set_attr "type" "vop2,vop3b,vop2,vop3b") + (set_attr "mode" "SI")]) + +(define_insn_and_split "*addv64di3_vector" + [(set (match_operand:V64DI 0 "register_operand" "=v") + (vec_merge:V64DI + (plus:V64DI + (match_operand:V64DI 1 "register_operand" "%v") + (match_operand:V64DI 2 "gcn_alu_operand" "vSSB")) + (match_operand:V64DI 4 "gcn_register_or_unspec_operand" "0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e"))) + (clobber (reg:DI VCC_REG))] + "" + "#" + "gcn_can_split_p (V64DImode, operands[0]) + && gcn_can_split_p (V64DImode, operands[1]) + && gcn_can_split_p (V64DImode, operands[2]) + && gcn_can_split_p (V64DImode, operands[4])" + [(const_int 0)] +{ + rtx vcc = gen_rtx_REG (DImode, VCC_REG); + emit_insn (gen_addv64si3_vector_vcc (gcn_operand_part (V64DImode, operands[0], 0), + gcn_operand_part (V64DImode, operands[1], 0), + gcn_operand_part (V64DImode, operands[2], 0), + operands[3], + gcn_operand_part (V64DImode, operands[4], 0), + vcc, + gcn_gen_undef (DImode))); + emit_insn (gen_addcv64si3_vec (gcn_operand_part (V64DImode, operands[0], 1), + gcn_operand_part (V64DImode, operands[1], 1), + gcn_operand_part (V64DImode, operands[2], 1), + operands[3], + gcn_operand_part (V64DImode, operands[4], 1), + vcc, vcc, gcn_vec_constant (V64SImode, 1), + gcn_vec_constant (V64SImode, 0), + gcn_gen_undef (DImode))); + DONE; +}) + +(define_insn_and_split "subv64di3_vec" + [(set (match_operand:V64DI 0 "register_operand" "=v,v") + (vec_merge:V64DI + (minus:V64DI + (match_operand:V64DI 1 "gcn_alu_operand" "vSSB,v") + (match_operand:V64DI 2 "gcn_alu_operand" "v,vSSB")) + (match_operand:V64DI 4 "gcn_register_or_unspec_operand" "0U,0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e,e"))) + (clobber (reg:DI VCC_REG))] + "register_operand (operands[1], VOIDmode) || register_operand (operands[2], VOIDmode)" + "#" + "gcn_can_split_p (V64DImode, operands[0]) + && gcn_can_split_p (V64DImode, 
operands[1]) + && gcn_can_split_p (V64DImode, operands[2]) + && gcn_can_split_p (V64DImode, operands[4])" + [(const_int 0)] +{ + rtx vcc = gen_rtx_REG (DImode, VCC_REG); + emit_insn (gen_subv64si3_vector_vcc (gcn_operand_part (V64DImode, operands[0], 0), + gcn_operand_part (V64DImode, operands[1], 0), + gcn_operand_part (V64DImode, operands[2], 0), + operands[3], + gcn_operand_part (V64DImode, operands[4], 0), + vcc, + gcn_gen_undef (DImode))); + emit_insn (gen_subcv64si3_vec (gcn_operand_part (V64DImode, operands[0], 1), + gcn_operand_part (V64DImode, operands[1], 1), + gcn_operand_part (V64DImode, operands[2], 1), + operands[3], + gcn_operand_part (V64DImode, operands[4], 1), + vcc, vcc, gcn_vec_constant (V64SImode, 1), + gcn_vec_constant (V64SImode, 0), + gcn_gen_undef (DImode))); + DONE; +}) + +(define_insn_and_split "addv64di3_zext" + [(set (match_operand:V64DI 0 "register_operand" "=&v,v") + (vec_merge:V64DI + (plus:V64DI + (zero_extend:V64DI + (match_operand:V64SI 1 "gcn_alu_operand" "0vA,0vB")) + (match_operand:V64DI 2 "gcn_alu_operand" "0vB,0vA")) + (match_operand:V64DI 4 "gcn_register_or_unspec_operand" "0U,0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e,e"))) + (clobber (reg:DI VCC_REG))] + "" + "#" + "gcn_can_split_p (V64DImode, operands[0]) + && gcn_can_split_p (V64DImode, operands[2]) + && gcn_can_split_p (V64DImode, operands[4])" + [(const_int 0)] +{ + rtx vcc = gen_rtx_REG (DImode, VCC_REG); + emit_insn (gen_addv64si3_vector_vcc (gcn_operand_part (V64DImode, operands[0], 0), + operands[1], + gcn_operand_part (V64DImode, operands[2], 0), + operands[3], + gcn_operand_part (V64DImode, operands[4], 0), + vcc, + gcn_gen_undef (DImode))); + emit_insn (gen_addcv64si3_vec (gcn_operand_part (V64DImode, operands[0], 1), + gcn_operand_part (V64DImode, operands[2], 1), + const0_rtx, + operands[3], + gcn_operand_part (V64DImode, operands[4], 1), + vcc, vcc, gcn_vec_constant (V64SImode, 1), + gcn_vec_constant (V64SImode, 0), + gcn_gen_undef (DImode))); + 
DONE; +}) + +(define_insn_and_split "addv64di3_zext_dup" + [(set (match_operand:V64DI 0 "register_operand" "=&v") + (vec_merge:V64DI + (plus:V64DI + (zero_extend:V64DI + (vec_duplicate:V64SI + (match_operand:SI 1 "gcn_alu_operand" "BSS"))) + (match_operand:V64DI 2 "gcn_alu_operand" "0vA")) + (match_operand:V64DI 4 "gcn_register_or_unspec_operand" "0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e"))) + (clobber (reg:DI VCC_REG))] + "" + "#" + "gcn_can_split_p (V64DImode, operands[0]) + && gcn_can_split_p (V64DImode, operands[2]) + && gcn_can_split_p (V64DImode, operands[4])" + [(const_int 0)] +{ + rtx vcc = gen_rtx_REG (DImode, VCC_REG); + emit_insn (gen_addv64si3_vector_vcc_dup (gcn_operand_part (V64DImode, operands[0], 0), + gcn_operand_part (DImode, operands[1], 0), + gcn_operand_part (V64DImode, operands[2], 0), + operands[3], + gcn_operand_part (V64DImode, operands[4], 0), + vcc, + gcn_gen_undef (DImode))); + emit_insn (gen_addcv64si3_vec (gcn_operand_part (V64DImode, operands[0], 1), + gcn_operand_part (V64DImode, operands[2], 1), + const0_rtx, operands[3], + gcn_operand_part (V64DImode, operands[4], 1), + vcc, vcc, gcn_vec_constant (V64SImode, 1), + gcn_vec_constant (V64SImode, 0), + gcn_gen_undef (DImode))); + DONE; +}) + +(define_insn_and_split "addv64di3_zext_dup2" + [(set (match_operand:V64DI 0 "register_operand" "=v") + (vec_merge:V64DI + (plus:V64DI + (zero_extend:V64DI + (match_operand:V64SI 1 "gcn_alu_operand" "vA")) + (vec_duplicate:V64DI + (match_operand:DI 2 "gcn_alu_operand" "BSS"))) + (match_operand:V64DI 4 "gcn_register_or_unspec_operand" "0U") + (match_operand:DI 3 "gcn_exec_reg_operand" "e"))) + (clobber (reg:DI VCC_REG))] + "" + "#" + "gcn_can_split_p (V64DImode, operands[0]) + && gcn_can_split_p (V64DImode, operands[4])" + [(const_int 0)] +{ + rtx vcc = gen_rtx_REG (DImode, VCC_REG); + emit_insn (gen_addv64si3_vector_vcc_dup (gcn_operand_part (V64DImode, operands[0], 0), + operands[1], + gcn_operand_part (DImode, operands[2], 0), + 
  /* NOTE(review): the statements below are the tail of a define_expand whose
     head lies before this chunk; it appears to split a V64DI add into lo/hi
     V64SI halves with carry propagation through VCC — confirm against the
     full file.  */
							 operands[3],
			      gcn_operand_part (V64DImode, operands[4], 0),
			      vcc,
			      gcn_gen_undef (DImode)));
  rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
  /* Broadcast the high part of the scalar operand across all 64 lanes.  */
  emit_insn (gen_vec_duplicatev64si (dsthi, gcn_operand_part (DImode, operands[2], 1),
				     operands[3],
				     gcn_gen_undef (V64SImode)));
  /* Fold the carry produced by the low-part addition into the high part.  */
  emit_insn (gen_addcv64si3_vec (dsthi, dsthi, const0_rtx, operands[3],
				 gcn_operand_part (V64DImode, operands[4], 1),
				 vcc, vcc, gcn_vec_constant (V64SImode, 1),
				 gcn_vec_constant (V64SImode, 0),
				 gcn_gen_undef (DImode)));
  DONE;
})

;; Modes for which the LDS/GDS "ds_*" read-modify-write arithmetic
;; instructions are provided.
(define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI])

;; Add a VGPR directly into local data share memory: *mem += reg.
;; The insn condition requires operand 0 and operand 1 to be the same
;; address, since the hardware operation is a single read-modify-write.
;; Operand 3 is the EXEC mask; operand 4 is the pass-through value for
;; masked-off lanes ("0" ties it to the destination, "U" is undefined).
(define_insn "add<mode>3_ds_<vec_suffix>"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
	(vec_merge:DS_ARITH_MODE
	  (plus:DS_ARITH_MODE
	    (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD")
	    (match_operand:DS_ARITH_MODE 2 "register_operand" "v"))
	  (match_operand:DS_ARITH_MODE 4 "gcn_register_ds_or_unspec_operand" "0U")
	  (match_operand:DI 3 "gcn_exec_reg_operand" "e")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_add%u0\t%A0, %2%O0"
  [(set_attr "type" "vop2")
   (set_attr "mode" "SI")])

;; As above, but *mem -= reg.  Minus is not commutative, so no "%" on
;; operand 1.
(define_insn "sub<mode>3_ds_<vec_suffix>"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
	(vec_merge:DS_ARITH_MODE
	  (minus:DS_ARITH_MODE
	    (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "RD")
	    (match_operand:DS_ARITH_MODE 2 "register_operand" "v"))
	  (match_operand:DS_ARITH_MODE 4 "gcn_register_ds_or_unspec_operand" "0U")
	  (match_operand:DI 3 "gcn_exec_reg_operand" "e")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_sub%u0\t%A0, %2%O0"
  [(set_attr "type" "vop2")
   (set_attr "mode" "SI")])

;; Reverse subtract in LDS memory: *mem = reg - *mem (ds_rsub).
(define_insn "subr<mode>3_ds_<vec_suffix>"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
	(vec_merge:DS_ARITH_MODE
	  (minus:DS_ARITH_MODE
	    (match_operand:DS_ARITH_MODE 2 "register_operand" "v")
	    (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "RD"))
	  (match_operand:DS_ARITH_MODE 4 "gcn_register_ds_or_unspec_operand" "0U")
	  (match_operand:DI 3 "gcn_exec_reg_operand" "e")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_rsub%u0\t%A0, %2%O0"
  [(set_attr "type" "vop2")
   (set_attr "mode" "SI")])

;; -------------------------------------------------------------------------
;; ALU: mult
;; -------------------------------------------------------------------------

;; Iterate over both extension kinds so one pattern covers signed and
;; unsigned high-part multiplies.
(define_code_iterator any_extend [sign_extend zero_extend])
;; Output-template operand modifier selecting the i/u mnemonic suffix.
(define_code_attr sgnsuffix [(sign_extend "%i") (zero_extend "%u")])
;; Prefix for the generated pattern names (smulsi3.../umulsi3...).
(define_code_attr s [(sign_extend "s") (zero_extend "u")])

;; Vector 32x32->high-32 multiply: the upper half of the 64-bit product,
;; expressed as (trunc (lshiftrt (mult (ext a) (ext b)) 32)).
(define_insn "<s>mulsi3_highpart_vector"
  [(set (match_operand:V64SI 0 "register_operand" "=v")
	(vec_merge:V64SI
	  (truncate:V64SI
	    (lshiftrt:V64DI
	      (mult:V64DI
		(any_extend:V64DI
		  (match_operand:V64SI 1 "gcn_alu_operand" "%v"))
		(any_extend:V64DI
		  (match_operand:V64SI 2 "gcn_alu_operand" "vSSB")))
	      (const_int 32)))
	  (match_operand:V64SI 4 "gcn_register_ds_or_unspec_operand" "0U")
	  (match_operand:DI 3 "gcn_exec_reg_operand" "e")))]
  ""
  "v_mul_hi<sgnsuffix>0\t%0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "mode" "SI")])

;; Scalar variant of the high-part multiply; still executed on the VALU,
;; hence the (use ...) of the EXEC register.
(define_insn "<s>mulsi3_highpart_scalar"
  [(set (match_operand:SI 0 "register_operand" "=v")
	(truncate:SI
	  (lshiftrt:DI
	    (mult:DI
	      (any_extend:DI
		(match_operand:SI 1 "register_operand" "%v"))
	      (any_extend:DI
		(match_operand:SI 2 "gcn_alu_operand" "vSSB")))
	    (const_int 32))))
   (use (match_operand:DI 3 "gcn_exec_reg_operand" "e"))]
  ""
  "v_mul_hi<sgnsuffix>0\t%0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "mode" "SI")])

;; -------------------------------------------------------------------------
;; ALU the generic 32bit case
;; -------------------------------------------------------------------------

;; Generic bitwise operation, either register-to-register (VOP2) or
;; directly in LDS memory (ds_*).  The memory alternative is only valid
;; for the read-modify-write form, enforced by the insn condition.
(define_insn "<expander><mode>3_<vec_suffix>"
  [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "=v,RD")
	(vec_merge:VEC_1REG_INT_MODE
	  (bitop:VEC_1REG_INT_MODE
	    (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand" "%v,0")
	    (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand" "vSSB,v"))
	  (match_operand:VEC_1REG_INT_MODE 4 "gcn_register_ds_or_unspec_operand" "0U,0U")
	  (match_operand:DI 3 "gcn_exec_reg_operand" "e,e")))]
  "!memory_operand (operands[0], VOIDmode)
   || (rtx_equal_p (operands[0], operands[1]) && register_operand (operands[2], VOIDmode))"
  "@
   v_<mnemonic>0\t%0, %2, %1
   ds_<mnemonic>0\t%A0, %2%O0"
  [(set_attr "type" "vop2,dsmem")
   (set_attr "mode" "V64SI")])

;; Double-register bitwise operation, emitted as two 32-bit instructions
;; on the low (%L) and high (%H) halves.  The earlyclobber avoids having
;; to reason about operand overlap between the two instructions of the
;; output template; VGPR register pairs are not aligned.
(define_insn "<expander>v64di3_<vec_suffix>"
  [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD")
	(vec_merge:V64DI
	  (bitop:V64DI
	    (match_operand:V64DI 1 "gcn_valu_src0_operand" "%0v,RD")
	    (match_operand:V64DI 2 "gcn_valu_src1com_operand" "v0SSB,v"))
	  (match_operand:V64DI 4 "gcn_register_ds_or_unspec_operand" "0U,0U")
	  (match_operand:DI 3 "gcn_exec_reg_operand" "e,e")))]
  "!memory_operand (operands[0], VOIDmode)
   || (rtx_equal_p (operands[0], operands[1]) && register_operand (operands[2], VOIDmode))"
  "@
   v_<mnemonic>0\t%L0, %L2, %L1\n\tv_<mnemonic>0\t%H0, %H2, %H1
   ds_<mnemonic>0\t%A0, %2%O0"
  [(set_attr "type" "vop2,dsmem")
   (set_attr "mode" "V64SI,V64DI")])

;; Shift operations.  GCN shift instructions take the shift amount as the
;; first source, so the reversed mnemonic (<revmnemonic>) is used with the
;; operands swapped in the template.
(define_insn "<expander><mode>3_<vec_suffix>"
  [(set (match_operand:V_INT_MODE 0 "register_operand" "=v")
	(vec_merge:V_INT_MODE
	  (shiftop:V_INT_MODE
	    (match_operand:V_INT_MODE 1 "gcn_alu_operand" "v")
	    (match_operand:V64SI 2 "gcn_alu_operand" "vSSB"))
	  (match_operand:V_INT_MODE 4 "gcn_register_or_unspec_operand" "0U")
	  (match_operand:DI 3 "gcn_exec_reg_operand" "e")))]
  ""
  "v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "mode" "<MODE>")])

;; -------------------------------------------------------------------------
;; Generic FP binary operations
;; -------------------------------------------------------------------------

(define_mode_iterator VEC_FP_MODE
		      [V64HF V64SF V64DF])
(define_mode_iterator FP_MODE
		      [HF SF DF])

;; Commutative and non-commutative FP operations are split so the
;; non-commutative ones can use the hardware's reversed-operand forms.
(define_code_iterator comm_fp [plus mult smin smax])
(define_code_iterator nocomm_fp [minus])
(define_code_iterator all_fp [plus mult minus smin smax])

;; Commutative FP binary operation under the EXEC mask.
(define_insn "<expander><mode>_<vec_suffix>"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand" "=v")
	(vec_merge:VEC_FP_MODE
	  (comm_fp:VEC_FP_MODE
	    (match_operand:VEC_FP_MODE 1 "register_operand" "%v")
	    (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" " vSSB"))
	  (match_operand:VEC_FP_MODE 4 "gcn_register_or_unspec_operand" "0U")
	  (match_operand:DI 3 "gcn_exec_reg_operand" "e")))]
  ""
  "v_<mnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "mode" "<MODE>")])


;; Non-commutative FP binary operation.  The second alternative places the
;; non-register operand on the other side and uses the reversed mnemonic
;; (e.g. v_subrev) so either operand may come from a scalar/constant.
(define_insn "<expander><mode>_<vec_suffix>"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand" "=v,v")
	(vec_merge:VEC_FP_MODE
	  (nocomm_fp:VEC_FP_MODE
	    (match_operand:VEC_FP_MODE 1 "register_operand" "vSSB,v")
	    (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" "v,vSSB"))
	  (match_operand:VEC_FP_MODE 4 "gcn_register_or_unspec_operand" "0U,0U")
	  (match_operand:DI 3 "gcn_exec_reg_operand" "e,e")))]
  ""
  "@
   v_<mnemonic>0\t%0, %2, %1
   v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "mode" "<MODE>")])

;; Standard-named vector FP expander: supplies the full EXEC mask and an
;; undefined merge value, then lets the insn patterns above match.
(define_expand "<expander><mode>3"
  [(set (match_operand:VEC_FP_MODE 0 "gcn_valu_dst_operand" "")
	(vec_merge:VEC_FP_MODE
	  (all_fp:VEC_FP_MODE
	    (match_operand:VEC_FP_MODE 1 "gcn_valu_src0_operand" "")
	    (match_operand:VEC_FP_MODE 2 "gcn_valu_src1_operand" ""))
	  (match_dup 4)
	  (match_dup 3)))]
  ""
{
  operands[3] = gcn_full_exec_reg ();
  operands[4] = gcn_gen_undef (<MODE>mode);
})

;; Standard-named scalar FP expander: scalar FP still runs on the VALU,
;; so a single-lane EXEC value is attached via (use ...).
(define_expand "<expander><mode>3"
  [(parallel [(set (match_operand:FP_MODE 0 "gcn_valu_dst_operand" "")
		   (all_fp:FP_MODE
		     (match_operand:FP_MODE 1 "gcn_valu_src0_operand" "")
		     (match_operand:FP_MODE 2 "gcn_valu_src1_operand" "")))
	      (use (match_dup 3))])]
  ""
{
  operands[3] = gcn_scalar_exec ();
})