diff options
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/i386/i386-protos.h | 2 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 19 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 2 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 1632 |
4 files changed, 1642 insertions, 13 deletions
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index fb86b1b6f20..7ffb299ee88 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -51,11 +51,13 @@ extern int const1_operand PARAMS ((rtx, enum machine_mode)); extern int const248_operand PARAMS ((rtx, enum machine_mode)); extern int incdec_operand PARAMS ((rtx, enum machine_mode)); extern int reg_no_sp_operand PARAMS ((rtx, enum machine_mode)); +extern int mmx_reg_operand PARAMS ((rtx, enum machine_mode)); extern int general_no_elim_operand PARAMS ((rtx, enum machine_mode)); extern int nonmemory_no_elim_operand PARAMS ((rtx, enum machine_mode)); extern int q_regs_operand PARAMS ((rtx, enum machine_mode)); extern int non_q_regs_operand PARAMS ((rtx, enum machine_mode)); extern int no_comparison_operator PARAMS ((rtx, enum machine_mode)); +extern int sse_comparison_operator PARAMS ((rtx, enum machine_mode)); extern int fcmov_comparison_operator PARAMS ((rtx, enum machine_mode)); extern int uno_comparison_operator PARAMS ((rtx, enum machine_mode)); extern int cmp_fp_expander_operand PARAMS ((rtx, enum machine_mode)); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index a1746a15695..7485d1904bd 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1174,6 +1174,14 @@ reg_no_sp_operand (op, mode) return register_operand (op, mode); } +int +mmx_reg_operand (op, mode) + register rtx op; + enum machine_mode mode; +{ + return MMX_REG_P (op); +} + /* Return false if this is any eliminable register. Otherwise general_operand. */ @@ -1264,6 +1272,17 @@ no_comparison_operator (op, mode) } } +/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS + insns. 
*/ +int +sse_comparison_operator (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + enum rtx_code code = GET_CODE (op); + return code == EQ || code == LT || code == LE || code == UNORDERED; +} + /* Return 1 if OP is a comparison operator that can be issued by fcmov. */ int diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index ff396df357f..ab0b5369012 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2584,6 +2584,7 @@ do { long l; \ {"const1_operand", {CONST_INT}}, \ {"const248_operand", {CONST_INT}}, \ {"incdec_operand", {CONST_INT}}, \ + {"mmx_reg_operand", {REG}}, \ {"reg_no_sp_operand", {SUBREG, REG}}, \ {"general_no_elim_operand", {CONST_INT, CONST_DOUBLE, CONST, \ SYMBOL_REF, LABEL_REF, SUBREG, REG, MEM}}, \ @@ -2592,6 +2593,7 @@ do { long l; \ {"non_q_regs_operand", {SUBREG, REG}}, \ {"no_comparison_operator", {EQ, NE, LT, GE, LTU, GTU, LEU, GEU}}, \ {"fcmov_comparison_operator", {EQ, NE, LTU, GTU, LEU, GEU}}, \ + {"sse_comparison_operator", {EQ, LT, LE, UNORDERED }}, \ {"uno_comparison_operator", {EQ, NE, LE, LT, GE, GT, LEU, LTU, GEU, \ GTU, UNORDERED, ORDERED}}, \ {"cmp_fp_expander_operand", {CONST_DOUBLE, SUBREG, REG, MEM}}, \ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 72fd72088c0..51386bc0114 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -71,7 +71,25 @@ ;; 9 This is an `fnstsw' operation. ;; 10 This is a `sahf' operation. ;; 11 This is a `fstcw' operation -;; + +;; For SSE/MMX support: +;; 30 This is `fix', guaranteed to be truncating. +;; 31 This is a `emms' operation. +;; 32 This is a `maskmov' operation. +;; 33 This is a `movmsk' operation. +;; 34 This is a `non-temporal' move. +;; 35 This is a `prefetch' operation. +;; 36 This is used to distinguish COMISS from UCOMISS. +;; 37 This is a `ldmxcsr' operation. 
+;; 38 This is a forced `movaps' instruction (rather than whatever movti does) +;; 39 This is a forced `movups' instruction (rather than whatever movti does) +;; 40 This is a `stmxcsr' operation. +;; 41 This is a `shuffle' operation. +;; 42 This is a `rcp' operation. +;; 43 This is a `rsqsrt' operation. +;; 44 This is a `sfence' operation. +;; 45 This is a noop to prevent excessive combiner cleverness. + ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls ;; from i386.c. @@ -84,7 +102,7 @@ ;; A basic instruction type. Refinements due to arguments to be ;; provided in other attributes. (define_attr "type" - "other,multi,alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov,fmov,fop,fop1,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,str,cld" + "other,multi,alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov,fmov,fop,fop1,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,str,cld,sse,mmx" (const_string "other")) ;; Main data type used by the insn @@ -234,7 +252,7 @@ (const_string "store") (match_operand 1 "memory_operand" "") (const_string "load") - (and (eq_attr "type" "!icmp,test,alu1,negnot,fop1,fsgn,imov,imovx,fmov,fcmp") + (and (eq_attr "type" "!icmp,test,alu1,negnot,fop1,fsgn,imov,imovx,fmov,fcmp,sse,mmx") (match_operand 2 "memory_operand" "")) (const_string "load") (and (eq_attr "type" "icmov") @@ -1530,15 +1548,19 @@ (set_attr "length_immediate" "1")]) (define_insn "*movsi_1" - [(set (match_operand:SI 0 "nonimmediate_operand" "=*a,r,*a,m") - (match_operand:SI 1 "general_operand" "im,rinm,rinm,rin"))] + [(set (match_operand:SI 0 "nonimmediate_operand" "=*a,r,*a,m,!*y,!r") + (match_operand:SI 1 "general_operand" "im,rinm,rinm,rin,r,*y"))] "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" "* { switch (get_attr_type (insn)) { + case TYPE_MMX: + return \"movd\\t{%1, %0|%0, %1}\"; + case TYPE_LEA: return \"lea{l}\\t{%1, %0|%0, %1}\"; + default: if (flag_pic && 
SYMBOLIC_CONST (operands[1])) abort(); @@ -1546,12 +1568,15 @@ } }" [(set (attr "type") - (cond [(and (ne (symbol_ref "flag_pic") (const_int 0)) + (cond [(ior (match_operand:SI 0 "mmx_reg_operand" "") + (match_operand:SI 1 "mmx_reg_operand" "")) + (const_string "mmx") + (and (ne (symbol_ref "flag_pic") (const_int 0)) (match_operand:SI 1 "symbolic_operand" "")) (const_string "lea") ] (const_string "imov"))) - (set_attr "modrm" "0,*,0,*") + (set_attr "modrm" "0,*,0,*,*,*") (set_attr "mode" "SI")]) (define_insn "*swapsi" @@ -1983,15 +2008,20 @@ "#") (define_insn "*movdi_2" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o") - (match_operand:DI 1 "general_operand" "riFo,riF"))] + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,!m*y,!*y") + (match_operand:DI 1 "general_operand" "riFo,riF,*y,m"))] "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" - "#") + "@ + # + # + movq\\t{%1, %0|%0, %1} + movq\\t{%1, %0|%0, %1}" + [(set_attr "type" "*,*,mmx,mmx")]) (define_split [(set (match_operand:DI 0 "push_operand" "") (match_operand:DI 1 "general_operand" ""))] - "reload_completed" + "reload_completed && ! MMX_REG_P (operands[1])" [(const_int 0)] "if (!ix86_split_long_move (operands)) abort (); DONE;") @@ -1999,7 +2029,7 @@ (define_split [(set (match_operand:DI 0 "nonimmediate_operand" "") (match_operand:DI 1 "general_operand" ""))] - "reload_completed" + "reload_completed && ! MMX_REG_P (operands[0]) && ! MMX_REG_P (operands[1])" [(set (match_dup 2) (match_dup 5)) (set (match_dup 3) (match_dup 6))] "if (ix86_split_long_move (operands)) DONE;") @@ -7864,7 +7894,7 @@ [(set_attr "type" "setcc") (set_attr "mode" "QI")]) -(define_insn "*setcc_4" +(define_insn "setcc_4" [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm")) (match_operator:QI 1 "uno_comparison_operator" [(reg:CC 17) (const_int 0)]))] @@ -11170,3 +11200,1579 @@ CODE_LABEL_NUMBER (operands[2])); RET; }") + + ;; Pentium III SIMD instructions. + +;; Moves for SSE/MMX regs. 
+ +(define_insn "movv4sf_internal" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") + (match_operand:V4SF 1 "general_operand" "xm,x"))] + "TARGET_SSE" + ;; @@@ let's try to use movaps here. + "movaps\\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +(define_insn "movv4si_internal" + [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m") + (match_operand:V4SI 1 "general_operand" "xm,x"))] + "TARGET_SSE" + ;; @@@ let's try to use movaps here. + "movaps\\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +(define_insn "movv8qi_internal" + [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m") + (match_operand:V8QI 1 "general_operand" "ym,y"))] + "TARGET_MMX" + "movq\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmx")]) + +(define_insn "movv4hi_internal" + [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,m") + (match_operand:V4HI 1 "general_operand" "ym,y"))] + "TARGET_MMX" + "movq\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmx")]) + +(define_insn "movv2si_internal" + [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,m") + (match_operand:V2SI 1 "general_operand" "ym,y"))] + "TARGET_MMX" + "movq\\t{%1, %0|%0, %1}" + [(set_attr "type" "mmx")]) + +(define_expand "movti" + [(set (match_operand:TI 0 "general_operand" "") + (match_operand:TI 1 "general_operand" ""))] + "TARGET_SSE" + " +{ + /* For constants other than zero into memory. We do not know how the + instructions used to build constants modify the upper 64 bits + of the register, once we have that information we may be able + to handle some of them more efficiently. */ + if ((reload_in_progress | reload_completed) == 0 + && register_operand (operands[0], TImode) + && CONSTANT_P (operands[1])) + { + rtx addr = gen_reg_rtx (Pmode); + + emit_move_insn (addr, XEXP (force_const_mem (TImode, operands[1]), 0)); + operands[1] = gen_rtx_MEM (TImode, addr); + } + + /* Make operand1 a register if it isn't already. 
*/ + if ((reload_in_progress | reload_completed) == 0 + && !register_operand (operands[0], TImode) + && !register_operand (operands[1], TImode) + && operands[1] != CONST0_RTX (TImode)) + { + rtx temp = force_reg (TImode, operands[1]); + emit_move_insn (operands[0], temp); + DONE; + } +}") + +(define_expand "movv4sf" + [(set (match_operand:V4SF 0 "general_operand" "") + (match_operand:V4SF 1 "general_operand" ""))] + "TARGET_SSE" + " +{ + /* For constants other than zero into memory. We do not know how the + instructions used to build constants modify the upper 64 bits + of the register, once we have that information we may be able + to handle some of them more efficiently. */ + if ((reload_in_progress | reload_completed) == 0 + && register_operand (operands[0], V4SFmode) + && CONSTANT_P (operands[1])) + { + rtx addr = gen_reg_rtx (Pmode); + + emit_move_insn (addr, XEXP (force_const_mem (V4SFmode, operands[1]), 0)); + operands[1] = gen_rtx_MEM (V4SFmode, addr); + } + + /* Make operand1 a register if it isn't already. */ + if ((reload_in_progress | reload_completed) == 0 + && !register_operand (operands[0], V4SFmode) + && !register_operand (operands[1], V4SFmode) + && operands[1] != CONST0_RTX (V4SFmode)) + { + rtx temp = force_reg (V4SFmode, operands[1]); + emit_move_insn (operands[0], temp); + DONE; + } +}") + +(define_expand "movv4si" + [(set (match_operand:V4SI 0 "general_operand" "") + (match_operand:V4SI 1 "general_operand" ""))] + "TARGET_MMX" + " +{ + /* For constants other than zero into memory. We do not know how the + instructions used to build constants modify the upper 64 bits + of the register, once we have that information we may be able + to handle some of them more efficiently. 
*/ + if ((reload_in_progress | reload_completed) == 0 + && register_operand (operands[0], V4SImode) + && CONSTANT_P (operands[1])) + { + rtx addr = gen_reg_rtx (Pmode); + + emit_move_insn (addr, XEXP (force_const_mem (V4SImode, operands[1]), 0)); + operands[1] = gen_rtx_MEM (V4SImode, addr); + } + + /* Make operand1 a register if it isn't already. */ + if ((reload_in_progress | reload_completed) == 0 + && !register_operand (operands[0], V4SImode) + && !register_operand (operands[1], V4SImode) + && operands[1] != CONST0_RTX (V4SImode)) + { + rtx temp = force_reg (V4SImode, operands[1]); + emit_move_insn (operands[0], temp); + DONE; + } +}") + +(define_expand "movv2si" + [(set (match_operand:V2SI 0 "general_operand" "") + (match_operand:V2SI 1 "general_operand" ""))] + "TARGET_MMX" + " +{ + /* For constants other than zero into memory. We do not know how the + instructions used to build constants modify the upper 64 bits + of the register, once we have that information we may be able + to handle some of them more efficiently. */ + if ((reload_in_progress | reload_completed) == 0 + && register_operand (operands[0], V2SImode) + && CONSTANT_P (operands[1])) + { + rtx addr = gen_reg_rtx (Pmode); + + emit_move_insn (addr, XEXP (force_const_mem (V2SImode, operands[1]), 0)); + operands[1] = gen_rtx_MEM (V2SImode, addr); + } + + /* Make operand1 a register if it isn't already. */ + if ((reload_in_progress | reload_completed) == 0 + && !register_operand (operands[0], V2SImode) + && !register_operand (operands[1], V2SImode) + && operands[1] != CONST0_RTX (V2SImode)) + { + rtx temp = force_reg (V2SImode, operands[1]); + emit_move_insn (operands[0], temp); + DONE; + } +}") + +(define_expand "movv4hi" + [(set (match_operand:V4HI 0 "general_operand" "") + (match_operand:V4HI 1 "general_operand" ""))] + "TARGET_MMX" + " +{ + /* For constants other than zero into memory. 
We do not know how the + instructions used to build constants modify the upper 64 bits + of the register, once we have that information we may be able + to handle some of them more efficiently. */ + if ((reload_in_progress | reload_completed) == 0 + && register_operand (operands[0], V4HImode) + && CONSTANT_P (operands[1])) + { + rtx addr = gen_reg_rtx (Pmode); + + emit_move_insn (addr, XEXP (force_const_mem (V4HImode, operands[1]), 0)); + operands[1] = gen_rtx_MEM (V4HImode, addr); + } + + /* Make operand1 a register if it isn't already. */ + if ((reload_in_progress | reload_completed) == 0 + && !register_operand (operands[0], V4HImode) + && !register_operand (operands[1], V4HImode) + && operands[1] != CONST0_RTX (V4HImode)) + { + rtx temp = force_reg (V4HImode, operands[1]); + emit_move_insn (operands[0], temp); + DONE; + } +}") + +(define_expand "movv8qi" + [(set (match_operand:V8QI 0 "general_operand" "") + (match_operand:V8QI 1 "general_operand" ""))] + "TARGET_MMX" + " +{ + /* For constants other than zero into memory. We do not know how the + instructions used to build constants modify the upper 64 bits + of the register, once we have that information we may be able + to handle some of them more efficiently. */ + if ((reload_in_progress | reload_completed) == 0 + && register_operand (operands[0], V8QImode) + && CONSTANT_P (operands[1])) + { + rtx addr = gen_reg_rtx (Pmode); + + emit_move_insn (addr, XEXP (force_const_mem (V8QImode, operands[1]), 0)); + operands[1] = gen_rtx_MEM (V8QImode, addr); + } + + /* Make operand1 a register if it isn't already. 
*/ + if ((reload_in_progress | reload_completed) == 0 + && !register_operand (operands[0], V8QImode) + && !register_operand (operands[1], V8QImode) + && operands[1] != CONST0_RTX (V8QImode)) + { + rtx temp = force_reg (V8QImode, operands[1]); + emit_move_insn (operands[0], temp); + DONE; + } +}") + +(define_insn_and_split "*pushti" + [(set (match_operand:TI 0 "push_operand" "=<") + (match_operand:TI 1 "nonmemory_operand" "x"))] + "TARGET_SSE" + "#" + "" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) + (set (mem:TI (reg:SI 7)) (match_dup 1))] + "" + [(set_attr "type" "sse")]) + +(define_insn_and_split "*pushv4sf" + [(set (match_operand:V4SF 0 "push_operand" "=<") + (match_operand:V4SF 1 "nonmemory_operand" "x"))] + "TARGET_SSE" + "#" + "" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) + (set (mem:V4SF (reg:SI 7)) (match_dup 1))] + "" + [(set_attr "type" "sse")]) + +(define_insn_and_split "*pushv4si" + [(set (match_operand:V4SI 0 "push_operand" "=<") + (match_operand:V4SI 1 "nonmemory_operand" "x"))] + "TARGET_SSE" + "#" + "" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -16))) + (set (mem:V4SI (reg:SI 7)) (match_dup 1))] + "" + [(set_attr "type" "sse")]) + +(define_insn_and_split "*pushv2si" + [(set (match_operand:V2SI 0 "push_operand" "=<") + (match_operand:V2SI 1 "nonmemory_operand" "y"))] + "TARGET_MMX" + "#" + "" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) + (set (mem:V2SI (reg:SI 7)) (match_dup 1))] + "" + [(set_attr "type" "mmx")]) + +(define_insn_and_split "*pushv4hi" + [(set (match_operand:V4HI 0 "push_operand" "=<") + (match_operand:V4HI 1 "nonmemory_operand" "y"))] + "TARGET_MMX" + "#" + "" + [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8))) + (set (mem:V4HI (reg:SI 7)) (match_dup 1))] + "" + [(set_attr "type" "mmx")]) + +(define_insn_and_split "*pushv8qi" + [(set (match_operand:V8QI 0 "push_operand" "=<") + (match_operand:V8QI 1 "nonmemory_operand" "y"))] + "TARGET_MMX" + "#" + "" + [(set (reg:SI 7) (plus:SI (reg:SI 
7) (const_int -8))) + (set (mem:V8QI (reg:SI 7)) (match_dup 1))] + "" + [(set_attr "type" "mmx")]) + +(define_insn "movti_internal" + [(set (match_operand:TI 0 "nonimmediate_operand" "=x,m") + (match_operand:TI 1 "general_operand" "xm,x"))] + "TARGET_SSE" + "@ + movaps\\t{%1, %0|%0, %1} + movaps\\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +;; These two patterns are useful for specifying exactly whether to use +;; movaps or movups +(define_insn "sse_movaps" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") + (unspec:V4SF [(match_operand:V4SF 1 "general_operand" "xm,x")] 38))] + "TARGET_SSE" + "@ + movaps\\t{%1, %0|%0, %1} + movaps\\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +(define_insn "sse_movups" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") + (unspec:V4SF [(match_operand:V4SF 1 "general_operand" "xm,x")] 39))] + "TARGET_SSE" + "@ + movups\\t{%1, %0|%0, %1} + movups\\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + + +;; SSE Strange Moves. + +(define_insn "sse_movmskps" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")] 33))] + "TARGET_SSE" + "movmskps\\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +(define_insn "mmx_pmovmskb" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] 33))] + "TARGET_SSE" + "pmovmskb\\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +(define_insn "mmx_maskmovq" + [(set (mem:V8QI (match_operand:SI 0 "register_operand" "D")) + (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y") + (match_operand:V8QI 2 "register_operand" "y")] 32))] + "TARGET_SSE" + ;; @@@ check ordering of operands in intel/nonintel syntax + "maskmovq\\t{%2, %1|%1, %2}" + [(set_attr "type" "sse")]) + +(define_insn "sse_movntv4sf" + [(set (match_operand:V4SF 0 "memory_operand" "=m") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")] 34))] + "TARGET_SSE" + "movntps\\t{%1, %0|%0, %1}" + 
[(set_attr "type" "sse")]) + +(define_insn "sse_movntdi" + [(set (match_operand:DI 0 "memory_operand" "=m") + (unspec:DI [(match_operand:DI 1 "register_operand" "x")] 34))] + "TARGET_SSE" + "movntq\\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +(define_insn "sse_movhlps" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (match_operand:V4SF 1 "register_operand" "0") + (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x") + (parallel [(const_int 2) + (const_int 3) + (const_int 0) + (const_int 1)])) + (const_int 3)))] + "TARGET_SSE" + "movhlps\\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "sse_movlhps" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (match_operand:V4SF 1 "register_operand" "0") + (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x") + (parallel [(const_int 2) + (const_int 3) + (const_int 0) + (const_int 1)])) + (const_int 12)))] + "TARGET_SSE" + "movlhps\\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "sse_movhps" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") + (vec_merge:V4SF + (match_operand:V4SF 1 "nonimmediate_operand" "0,0") + (match_operand:V4SF 2 "nonimmediate_operand" "m,x") + (const_int 12)))] + "TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)" + "movhps\\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "sse_movlps" + [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m") + (vec_merge:V4SF + (match_operand:V4SF 1 "nonimmediate_operand" "0,0") + (match_operand:V4SF 2 "nonimmediate_operand" "m,x") + (const_int 3)))] + "TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)" + "movlps\\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "sse_loadss" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (match_operand:V4SF 1 "memory_operand" "m") + (vec_duplicate:V4SF (float:SF (const_int 0))) + (const_int 1)))] + 
"TARGET_SSE" + "movss\\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +(define_insn "sse_movss" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "register_operand" "x") + (const_int 1)))] + "TARGET_SSE" + "movss\\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "sse_storess" + [(set (match_operand:SF 0 "memory_operand" "=m") + (vec_select:SF + (match_operand:V4SF 1 "register_operand" "x") + (parallel [(const_int 0)])))] + "TARGET_SSE" + "movss\\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +(define_insn "sse_shufps" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm") + (match_operand:SI 3 "immediate_operand" "i")] 41))] + "TARGET_SSE" + ;; @@@ check operand order for intel/nonintel syntax + "shufps\\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "sse")]) + + +;; SSE arithmetic + +(define_insn "addv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (plus:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE" + "addps\\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "vmaddv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF (plus:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE" + "addss\\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "subv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (minus:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE" + "subps\\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "vmsubv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF (minus:V4SF 
(match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE" + "subss\\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "mulv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (mult:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE" + "mulps\\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "vmmulv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE" + "mulss\\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "divv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (div:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE" + "divps\\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "vmdivv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF (div:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE" + "divss\\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + + +;; SSE square root/reciprocal + +(define_insn "rcpv4sf2" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 42))] + "TARGET_SSE" + "rcpps\\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +(define_insn "vmrcpv4sf2" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 42) + (match_operand:V4SF 2 "register_operand" "0") + (const_int 1)))] + "TARGET_SSE" + "rcpss\\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +(define_insn "rsqrtv4sf2" + [(set 
(match_operand:V4SF 0 "register_operand" "=x") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 43))] + "TARGET_SSE" + "rsqrtps\\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +(define_insn "vmrsqrtv4sf2" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 43) + (match_operand:V4SF 2 "register_operand" "0") + (const_int 1)))] + "TARGET_SSE" + "rsqrtss\\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +(define_insn "sqrtv4sf2" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (sqrt:V4SF (match_operand:V4SF 1 "register_operand" "xm")))] + "TARGET_SSE" + "sqrtps\\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + +(define_insn "vmsqrtv4sf2" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF (sqrt:V4SF (match_operand:V4SF 1 "register_operand" "xm")) + (match_operand:V4SF 2 "register_operand" "0") + (const_int 1)))] + "TARGET_SSE" + "sqrtss\\t{%1, %0|%0, %1}" + [(set_attr "type" "sse")]) + + +;; SSE logical operations. + +;; These are not called andti3 etc. because we really really don't want +;; the compiler to widen DImode ands to TImode ands and then try to move +;; into DImode subregs of SSE registers, and them together, and move out +;; of DImode subregs again! 
+ +(define_insn "sse_andti3" + [(set (match_operand:TI 0 "register_operand" "=x") + (and:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE" + "andps\\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "sse_nandti3" + [(set (match_operand:TI 0 "register_operand" "=x") + (and:TI (not:TI (match_operand:TI 1 "register_operand" "0")) + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE" + "andnps\\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "sse_iorti3" + [(set (match_operand:TI 0 "register_operand" "=x") + (ior:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE" + "iorps\\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "sse_xorti3" + [(set (match_operand:TI 0 "register_operand" "=x") + (xor:TI (match_operand:TI 1 "register_operand" "0") + (match_operand:TI 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE" + "xorps\\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +;; Use xor, but don't show input operands so they aren't live before +;; this insn. 
+(define_insn "sse_clrti" + [(set (match_operand:TI 0 "register_operand" "=x") + (unspec:TI [(const_int 0)] 45))] + "TARGET_SSE" + "xorps\\t{%0, %0|%0, %0}" + [(set_attr "type" "sse")]) + + +;; SSE mask-generating compares + +(define_insn "maskcmpv4sf3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (match_operator:V4SI 3 "sse_comparison_operator" + [(match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "x")]))] + "TARGET_SSE" + "* +{ + switch (GET_CODE (operands[3])) + { + case EQ: + return \"cmpeqps\\t{%2, %0|%0, %2}\"; + case LT: + return \"cmpltps\\t{%2, %0|%0, %2}\"; + case LE: + return \"cmpleps\\t{%2, %0|%0, %2}\"; + case UNORDERED: + return \"cmpunordps\\t{%2, %0|%0, %2}\"; + default: + abort (); + } +}" + [(set_attr "type" "sse")]) + +(define_insn "maskncmpv4sf3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (not:V4SI + (match_operator:V4SI 3 "sse_comparison_operator" + [(match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "x")])))] + "TARGET_SSE" + "* +{ + switch (GET_CODE (operands[3])) + { + case EQ: + return \"cmpneqps\\t{%2, %0|%0, %2}\"; + case LT: + return \"cmpnltps\\t{%2, %0|%0, %2}\"; + case LE: + return \"cmpnleps\\t{%2, %0|%0, %2}\"; + case UNORDERED: + return \"cmpordps\\t{%2, %0|%0, %2}\"; + default: + abort (); + } +}" + [(set_attr "type" "sse")]) + +(define_insn "vmmaskcmpv4sf3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_merge:V4SI + (match_operator:V4SI 3 "sse_comparison_operator" + [(match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "x")]) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE" + "* +{ + switch (GET_CODE (operands[3])) + { + case EQ: + return \"cmpeqss\\t{%2, %0|%0, %2}\"; + case LT: + return \"cmpltss\\t{%2, %0|%0, %2}\"; + case LE: + return \"cmpless\\t{%2, %0|%0, %2}\"; + case UNORDERED: + return \"cmpunordss\\t{%2, %0|%0, %2}\"; + default: + abort (); + } +}" + 
[(set_attr "type" "sse")]) + +(define_insn "vmmaskncmpv4sf3" + [(set (match_operand:V4SI 0 "register_operand" "=x") + (vec_merge:V4SI + (not:V4SI + (match_operator:V4SI 3 "sse_comparison_operator" + [(match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "x")])) + (subreg:V4SI (match_dup 1) 0) + (const_int 1)))] + "TARGET_SSE" + "* +{ + switch (GET_CODE (operands[3])) + { + case EQ: + return \"cmpneqss\\t{%2, %0|%0, %2}\"; + case LT: + return \"cmpnltss\\t{%2, %0|%0, %2}\"; + case LE: + return \"cmpnless\\t{%2, %0|%0, %2}\"; + case UNORDERED: + return \"cmpordss\\t{%2, %0|%0, %2}\"; + default: + abort (); + } +}" + [(set_attr "type" "sse")]) + +(define_insn "sse_comi" + [(set (reg:CCFP 17) + (match_operator:CCFP 2 "sse_comparison_operator" + [(vec_select:SF + (match_operand:V4SF 0 "register_operand" "x") + (parallel [(const_int 0)])) + (vec_select:SF + (match_operand:V4SF 1 "register_operand" "x") + (parallel [(const_int 0)]))]))] + "TARGET_SSE" + "comiss\\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "sse_ucomi" + [(set (reg:CCFPU 17) + (match_operator:CCFPU 2 "sse_comparison_operator" + [(vec_select:SF + (match_operand:V4SF 0 "register_operand" "x") + (parallel [(const_int 0)])) + (vec_select:SF + (match_operand:V4SF 1 "register_operand" "x") + (parallel [(const_int 0)]))]))] + "TARGET_SSE" + "ucomiss\\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + + +;; SSE unpack + +(define_insn "sse_unpckhps" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF + (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "0") + (parallel [(const_int 2) + (const_int 0) + (const_int 3) + (const_int 1)])) + (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "x") + (parallel [(const_int 0) + (const_int 2) + (const_int 1) + (const_int 3)])) + (const_int 5)))] + "TARGET_SSE" + "unpckhps\\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "sse_unpcklps" + [(set (match_operand:V4SF 0 
"register_operand" "=x") + (vec_merge:V4SF + (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "0") + (parallel [(const_int 0) + (const_int 2) + (const_int 1) + (const_int 3)])) + (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "x") + (parallel [(const_int 2) + (const_int 0) + (const_int 3) + (const_int 1)])) + (const_int 5)))] + "TARGET_SSE" + "unpcklps\\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + + +;; SSE min/max + +(define_insn "smaxv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (smax:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE" + "maxps\\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "vmsmaxv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF (smax:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE" + "maxss\\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "sminv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (smin:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")))] + "TARGET_SSE" + "minps\\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + +(define_insn "vmsminv4sf3" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF (smin:V4SF (match_operand:V4SF 1 "register_operand" "0") + (match_operand:V4SF 2 "nonimmediate_operand" "xm")) + (match_dup 1) + (const_int 1)))] + "TARGET_SSE" + "minss\\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + + +;; SSE <-> integer/MMX conversions + +(define_insn "cvtpi2ps" + [(set (match_operand:V4SF 0 "register_operand" "=x") + (vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0") + (vec_duplicate:V4SF + (float:V2SF (match_operand:V2SI 2 "register_operand" "ym"))) + (const_int 12)))] + "TARGET_SSE" + "cvtpi2ps\\t{%2, %0|%0, %2}" + [(set_attr "type" "sse")]) + 
;; Convert the low two SFmode elements to packed SImode in an MMX register.
;; NOTE(review): constraint "xm" allows memory but the predicate is
;; register_operand, which rejects it -- verify intended predicate.
(define_insn "cvtps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
	(vec_select:V2SI (fix:V4SI (match_operand:V4SF 1 "register_operand" "xm"))
			 (parallel
			  [(const_int 0)
			   (const_int 1)])))]
  "TARGET_SSE"
  "cvtps2pi\\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

;; Truncating variant; unspec 30 represents the truncating fix.
(define_insn "cvttps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
	(vec_select:V2SI (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "xm")] 30)
			 (parallel
			  [(const_int 0)
			   (const_int 1)])))]
  "TARGET_SSE"
  "cvttps2pi\\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

;; Convert an SImode integer into element 0 of the destination.
;; The merge mask must be 14 (keep elements 1-3 of operand 1, take
;; element 0 from the converted value); a mask of 15 would select every
;; element from operand 1 and leave the conversion result unused.
(define_insn "cvtsi2ss"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
	(vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
			(vec_duplicate:V4SF
			 (float:SF (match_operand:SI 2 "register_operand" "rm")))
			(const_int 14)))]
  "TARGET_SSE"
  "cvtsi2ss\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

;; Convert element 0 to an SImode integer.  cvtss2si writes a general
;; register, so the destination constraint is "=r", not the MMX "=y".
(define_insn "cvtss2si"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(vec_select:SI (fix:V4SI (match_operand:V4SF 1 "register_operand" "xm"))
		       (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "cvtss2si\\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])

;; Truncating variant of the above; likewise targets a general register.
(define_insn "cvttss2si"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(vec_select:SI (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "xm")] 30)
		       (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "cvttss2si\\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")])


;; MMX insns

;; MMX arithmetic

(define_insn "addv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
	(plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
		   (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "paddb\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "addv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
		   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "paddw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "addv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
	(plus:V2SI (match_operand:V2SI 1 "register_operand" "0")
		   (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "paddd\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

;; Signed saturating additions.
(define_insn "ssaddv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
	(ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
		      (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "paddsb\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "ssaddv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
		      (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "paddsw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

;; Unsigned saturating additions.
(define_insn "usaddv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
	(us_plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
		      (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "paddusb\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "usaddv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(us_plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
		      (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "paddusw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "subv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
	(minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
		    (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "psubb\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "subv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(minus:V4HI (match_operand:V4HI 1 "register_operand" "0")
		    (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "psubw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "subv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
	(minus:V2SI (match_operand:V2SI 1 "register_operand" "0")
		    (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "psubd\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

;; Signed saturating subtractions.
(define_insn "sssubv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
	(ss_minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
		       (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "psubsb\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "sssubv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(ss_minus:V4HI (match_operand:V4HI 1 "register_operand" "0")
		       (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "psubsw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

;; Unsigned saturating subtractions.
(define_insn "ussubv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
	(us_minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
		       (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "psubusb\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "ussubv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(us_minus:V4HI (match_operand:V4HI 1 "register_operand" "0")
		       (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "psubusw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

;; Low 16 bits of the packed 16x16 multiply.
(define_insn "mulv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(mult:V4HI (match_operand:V4HI 1 "register_operand" "0")
		   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "pmullw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

;; High 16 bits of the signed packed multiply.
(define_insn "smulv4hi3_highpart"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(truncate:V4HI
	 (lshiftrt:V4SI
	  (mult:V4SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "0"))
		     (sign_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
	  (const_int 16))))]
  "TARGET_MMX"
  "pmulhw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

;; High 16 bits of the unsigned packed multiply.
(define_insn "umulv4hi3_highpart"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(truncate:V4HI
	 (lshiftrt:V4SI
	  (mult:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "0"))
		     (zero_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
	  (const_int 16))))]
  "TARGET_MMX"
  "pmulhuw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

;; Multiply-add: products of even and odd word pairs summed into dwords.
(define_insn "mmx_pmaddwd"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
	(plus:V2SI
	 (mult:V2SI
	  (sign_extend:V2SI (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0")
					     (parallel [(const_int 0)
							(const_int 2)])))
	  (sign_extend:V2SI (vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
					     (parallel [(const_int 0)
							(const_int 2)]))))
	 (mult:V2SI
	  (sign_extend:V2SI (vec_select:V2HI (match_dup 1)
					     (parallel [(const_int 1)
							(const_int 3)])))
	  (sign_extend:V2SI (vec_select:V2HI (match_dup 2)
					     (parallel [(const_int 1)
							(const_int 3)]))))))]
  "TARGET_MMX"
  "pmaddwd\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])


;; MMX logical operations
;; Note we don't want to declare these as regular iordi3 insns to prevent
;; normal code that also wants to use the FPU from getting broken.
;; The UNSPECs are there to prevent the combiner from getting overly clever.
(define_insn "mmx_iordi3"
  [(set (match_operand:DI 0 "register_operand" "=y")
	(unspec:DI
	 [(ior:DI (match_operand:DI 1 "register_operand" "0")
		  (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))]
  "TARGET_MMX"
  "por\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "mmx_xordi3"
  [(set (match_operand:DI 0 "register_operand" "=y")
	(unspec:DI
	 [(xor:DI (match_operand:DI 1 "register_operand" "0")
		  (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))]
  "TARGET_MMX"
  "pxor\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

;; Same as pxor, but don't show input operands so that we don't think
;; they are live.
(define_insn "mmx_clrdi"
  [(set (match_operand:DI 0 "register_operand" "=y")
	(unspec:DI [(const_int 0)] 45))]
  "TARGET_MMX"
  "pxor\\t{%0, %0|%0, %0}"
  [(set_attr "type" "mmx")])

(define_insn "mmx_anddi3"
  [(set (match_operand:DI 0 "register_operand" "=y")
	(unspec:DI
	 [(and:DI (match_operand:DI 1 "register_operand" "0")
		  (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))]
  "TARGET_MMX"
  "pand\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

;; And-not: operand 1 is the complemented (destination) operand.
(define_insn "mmx_nanddi3"
  [(set (match_operand:DI 0 "register_operand" "=y")
	(unspec:DI
	 [(and:DI (not:DI (match_operand:DI 1 "register_operand" "0"))
		  (match_operand:DI 2 "nonimmediate_operand" "ym"))] 45))]
  "TARGET_MMX"
  "pandn\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])


;; MMX unsigned averages/sum of absolute differences
;; These are SSE-era extensions to the MMX register file, hence TARGET_SSE.

;; Rounding unsigned byte average; the correct mnemonic is "pavgb"
;; (the original "pavgbn" is not a real instruction).
(define_insn "mmx_uavgv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
	(ashiftrt:V8QI
	 (plus:V8QI (plus:V8QI
		     (match_operand:V8QI 1 "register_operand" "0")
		     (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
		    (vec_const:V8QI (parallel [(const_int 1)
					       (const_int 1)
					       (const_int 1)
					       (const_int 1)
					       (const_int 1)
					       (const_int 1)
					       (const_int 1)
					       (const_int 1)])))
	 (const_int 1)))]
  "TARGET_SSE"
  "pavgb\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

;; Rounding unsigned word average; mnemonic fixed from "pavgwn" to "pavgw".
(define_insn "mmx_uavgv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(ashiftrt:V4HI
	 (plus:V4HI (plus:V4HI
		     (match_operand:V4HI 1 "register_operand" "0")
		     (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
		    (vec_const:V4HI (parallel [(const_int 1)
					       (const_int 1)
					       (const_int 1)
					       (const_int 1)])))
	 (const_int 1)))]
  "TARGET_SSE"
  "pavgw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

;; Sum of absolute differences.  Mnemonic fixed from "padbw" to "psadbw",
;; and the operands are V8QI to match the minus:V8QI they feed (they were
;; declared DI, which is malformed RTL).
(define_insn "mmx_psadbw"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
	(abs:V8QI (minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
			      (match_operand:V8QI 2 "nonimmediate_operand" "ym"))))]
  "TARGET_SSE"
  "psadbw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])


;; MMX insert/extract/shuffle

;; Insert a 16-bit value into the word selected by the immediate.
;; The immediate is the first operand in AT&T syntax and the last in
;; Intel syntax, so it belongs inside the {att|intel} alternatives.
(define_insn "mmx_pinsrw"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(vec_merge:V4HI (match_operand:V4HI 1 "register_operand" "0")
			(vec_duplicate:V4HI
			 (truncate:HI (match_operand:SI 2 "nonimmediate_operand" "rm")))
			(match_operand:SI 3 "immediate_operand" "i")))]
  "TARGET_SSE"
  "pinsrw\\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sse")])

;; Extract the word selected by the immediate, zero-extended into a
;; general register.
(define_insn "mmx_pextrw"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(zero_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y")
				       (parallel
					[(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SSE"
  "pextrw\\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sse")])

;; Shuffle words according to the immediate selector (unspec 41).
(define_insn "mmx_pshufw"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(unspec:V4HI [(match_operand:V4HI 1 "register_operand" "0")
		      (match_operand:V4HI 2 "nonimmediate_operand" "ym")
		      (match_operand:SI 3 "immediate_operand" "i")] 41))]
  "TARGET_SSE"
  "pshufw\\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sse")])


;; MMX mask-generating comparisons

(define_insn "eqv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
	(eq:V8QI (match_operand:V8QI 1 "register_operand" "0")
		 (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "pcmpeqb\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "eqv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(eq:V4HI (match_operand:V4HI 1 "register_operand" "0")
		 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "pcmpeqw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "eqv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
	(eq:V2SI (match_operand:V2SI 1 "register_operand" "0")
		 (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "pcmpeqd\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "gtv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
	(gt:V8QI (match_operand:V8QI 1 "register_operand" "0")
		 (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "pcmpgtb\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "gtv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(gt:V4HI (match_operand:V4HI 1 "register_operand" "0")
		 (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "pcmpgtw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "gtv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
	(gt:V2SI (match_operand:V2SI 1 "register_operand" "0")
		 (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
  "TARGET_MMX"
  "pcmpgtd\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])


;; MMX max/min insns
;; SSE-era extensions to MMX, hence TARGET_SSE.

(define_insn "umaxv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
	(umax:V8QI (match_operand:V8QI 1 "register_operand" "0")
		   (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
  "TARGET_SSE"
  "pmaxub\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "smaxv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(smax:V4HI (match_operand:V4HI 1 "register_operand" "0")
		   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
  "TARGET_SSE"
  "pmaxsw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "uminv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
	(umin:V8QI (match_operand:V8QI 1 "register_operand" "0")
		   (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
  "TARGET_SSE"
  "pminub\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])

(define_insn "sminv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(smin:V4HI (match_operand:V4HI 1 "register_operand" "0")
		   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
  "TARGET_SSE"
  "pminsw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])


;; MMX shifts
;; The shift count may be an MMX register or an immediate ("yi").

(define_insn "ashrv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(ashiftrt:V4HI (match_operand:V4HI 1 "register_operand" "0")
		       (match_operand:DI 2 "nonmemory_operand" "yi")))]
  "TARGET_MMX"
  "psraw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "ashrv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
	(ashiftrt:V2SI (match_operand:V2SI 1 "register_operand" "0")
		       (match_operand:DI 2 "nonmemory_operand" "yi")))]
  "TARGET_MMX"
  "psrad\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "lshrv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(lshiftrt:V4HI (match_operand:V4HI 1 "register_operand" "0")
		       (match_operand:DI 2 "nonmemory_operand" "yi")))]
  "TARGET_MMX"
  "psrlw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "lshrv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
	(lshiftrt:V2SI (match_operand:V2SI 1 "register_operand" "0")
		       (match_operand:DI 2 "nonmemory_operand" "yi")))]
  "TARGET_MMX"
  "psrld\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

;; See logical MMX insns.
(define_insn "mmx_lshrdi3"
  [(set (match_operand:DI 0 "register_operand" "=y")
	(lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
		     (match_operand:DI 2 "nonmemory_operand" "yi")))]
  "TARGET_MMX"
  "psrlq\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "ashlv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(ashift:V4HI (match_operand:V4HI 1 "register_operand" "0")
		     (match_operand:DI 2 "nonmemory_operand" "yi")))]
  "TARGET_MMX"
  "psllw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "ashlv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
	(ashift:V2SI (match_operand:V2SI 1 "register_operand" "0")
		     (match_operand:DI 2 "nonmemory_operand" "yi")))]
  "TARGET_MMX"
  "pslld\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

;; See logical MMX insns.
(define_insn "mmx_ashldi3"
  [(set (match_operand:DI 0 "register_operand" "=y")
	(ashift:DI (match_operand:DI 1 "register_operand" "0")
		   (match_operand:DI 2 "nonmemory_operand" "yi")))]
  "TARGET_MMX"
  "psllq\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])


;; MMX pack/unpack insns.

(define_insn "mmx_packsswb"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
	(vec_concat:V8QI
	 (ss_truncate:V4QI (match_operand:V4HI 1 "register_operand" "0"))
	 (ss_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))]
  "TARGET_MMX"
  "packsswb\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "mmx_packssdw"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(vec_concat:V4HI
	 (ss_truncate:V2HI (match_operand:V2SI 1 "register_operand" "0"))
	 (ss_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))]
  "TARGET_MMX"
  "packssdw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "mmx_packuswb"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
	(vec_concat:V8QI
	 (us_truncate:V4QI (match_operand:V4HI 1 "register_operand" "0"))
	 (us_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))]
  "TARGET_MMX"
  "packuswb\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

;; Each unpack pattern's output mnemonic must match its name; the
;; originals emitted "punpckhbw"/"punpcklbw" for all six patterns
;; (copy-and-paste error).

(define_insn "mmx_punpckhbw"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
	(vec_merge:V8QI
	 (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "0")
			  (parallel [(const_int 4)
				     (const_int 0)
				     (const_int 5)
				     (const_int 1)
				     (const_int 6)
				     (const_int 2)
				     (const_int 7)
				     (const_int 3)]))
	 (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y")
			  (parallel [(const_int 0)
				     (const_int 4)
				     (const_int 1)
				     (const_int 5)
				     (const_int 2)
				     (const_int 6)
				     (const_int 3)
				     (const_int 7)]))
	 (const_int 85)))]
  "TARGET_MMX"
  "punpckhbw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "mmx_punpckhwd"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(vec_merge:V4HI
	 (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "0")
			  (parallel [(const_int 0)
				     (const_int 2)
				     (const_int 1)
				     (const_int 3)]))
	 (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y")
			  (parallel [(const_int 2)
				     (const_int 0)
				     (const_int 3)
				     (const_int 1)]))
	 (const_int 5)))]
  "TARGET_MMX"
  "punpckhwd\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "mmx_punpckhdq"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
	(vec_merge:V2SI
	 (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0")
			  (parallel [(const_int 0)
				     (const_int 1)]))
	 (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y")
			  (parallel [(const_int 1)
				     (const_int 0)]))
	 (const_int 1)))]
  "TARGET_MMX"
  "punpckhdq\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "mmx_punpcklbw"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
	(vec_merge:V8QI
	 (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "0")
			  (parallel [(const_int 0)
				     (const_int 4)
				     (const_int 1)
				     (const_int 5)
				     (const_int 2)
				     (const_int 6)
				     (const_int 3)
				     (const_int 7)]))
	 (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y")
			  (parallel [(const_int 4)
				     (const_int 0)
				     (const_int 5)
				     (const_int 1)
				     (const_int 6)
				     (const_int 2)
				     (const_int 7)
				     (const_int 3)]))
	 (const_int 85)))]
  "TARGET_MMX"
  "punpcklbw\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "mmx_punpcklwd"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(vec_merge:V4HI
	 (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "0")
			  (parallel [(const_int 2)
				     (const_int 0)
				     (const_int 3)
				     (const_int 1)]))
	 (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y")
			  (parallel [(const_int 0)
				     (const_int 2)
				     (const_int 1)
				     (const_int 3)]))
	 (const_int 5)))]
  "TARGET_MMX"
  "punpcklwd\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])

(define_insn "mmx_punpckldq"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
	(vec_merge:V2SI
	 (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0")
			  (parallel [(const_int 1)
				     (const_int 0)]))
	 (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y")
			  (parallel [(const_int 0)
				     (const_int 1)]))
	 (const_int 1)))]
  "TARGET_MMX"
  "punpckldq\\t{%2, %0|%0, %2}"
  [(set_attr "type" "mmx")])


;; Miscellaneous stuff

;; emms invalidates the whole x87/MMX register file, so every FP and MMX
;; register is clobbered.
(define_insn "emms"
  [(unspec_volatile [(const_int 0)] 31)
   (clobber (reg:XF 8))
   (clobber (reg:XF 9))
   (clobber (reg:XF 10))
   (clobber (reg:XF 11))
   (clobber (reg:XF 12))
   (clobber (reg:XF 13))
   (clobber (reg:XF 14))
   (clobber (reg:XF 15))
   (clobber (reg:DI 27))
   (clobber (reg:DI 28))
   (clobber (reg:DI 29))
   (clobber (reg:DI 30))
   (clobber (reg:DI 31))
   (clobber (reg:DI 32))
   (clobber (reg:DI 33))
   (clobber (reg:DI 34))]
  "TARGET_MMX"
  "emms"
  [(set_attr "type" "mmx")])

;; NOTE(review): ldmxcsr/stmxcsr are SSE control-register instructions;
;; guarding them with TARGET_MMX (and type "mmx") looks wrong -- confirm
;; whether the condition should be TARGET_SSE.
(define_insn "ldmxcsr"
  [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] 37)]
  "TARGET_MMX"
  "ldmxcsr\\t%0"
  [(set_attr "type" "mmx")])

(define_insn "stmxcsr"
  [(set (match_operand:SI 0 "memory_operand" "=m")
	(unspec_volatile:SI [(const_int 0)] 40))]
  "TARGET_MMX"
  "stmxcsr\\t%0"
  [(set_attr "type" "mmx")])

;; Store fence, modeled as a volatile BLKmode memory self-assignment so
;; preceding stores cannot be moved past it.
(define_expand "sfence"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] 44))]
  "TARGET_SSE"
  "
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
}")

(define_insn "*sfence_insn"
  [(set (match_operand:BLK 0 "" "")
	(unspec:BLK [(match_dup 0)] 44))]
  "TARGET_SSE"
  "sfence"
  [(set_attr "type" "sse")])

;; Prefetch; operand 1 selects the locality hint.  The template calls
;; INTVAL on operand 1, so it must be a CONST_INT: predicate fixed from
;; "address_operand" (which also accepts registers and would make INTVAL
;; read garbage) to "const_int_operand" with constraint "n".
(define_insn "prefetch"
  [(unspec [(match_operand:SI 0 "address_operand" "p")
	    (match_operand:SI 1 "const_int_operand" "n")] 35)]
  "TARGET_SSE"
  "*
{
  switch (INTVAL (operands[1]))
    {
    case 0:
      return \"prefetcht0\\t%0\";
    case 1:
      return \"prefetcht1\\t%0\";
    case 2:
      return \"prefetcht2\\t%0\";
    case 3:
      return \"prefetchnta\\t%0\";
    default:
      abort ();
    }
}"
  [(set_attr "type" "sse")])