author    | Richard Henderson <rth@redhat.com>  | 2002-01-12 02:05:28 -0800
committer | Richard Henderson <rth@gcc.gnu.org> | 2002-01-12 02:05:28 -0800
commit    | e37af218eed960ea5d499158db780aa4821e02cc (patch)
tree      | 9015cbde63a553c647631990a8859f8b29b4af0f /gcc/config/i386/i386.md
parent    | b0d723da3660fdff9096353054bfcf6c39f3769c (diff)
download  | gcc-e37af218eed960ea5d499158db780aa4821e02cc.tar.gz
i386.c (override_options): If SSE, enable sse prefetch.
* config/i386/i386.c (override_options): If SSE, enable sse prefetch.
(ix86_expand_vector_move): New.
(bdesc_2arg): Remove andps, andnps, orps, xorps.
(ix86_init_mmx_sse_builtins): Make static. Remove composite builtins.
Remove old prefetch builtins. Special case the logicals removed above.
(ix86_expand_builtin): Likewise.
(safe_vector_operand): Use V4SFmode, not TImode.
(ix86_expand_store_builtin): Remove shuffle arg. Update callers.
(ix86_expand_timode_binop_builtin): New.
* config/i386/i386-protos.h: Update.
* config/i386/i386.h (enum ix86_builtins): Update.
* config/i386/i386.md: Correct predicates on MMX/SSE patterns.
Use ix86_expand_vector_move in vector move expanders.
(movti_internal, movti_rex64): Add xorps alternative.
(sse_clrv4sf): Rename and adjust from sse_clrti.
(prefetch): Don't work so hard.
(prefetch_sse, prefetch_3dnow): Use PREFETCH rtx, not UNSPEC.
* config/i386/xmmintrin.h (__m128): Use V4SFmode.
(_mm_getcsr, _mm_setcsr): Fix typo in builtin name.
From-SVN: r48796
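The new ix86_expand_vector_move helper itself lives in i386.c, so it is not visible in this i386.md-limited view. The per-mode expander bodies deleted below were near-identical copies of one piece of logic, so the factored-out helper plausibly reads as the following sketch (reconstructed from those deleted bodies, not quoted from i386.c; the final emit_insn is my assumption about how the common move ends up being emitted):

/* Sketch only: reconstructed from the per-mode expander bodies this
   patch deletes from i386.md; assumes the usual i386.c environment
   (config.h, system.h, rtl.h, expr.h already included).  */
void
ix86_expand_vector_move (mode, operands)
     enum machine_mode mode;
     rtx operands[];
{
  /* Force constants other than zero into memory, since we do not know
     how the instructions used to build constants modify the upper 64
     bits of the register.  */
  if ((reload_in_progress | reload_completed) == 0
      && register_operand (operands[0], mode)
      && CONSTANT_P (operands[1]))
    {
      rtx addr = gen_reg_rtx (Pmode);

      emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
      operands[1] = gen_rtx_MEM (mode, addr);
    }

  /* Make operand1 a register if it isn't already.  */
  if ((reload_in_progress | reload_completed) == 0
      && !register_operand (operands[0], mode)
      && !register_operand (operands[1], mode)
      && operands[1] != CONST0_RTX (mode))
    operands[1] = force_reg (mode, operands[1]);

  /* Assumption: the helper finishes by emitting the move itself, so
     the .md expanders can simply call it and DONE.  */
  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
}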
Diffstat (limited to 'gcc/config/i386/i386.md')
-rw-r--r-- | gcc/config/i386/i386.md | 562
1 file changed, 199 insertions, 363 deletions
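Two related changes worth spotting in the diff below: movti_internal and *movti_rex64 gain an "O" (zero) alternative that clears an SSE register with xorps instead of loading zero from memory, and sse_clrti becomes sse_clrv4sf to match the V4SFmode now used for __m128. A user-level illustration (the function name is mine; _mm_setzero_ps comes from the xmmintrin.h this patch also touches):

#include <xmmintrin.h>

/* Illustration only, not from the commit: with this patch, zeroing an
   __m128 is expected to come out as the register-clear idiom rather
   than a constant-pool load.  */
__m128
zero_v4sf (void)
{
  return _mm_setzero_ps ();	/* expected: xorps %xmm0, %xmm0 */
}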
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 4673c2b2191..c892fc183ce 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -81,7 +81,6 @@
 ;; 32 This is a `maskmov' operation.
 ;; 33 This is a `movmsk' operation.
 ;; 34 This is a `non-temporal' move.
-;; 35 This is a `prefetch' (SSE) operation.
 ;; 36 This is used to distinguish COMISS from UCOMISS.
 ;; 37 This is a `ldmxcsr' operation.
 ;; 38 This is a forced `movaps' instruction (rather than whatever movti does)
@@ -17686,7 +17685,7 @@
 
 (define_insn "movv4sf_internal"
   [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
-	(match_operand:V4SF 1 "general_operand" "xm,x"))]
+	(match_operand:V4SF 1 "nonimmediate_operand" "xm,x"))]
   "TARGET_SSE"
   ;; @@@ let's try to use movaps here.
   "movaps\t{%1, %0|%0, %1}"
@@ -17694,7 +17693,7 @@
 
 (define_insn "movv4si_internal"
   [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m")
-	(match_operand:V4SI 1 "general_operand" "xm,x"))]
+	(match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))]
   "TARGET_SSE"
   ;; @@@ let's try to use movaps here.
   "movaps\t{%1, %0|%0, %1}"
@@ -17702,28 +17701,28 @@
 
 (define_insn "movv8qi_internal"
   [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m")
-	(match_operand:V8QI 1 "general_operand" "ym,y"))]
+	(match_operand:V8QI 1 "nonimmediate_operand" "ym,y"))]
   "TARGET_MMX"
   "movq\t{%1, %0|%0, %1}"
   [(set_attr "type" "mmx")])
 
 (define_insn "movv4hi_internal"
   [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,m")
-	(match_operand:V4HI 1 "general_operand" "ym,y"))]
+	(match_operand:V4HI 1 "nonimmediate_operand" "ym,y"))]
   "TARGET_MMX"
   "movq\t{%1, %0|%0, %1}"
   [(set_attr "type" "mmx")])
 
 (define_insn "movv2si_internal"
   [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,m")
-	(match_operand:V2SI 1 "general_operand" "ym,y"))]
+	(match_operand:V2SI 1 "nonimmediate_operand" "ym,y"))]
   "TARGET_MMX"
   "movq\t{%1, %0|%0, %1}"
   [(set_attr "type" "mmx")])
 
 (define_insn "movv2sf_internal"
   [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,m")
-	(match_operand:V2SF 1 "general_operand" "ym,y"))]
+	(match_operand:V2SF 1 "nonimmediate_operand" "ym,y"))]
   "TARGET_3DNOW"
   "movq\\t{%1, %0|%0, %1}"
   [(set_attr "type" "mmx")])
@@ -17734,34 +17733,10 @@
   "TARGET_SSE || TARGET_64BIT"
 {
   if (TARGET_64BIT)
-    {
-      ix86_expand_move (TImode, operands);
-      DONE;
-    }
-  /* For constants other than zero into memory.  We do not know how the
-     instructions used to build constants modify the upper 64 bits
-     of the register, once we have that information we may be able
-     to handle some of them more efficiently.  */
-  if ((reload_in_progress | reload_completed) == 0
-      && register_operand (operands[0], TImode)
-      && CONSTANT_P (operands[1]))
-    {
-      rtx addr = gen_reg_rtx (Pmode);
-
-      emit_move_insn (addr, XEXP (force_const_mem (TImode, operands[1]), 0));
-      operands[1] = gen_rtx_MEM (TImode, addr);
-    }
-
-  /* Make operand1 a register if it isn't already.  */
-  if ((reload_in_progress | reload_completed) == 0
-      && !register_operand (operands[0], TImode)
-      && !register_operand (operands[1], TImode)
-      && operands[1] != CONST0_RTX (TImode))
-    {
-      rtx temp = force_reg (TImode, operands[1]);
-      emit_move_insn (operands[0], temp);
-      DONE;
-    }
+    ix86_expand_move (TImode, operands);
+  else
+    ix86_expand_vector_move (TImode, operands);
+  DONE;
 })
 
 (define_expand "movv4sf"
@@ -17769,30 +17744,8 @@
 	(match_operand:V4SF 1 "general_operand" ""))]
   "TARGET_SSE"
 {
-  /* For constants other than zero into memory.  We do not know how the
-     instructions used to build constants modify the upper 64 bits
-     of the register, once we have that information we may be able
-     to handle some of them more efficiently.  */
-  if ((reload_in_progress | reload_completed) == 0
-      && register_operand (operands[0], V4SFmode)
-      && CONSTANT_P (operands[1]))
-    {
-      rtx addr = gen_reg_rtx (Pmode);
-
-      emit_move_insn (addr, XEXP (force_const_mem (V4SFmode, operands[1]), 0));
-      operands[1] = gen_rtx_MEM (V4SFmode, addr);
-    }
-
-  /* Make operand1 a register if it isn't already.  */
-  if ((reload_in_progress | reload_completed) == 0
-      && !register_operand (operands[0], V4SFmode)
-      && !register_operand (operands[1], V4SFmode)
-      && operands[1] != CONST0_RTX (V4SFmode))
-    {
-      rtx temp = force_reg (V4SFmode, operands[1]);
-      emit_move_insn (operands[0], temp);
-      DONE;
-    }
+  ix86_expand_vector_move (V4SFmode, operands);
+  DONE;
 })
 
 (define_expand "movv4si"
@@ -17800,30 +17753,8 @@
 	(match_operand:V4SI 1 "general_operand" ""))]
   "TARGET_MMX"
 {
-  /* For constants other than zero into memory.  We do not know how the
-     instructions used to build constants modify the upper 64 bits
-     of the register, once we have that information we may be able
-     to handle some of them more efficiently.  */
-  if ((reload_in_progress | reload_completed) == 0
-      && register_operand (operands[0], V4SImode)
-      && CONSTANT_P (operands[1]))
-    {
-      rtx addr = gen_reg_rtx (Pmode);
-
-      emit_move_insn (addr, XEXP (force_const_mem (V4SImode, operands[1]), 0));
-      operands[1] = gen_rtx_MEM (V4SImode, addr);
-    }
-
-  /* Make operand1 a register if it isn't already.  */
-  if ((reload_in_progress | reload_completed) == 0
-      && !register_operand (operands[0], V4SImode)
-      && !register_operand (operands[1], V4SImode)
-      && operands[1] != CONST0_RTX (V4SImode))
-    {
-      rtx temp = force_reg (V4SImode, operands[1]);
-      emit_move_insn (operands[0], temp);
-      DONE;
-    }
+  ix86_expand_vector_move (V4SImode, operands);
+  DONE;
 })
 
 (define_expand "movv2si"
@@ -17831,30 +17762,8 @@
 	(match_operand:V2SI 1 "general_operand" ""))]
   "TARGET_MMX"
 {
-  /* For constants other than zero into memory.  We do not know how the
-     instructions used to build constants modify the upper 64 bits
-     of the register, once we have that information we may be able
-     to handle some of them more efficiently.  */
-  if ((reload_in_progress | reload_completed) == 0
-      && register_operand (operands[0], V2SImode)
-      && CONSTANT_P (operands[1]))
-    {
-      rtx addr = gen_reg_rtx (Pmode);
-
-      emit_move_insn (addr, XEXP (force_const_mem (V2SImode, operands[1]), 0));
-      operands[1] = gen_rtx_MEM (V2SImode, addr);
-    }
-
-  /* Make operand1 a register if it isn't already.  */
-  if ((reload_in_progress | reload_completed) == 0
-      && !register_operand (operands[0], V2SImode)
-      && !register_operand (operands[1], V2SImode)
-      && operands[1] != CONST0_RTX (V2SImode))
-    {
-      rtx temp = force_reg (V2SImode, operands[1]);
-      emit_move_insn (operands[0], temp);
-      DONE;
-    }
+  ix86_expand_vector_move (V2SImode, operands);
+  DONE;
 })
 
 (define_expand "movv4hi"
@@ -17862,30 +17771,8 @@
 	(match_operand:V4HI 1 "general_operand" ""))]
   "TARGET_MMX"
 {
-  /* For constants other than zero into memory.  We do not know how the
-     instructions used to build constants modify the upper 64 bits
-     of the register, once we have that information we may be able
-     to handle some of them more efficiently.  */
-  if ((reload_in_progress | reload_completed) == 0
-      && register_operand (operands[0], V4HImode)
-      && CONSTANT_P (operands[1]))
-    {
-      rtx addr = gen_reg_rtx (Pmode);
-
-      emit_move_insn (addr, XEXP (force_const_mem (V4HImode, operands[1]), 0));
-      operands[1] = gen_rtx_MEM (V4HImode, addr);
-    }
-
-  /* Make operand1 a register if it isn't already.  */
-  if ((reload_in_progress | reload_completed) == 0
-      && !register_operand (operands[0], V4HImode)
-      && !register_operand (operands[1], V4HImode)
-      && operands[1] != CONST0_RTX (V4HImode))
-    {
-      rtx temp = force_reg (V4HImode, operands[1]);
-      emit_move_insn (operands[0], temp);
-      DONE;
-    }
+  ix86_expand_vector_move (V4HImode, operands);
+  DONE;
 })
 
 (define_expand "movv8qi"
@@ -17893,65 +17780,18 @@
 	(match_operand:V8QI 1 "general_operand" ""))]
   "TARGET_MMX"
 {
-  /* For constants other than zero into memory.  We do not know how the
-     instructions used to build constants modify the upper 64 bits
-     of the register, once we have that information we may be able
-     to handle some of them more efficiently.  */
-  if ((reload_in_progress | reload_completed) == 0
-      && register_operand (operands[0], V8QImode)
-      && CONSTANT_P (operands[1]))
-    {
-      rtx addr = gen_reg_rtx (Pmode);
-
-      emit_move_insn (addr, XEXP (force_const_mem (V8QImode, operands[1]), 0));
-      operands[1] = gen_rtx_MEM (V8QImode, addr);
-    }
-
-  /* Make operand1 a register if it isn't already.  */
-  if ((reload_in_progress | reload_completed) == 0
-      && !register_operand (operands[0], V8QImode)
-      && !register_operand (operands[1], V8QImode)
-      && operands[1] != CONST0_RTX (V8QImode))
-    {
-      rtx temp = force_reg (V8QImode, operands[1]);
-      emit_move_insn (operands[0], temp);
-      DONE;
-    }
+  ix86_expand_vector_move (V8QImode, operands);
+  DONE;
 })
 
 (define_expand "movv2sf"
   [(set (match_operand:V2SF 0 "general_operand" "")
 	(match_operand:V2SF 1 "general_operand" ""))]
   "TARGET_3DNOW"
-  "
 {
-  /* For constants other than zero into memory.  We do not know how the
-     instructions used to build constants modify the upper 64 bits
-     of the register, once we have that information we may be able
-     to handle some of them more efficiently.  */
-  if ((reload_in_progress | reload_completed) == 0
-      && register_operand (operands[0], V2SFmode)
-      && CONSTANT_P (operands[1]))
-    {
-      rtx addr = gen_reg_rtx (Pmode);
-
-      emit_move_insn (addr,
-		      XEXP (force_const_mem (V2SFmode, operands[1]), 0));
-      operands[1] = gen_rtx_MEM (V2SFmode, addr);
-    }
-
-  /* Make operand1 a register is it isn't already.  */
-  if ((reload_in_progress | reload_completed) == 0
-      && !register_operand (operands[0], V2SFmode)
-      && !register_operand (operands[1], V2SFmode)
-      && (GET_CODE (operands[1]) != CONST_INT || INTVAL (operands[1]) != 0)
-      && operands[1] != CONST0_RTX (V2SFmode))
-    {
-      rtx temp = force_reg (V2SFmode, operands[1]);
-      emit_move_insn (operands[0], temp);
-      DONE;
-    }
-}")
+  ix86_expand_vector_move (V2SFmode, operands);
+  DONE;
+})
 
 (define_insn_and_split "*pushti"
   [(set (match_operand:TI 0 "push_operand" "=<")
@@ -18031,25 +17871,27 @@
   [(set_attr "type" "mmx")])
 
 (define_insn "movti_internal"
-  [(set (match_operand:TI 0 "nonimmediate_operand" "=x,m")
-	(match_operand:TI 1 "general_operand" "xm,x"))]
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
+	(match_operand:TI 1 "general_operand" "O,xm,x"))]
   "TARGET_SSE && !TARGET_64BIT"
   "@
+   xorps\t%0, %0
    movaps\t{%1, %0|%0, %1}
    movaps\t{%1, %0|%0, %1}"
   [(set_attr "type" "sse")])
 
 (define_insn "*movti_rex64"
-  [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,mx,x")
-	(match_operand:TI 1 "general_operand" "riFo,riF,x,m"))]
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x")
+	(match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))]
   "TARGET_64BIT
    && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
   "@
   #
   #
+  xorps\t%0, %0
   movaps\\t{%1, %0|%0, %1}
   movaps\\t{%1, %0|%0, %1}"
-  [(set_attr "type" "*,*,sse,sse")
+  [(set_attr "type" "*,*,sse,sse,sse")
   (set_attr "mode" "TI")])
 
 (define_split
@@ -18064,7 +17906,8 @@
 ;; movaps or movups
 (define_insn "sse_movaps"
   [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
-	(unspec:V4SF [(match_operand:V4SF 1 "general_operand" "xm,x")] 38))]
+	(unspec:V4SF
+	 [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] 38))]
   "TARGET_SSE"
   "@
   movaps\t{%1, %0|%0, %1}
@@ -18073,7 +17916,8 @@
 
 (define_insn "sse_movups"
   [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
-	(unspec:V4SF [(match_operand:V4SF 1 "general_operand" "xm,x")] 39))]
+	(unspec:V4SF
+	 [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")] 39))]
   "TARGET_SSE"
   "@
   movups\t{%1, %0|%0, %1}
@@ -18154,7 +17998,8 @@
 	  (match_operand:V4SF 1 "nonimmediate_operand" "0,0")
 	  (match_operand:V4SF 2 "nonimmediate_operand" "m,x")
 	  (const_int 12)))]
-  "TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
+  "TARGET_SSE
+   && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
   "movhps\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])
 
@@ -18164,7 +18009,8 @@
 	  (match_operand:V4SF 1 "nonimmediate_operand" "0,0")
 	  (match_operand:V4SF 2 "nonimmediate_operand" "m,x")
 	  (const_int 3)))]
-  "TARGET_SSE && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
+  "TARGET_SSE
   && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
   "movlps\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])
@@ -18220,10 +18066,11 @@
 
 (define_insn "vmaddv4sf3"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_merge:V4SF (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
-				   (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
-			(match_dup 1)
-			(const_int 1)))]
+	(vec_merge:V4SF
+	 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
+		    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+	 (match_dup 1)
+	 (const_int 1)))]
   "TARGET_SSE"
   "addss\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])
@@ -18231,17 +18078,18 @@
 (define_insn "subv4sf3"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
 	(minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
-		   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
+		    (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
   "TARGET_SSE"
   "subps\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])
 
 (define_insn "vmsubv4sf3"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_merge:V4SF (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
-				    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
-			(match_dup 1)
-			(const_int 1)))]
+	(vec_merge:V4SF
+	 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
+		     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+	 (match_dup 1)
+	 (const_int 1)))]
   "TARGET_SSE"
   "subss\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])
@@ -18256,10 +18104,11 @@
 
 (define_insn "vmmulv4sf3"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_merge:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
-				   (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
-			(match_dup 1)
-			(const_int 1)))]
+	(vec_merge:V4SF
+	 (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
+		    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+	 (match_dup 1)
+	 (const_int 1)))]
   "TARGET_SSE"
   "mulss\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])
@@ -18274,10 +18123,11 @@
 
 (define_insn "vmdivv4sf3"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_merge:V4SF (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
-				  (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
-			(match_dup 1)
-			(const_int 1)))]
+	(vec_merge:V4SF
+	 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
+		   (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+	 (match_dup 1)
+	 (const_int 1)))]
   "TARGET_SSE"
   "divss\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])
@@ -18287,53 +18137,57 @@
 
 (define_insn "rcpv4sf2"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 42))]
+	(unspec:V4SF
+	 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42))]
   "TARGET_SSE"
   "rcpps\t{%1, %0|%0, %1}"
   [(set_attr "type" "sse")])
 
 (define_insn "vmrcpv4sf2"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_merge:V4SF (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 42)
-			(match_operand:V4SF 2 "register_operand" "0")
-			(const_int 1)))]
+	(vec_merge:V4SF
+	 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 42)
+	 (match_operand:V4SF 2 "register_operand" "0")
+	 (const_int 1)))]
   "TARGET_SSE"
   "rcpss\t{%1, %0|%0, %1}"
   [(set_attr "type" "sse")])
 
 (define_insn "rsqrtv4sf2"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 43))]
+	(unspec:V4SF
+	 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43))]
   "TARGET_SSE"
   "rsqrtps\t{%1, %0|%0, %1}"
   [(set_attr "type" "sse")])
 
 (define_insn "vmrsqrtv4sf2"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_merge:V4SF (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "xm")] 43)
-			(match_operand:V4SF 2 "register_operand" "0")
-			(const_int 1)))]
+	(vec_merge:V4SF
+	 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 43)
+	 (match_operand:V4SF 2 "register_operand" "0")
+	 (const_int 1)))]
   "TARGET_SSE"
   "rsqrtss\t{%1, %0|%0, %1}"
   [(set_attr "type" "sse")])
 
 (define_insn "sqrtv4sf2"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(sqrt:V4SF (match_operand:V4SF 1 "register_operand" "xm")))]
+	(sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
   "TARGET_SSE"
   "sqrtps\t{%1, %0|%0, %1}"
   [(set_attr "type" "sse")])
 
 (define_insn "vmsqrtv4sf2"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_merge:V4SF (sqrt:V4SF (match_operand:V4SF 1 "register_operand" "xm"))
-			(match_operand:V4SF 2 "register_operand" "0")
-			(const_int 1)))]
+	(vec_merge:V4SF
+	 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+	 (match_operand:V4SF 2 "register_operand" "0")
+	 (const_int 1)))]
   "TARGET_SSE"
   "sqrtss\t{%1, %0|%0, %1}"
   [(set_attr "type" "sse")])
-
 ;; SSE logical operations.
 
 ;; These are not called andti3 etc. because we really really don't want
@@ -18519,9 +18373,9 @@
 
 ;; Use xor, but don't show input operands so they aren't live before
 ;; this insn.
-(define_insn "sse_clrti"
-  [(set (match_operand:TI 0 "register_operand" "=x")
-	(unspec:TI [(const_int 0)] 45))]
+(define_insn "sse_clrv4sf"
+  [(set (match_operand:V4SF 0 "register_operand" "=x")
+	(unspec:V4SF [(const_int 0)] 45))]
   "TARGET_SSE"
   "xorps\t{%0, %0|%0, %0}"
   [(set_attr "type" "sse")
@@ -18532,8 +18386,8 @@
 (define_insn "maskcmpv4sf3"
   [(set (match_operand:V4SI 0 "register_operand" "=x")
 	(match_operator:V4SI 3 "sse_comparison_operator"
-		[(match_operand:V4SF 1 "register_operand" "0")
-		 (match_operand:V4SF 2 "nonimmediate_operand" "x")]))]
+	 [(match_operand:V4SF 1 "register_operand" "0")
+	  (match_operand:V4SF 2 "register_operand" "x")]))]
   "TARGET_SSE"
   "cmp%D3ps\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])
@@ -18542,24 +18396,23 @@
 
 (define_insn "maskncmpv4sf3"
   [(set (match_operand:V4SI 0 "register_operand" "=x")
 	(not:V4SI
	 (match_operator:V4SI 3 "sse_comparison_operator"
-		[(match_operand:V4SF 1 "register_operand" "0")
-		 (match_operand:V4SF 2 "nonimmediate_operand" "x")])))]
+	  [(match_operand:V4SF 1 "register_operand" "0")
+	   (match_operand:V4SF 2 "register_operand" "x")])))]
   "TARGET_SSE"
-  "*
 {
   if (GET_CODE (operands[3]) == UNORDERED)
-    return \"cmpordps\t{%2, %0|%0, %2}\";
-
-  return \"cmpn%D3ps\t{%2, %0|%0, %2}\";
-}"
+    return "cmpordps\t{%2, %0|%0, %2}";
+  else
+    return "cmpn%D3ps\t{%2, %0|%0, %2}";
+}
   [(set_attr "type" "sse")])
 
 (define_insn "vmmaskcmpv4sf3"
   [(set (match_operand:V4SI 0 "register_operand" "=x")
 	(vec_merge:V4SI
	 (match_operator:V4SI 3 "sse_comparison_operator"
-		[(match_operand:V4SF 1 "register_operand" "0")
-		 (match_operand:V4SF 2 "nonimmediate_operand" "x")])
+	  [(match_operand:V4SF 1 "register_operand" "0")
+	   (match_operand:V4SF 2 "register_operand" "x")])
 	 (match_dup 1)
 	 (const_int 1)))]
   "TARGET_SSE"
@@ -18571,18 +18424,17 @@
 	(vec_merge:V4SI
	 (not:V4SI
	  (match_operator:V4SI 3 "sse_comparison_operator"
-		[(match_operand:V4SF 1 "register_operand" "0")
-		 (match_operand:V4SF 2 "nonimmediate_operand" "x")]))
+	   [(match_operand:V4SF 1 "register_operand" "0")
+	    (match_operand:V4SF 2 "register_operand" "x")]))
 	 (subreg:V4SI (match_dup 1) 0)
 	 (const_int 1)))]
   "TARGET_SSE"
-  "*
 {
   if (GET_CODE (operands[3]) == UNORDERED)
-    return \"cmpordss\t{%2, %0|%0, %2}\";
-
-  return \"cmpn%D3ss\t{%2, %0|%0, %2}\";
-}"
+    return "cmpordss\t{%2, %0|%0, %2}";
+  else
+    return "cmpn%D3ss\t{%2, %0|%0, %2}";
+}
   [(set_attr "type" "sse")])
 
 (define_insn "sse_comi"
@@ -18663,10 +18515,11 @@
 
 (define_insn "vmsmaxv4sf3"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_merge:V4SF (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
-				   (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
-			(match_dup 1)
-			(const_int 1)))]
+	(vec_merge:V4SF
+	 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
+		    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+	 (match_dup 1)
+	 (const_int 1)))]
   "TARGET_SSE"
   "maxss\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])
@@ -18681,10 +18534,11 @@
 
 (define_insn "vmsminv4sf3"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_merge:V4SF (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
-				   (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
-			(match_dup 1)
-			(const_int 1)))]
+	(vec_merge:V4SF
+	 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
+		    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
+	 (match_dup 1)
+	 (const_int 1)))]
   "TARGET_SSE"
   "minss\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])
@@ -18694,56 +18548,58 @@
 
 (define_insn "cvtpi2ps"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
-			(vec_duplicate:V4SF
-			 (float:V2SF (match_operand:V2SI 2 "register_operand" "ym")))
-			(const_int 12)))]
+	(vec_merge:V4SF
+	 (match_operand:V4SF 1 "register_operand" "0")
+	 (vec_duplicate:V4SF
+	  (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
+	 (const_int 12)))]
   "TARGET_SSE"
   "cvtpi2ps\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])
 
 (define_insn "cvtps2pi"
   [(set (match_operand:V2SI 0 "register_operand" "=y")
-	(vec_select:V2SI (fix:V4SI (match_operand:V4SF 1 "register_operand" "xm"))
-			 (parallel
-			  [(const_int 0)
-			   (const_int 1)])))]
+	(vec_select:V2SI
+	 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+	 (parallel [(const_int 0) (const_int 1)])))]
   "TARGET_SSE"
   "cvtps2pi\t{%1, %0|%0, %1}"
   [(set_attr "type" "sse")])
 
 (define_insn "cvttps2pi"
   [(set (match_operand:V2SI 0 "register_operand" "=y")
-	(vec_select:V2SI (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "xm")] 30)
-			 (parallel
-			  [(const_int 0)
-			   (const_int 1)])))]
+	(vec_select:V2SI
+	 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30)
+	 (parallel [(const_int 0) (const_int 1)])))]
   "TARGET_SSE"
   "cvttps2pi\t{%1, %0|%0, %1}"
   [(set_attr "type" "sse")])
 
 (define_insn "cvtsi2ss"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
-	(vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0")
-			(vec_duplicate:V4SF
-			 (float:SF (match_operand:SI 2 "register_operand" "rm")))
-			(const_int 14)))]
+	(vec_merge:V4SF
+	 (match_operand:V4SF 1 "register_operand" "0")
+	 (vec_duplicate:V4SF
+	  (float:SF (match_operand:SI 2 "nonimmediate_operand" "rm")))
+	 (const_int 14)))]
   "TARGET_SSE"
   "cvtsi2ss\t{%2, %0|%0, %2}"
   [(set_attr "type" "sse")])
 
 (define_insn "cvtss2si"
   [(set (match_operand:SI 0 "register_operand" "=r")
-	(vec_select:SI (fix:V4SI (match_operand:V4SF 1 "register_operand" "xm"))
-		       (parallel [(const_int 0)])))]
+	(vec_select:SI
	 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+	 (parallel [(const_int 0)])))]
   "TARGET_SSE"
   "cvtss2si\t{%1, %0|%0, %1}"
   [(set_attr "type" "sse")])
 
 (define_insn "cvttss2si"
   [(set (match_operand:SI 0 "register_operand" "=r")
-	(vec_select:SI (unspec:V4SI [(match_operand:V4SF 1 "register_operand" "xm")] 30)
-		       (parallel [(const_int 0)])))]
+	(vec_select:SI
+	 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 30)
+	 (parallel [(const_int 0)])))]
   "TARGET_SSE"
   "cvttss2si\t{%1, %0|%0, %1}"
   [(set_attr "type" "sse")])
@@ -18877,8 +18733,10 @@
   [(set (match_operand:V4HI 0 "register_operand" "=y")
 	(truncate:V4HI
	 (lshiftrt:V4SI
-	  (mult:V4SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "0"))
-		     (sign_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+	  (mult:V4SI (sign_extend:V4SI
+		      (match_operand:V4HI 1 "register_operand" "0"))
+		     (sign_extend:V4SI
+		      (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
	  (const_int 16))))]
   "TARGET_MMX"
   "pmulhw\t{%2, %0|%0, %2}"
@@ -18888,8 +18746,10 @@
   [(set (match_operand:V4HI 0 "register_operand" "=y")
 	(truncate:V4HI
	 (lshiftrt:V4SI
-	  (mult:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "0"))
-		     (zero_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+	  (mult:V4SI (zero_extend:V4SI
+		      (match_operand:V4HI 1 "register_operand" "0"))
+		     (zero_extend:V4SI
+		      (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
	  (const_int 16))))]
   "TARGET_SSE || TARGET_3DNOW_A"
   "pmulhuw\t{%2, %0|%0, %2}"
@@ -18899,12 +18759,12 @@
   [(set (match_operand:V2SI 0 "register_operand" "=y")
 	(plus:V2SI
	 (mult:V2SI
-	  (sign_extend:V2SI (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0")
-					     (parallel [(const_int 0)
-							(const_int 2)])))
-	  (sign_extend:V2SI (vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
-					     (parallel [(const_int 0)
-							(const_int 2)]))))
+	  (sign_extend:V2SI
+	   (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0")
+			    (parallel [(const_int 0) (const_int 2)])))
+	  (sign_extend:V2SI
+	   (vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+			    (parallel [(const_int 0) (const_int 2)]))))
	 (mult:V2SI
	  (sign_extend:V2SI (vec_select:V2HI (match_dup 1)
					     (parallel [(const_int 1)
@@ -19404,75 +19264,6 @@
   [(set_attr "type" "sse")
   (set_attr "memory" "unknown")])
 
-(define_expand "prefetch"
-  [(prefetch (match_operand:SI 0 "address_operand" "p")
-	     (match_operand:SI 1 "const_int_operand" "n")
-	     (match_operand:SI 2 "const_int_operand" "n"))]
-  "TARGET_PREFETCH_SSE || TARGET_3DNOW"
-  "
-{
-  int rw = INTVAL (operands[1]);
-  int locality = INTVAL (operands[2]);
-  if (rw != 0 && rw != 1)
-    abort ();
-  if (locality < 0 || locality > 3)
-    abort ();
-  /* Use 3dNOW prefetch in case we are asking for write prefetch not
-     suported by SSE counterpart or the SSE prefetch is not available
-     (K6 machines).  Otherwise use SSE prefetch as it allows specifying
-     of locality.  */
-  if (TARGET_3DNOW
-      && (!TARGET_PREFETCH_SSE || rw))
-    {
-      emit_insn (gen_prefetch_3dnow (operands[0], operands[1]));
-    }
-  else
-    {
-      int i;
-      switch (locality)
-	{
-	case 0: /* No temporal locality.  */
-	  i = 0;
-	  break;
-	case 1: /* Lowest level of temporal locality.  */
-	  i = 3;
-	  break;
-	case 2: /* Moderate level of temporal locality.  */
-	  i = 2;
-	  break;
-	case 3: /* Highest level of temporal locality.  */
-	  i = 1;
-	  break;
-	default:
-	  abort (); /* We already checked for valid values above.  */
-	  break;
-	}
-      emit_insn (gen_prefetch_sse (operands[0], GEN_INT (i)));
-    }
-  DONE;
-}")
-
-(define_insn "prefetch_sse"
-  [(unspec [(match_operand:SI 0 "address_operand" "p")
-	    (match_operand:SI 1 "immediate_operand" "n")] 35)]
-  "TARGET_PREFETCH_SSE"
-{
-  switch (INTVAL (operands[1]))
-    {
-    case 0:
-      return "prefetchnta\t%a0";
-    case 1:
-      return "prefetcht0\t%a0";
-    case 2:
-      return "prefetcht1\t%a0";
-    case 3:
-      return "prefetcht2\t%a0";
-    default:
-      abort ();
-    }
-}
-  [(set_attr "type" "sse")])
-
 (define_expand "sse_prologue_save"
   [(parallel [(set (match_operand:BLK 0 "" "")
		   (unspec:BLK [(reg:DI 21)
@@ -19630,19 +19421,6 @@
   "femms"
   [(set_attr "type" "mmx")])
 
-(define_insn "prefetch_3dnow"
-  [(prefetch (match_operand:SI 0 "address_operand" "p")
-	     (match_operand:SI 1 "const_int_operand" "n")
-	     (const_int 0))]
-  "TARGET_3DNOW"
-{
-  if (INTVAL (operands[1]) == 0)
-    return "prefetch\t%a0";
-  else
-    return "prefetchw\t%a0";
-}
-  [(set_attr "type" "mmx")])
-
 (define_insn "pf2id"
   [(set (match_operand:V2SI 0 "register_operand" "=y")
 	(fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))]
@@ -19820,3 +19598,61 @@
   "TARGET_3DNOW_A"
   "pswapd\\t{%1, %0|%0, %1}"
   [(set_attr "type" "mmx")])
+
+(define_expand "prefetch"
+  [(prefetch (match_operand:SI 0 "address_operand" "")
+	     (match_operand:SI 1 "const_int_operand" "")
+	     (match_operand:SI 2 "const_int_operand" ""))]
+  "TARGET_PREFETCH_SSE || TARGET_3DNOW"
+{
+  int rw = INTVAL (operands[1]);
+  int locality = INTVAL (operands[2]);
+  if (rw != 0 && rw != 1)
+    abort ();
+  if (locality < 0 || locality > 3)
+    abort ();
+
+  /* Use 3dNOW prefetch in case we are asking for write prefetch not
+     suported by SSE counterpart or the SSE prefetch is not available
+     (K6 machines).  Otherwise use SSE prefetch as it allows specifying
+     of locality.  */
+  if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw))
+    {
+      operands[2] = GEN_INT (3);
+    }
+  else
+    {
+      operands[1] = const0_rtx;
+    }
+})
+
+(define_insn "*prefetch_sse"
+  [(prefetch (match_operand:SI 0 "address_operand" "")
+	     (const_int 0)
+	     (match_operand:SI 1 "const_int_operand" ""))]
+  "TARGET_PREFETCH_SSE"
+{
+  static const char * const patterns[4] = {
+    "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0"
+  };
+
+  int locality = INTVAL (operands[1]);
+  if (locality < 0 || locality > 3)
+    abort ();
+
+  return patterns[locality];
+}
+  [(set_attr "type" "sse")])
+
+(define_insn "*prefetch_3dnow"
+  [(prefetch (match_operand:SI 0 "address_operand" "p")
+	     (match_operand:SI 1 "const_int_operand" "n")
+	     (const_int 0))]
+  "TARGET_3DNOW"
+{
+  if (INTVAL (operands[1]) == 0)
+    return "prefetch\t%a0";
+  else
+    return "prefetchw\t%a0";
+}
+  [(set_attr "type" "mmx")])
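The rewritten prefetch support splits the work: the named "prefetch" expander canonicalizes the rw/locality operands, and the *prefetch_sse and *prefetch_3dnow insns simply match the canonical PREFETCH rtx. A sketch of how this is reached from C, assuming the usual __builtin_prefetch route into the named pattern (my example, not part of the commit):

/* The locality argument indexes the patterns[] table above:
   0 -> prefetchnta, 1 -> prefetcht2, 2 -> prefetcht1, 3 -> prefetcht0.
   A write prefetch is steered to 3dNOW prefetchw where available.  */
void
warm_cache (const char *p)
{
  __builtin_prefetch (p, 0, 3);	/* read, maximum locality: prefetcht0 */
  __builtin_prefetch (p, 0, 0);	/* read, no locality: prefetchnta */
  __builtin_prefetch (p, 1, 3);	/* write: prefetchw on 3dNOW targets */
}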