;; GCC machine description for SSE instructions
;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
;; Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; .
;; 16 byte integral modes handled by SSE
(define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
;; All 16-byte vector modes handled by SSE
(define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
(define_mode_iterator SSEMODE16 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF])
;; 32 byte integral vector modes handled by AVX
(define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
;; All 32-byte vector modes handled by AVX
(define_mode_iterator AVX256MODE [V32QI V16HI V8SI V4DI V8SF V4DF])
;; All QI vector modes handled by AVX
(define_mode_iterator AVXMODEQI [V32QI V16QI])
;; All DI vector modes handled by AVX
(define_mode_iterator AVXMODEDI [V4DI V2DI])
;; All vector modes handled by AVX
(define_mode_iterator AVXMODE
[V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
(define_mode_iterator AVXMODE16
[V16QI V8HI V4SI V2DI V1TI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
;; Mix-n-match
(define_mode_iterator SSEMODE12 [V16QI V8HI])
(define_mode_iterator SSEMODE24 [V8HI V4SI])
(define_mode_iterator SSEMODE14 [V16QI V4SI])
(define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
(define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
(define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
(define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
(define_mode_iterator FMA4MODEF4 [V8SF V4DF])
(define_mode_iterator SSEMODEF2P [V4SF V2DF])
(define_mode_iterator AVX256MODEF2P [V8SF V4DF])
(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
(define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF])
(define_mode_iterator AVX256MODE4P [V4DI V4DF])
(define_mode_iterator AVX256MODE8P [V8SI V8SF])
(define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
(define_mode_iterator AVXMODEF4P [V4SF V4DF])
(define_mode_iterator AVXMODEFDP [V2DF V4DF])
(define_mode_iterator AVXMODEFSP [V4SF V8SF])
(define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
(define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
;; Int-float size matches
(define_mode_iterator SSEMODE4S [V4SF V4SI])
(define_mode_iterator SSEMODE2D [V2DF V2DI])
;; Modes handled by integer vcond pattern
(define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
(V2DI "TARGET_SSE4_2")])
;; Modes handled by vec_extract_even/odd pattern.
(define_mode_iterator SSEMODE_EO
[(V4SF "TARGET_SSE")
(V2DF "TARGET_SSE2")
(V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2")
(V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
;; Mapping from float mode to required SSE level
(define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
;; Mapping from integer vector mode to mnemonic suffix
(define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
;; Mapping of the fma4 suffix
(define_mode_attr fma4modesuffixf4 [(V8SF "ps") (V4DF "pd")])
(define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
(V4SF "ss") (V2DF "sd")])
;; Mapping of the avx suffix
(define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
(V4SF "ps") (V2DF "pd")])
(define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
(define_mode_attr ssescalarmodesuffix2s [(V4SF "ss") (V4SI "d")])
;; Mapping of the max integer size for xop rotate immediate constraint
(define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
;; Mapping of vector modes back to the scalar modes
(define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
(V16QI "QI") (V8HI "HI")
(V4SI "SI") (V2DI "DI")])
;; Mapping of vector modes to a vector mode of double size
(define_mode_attr ssedoublesizemode
[(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
(V8HI "V16HI") (V16QI "V32QI")
(V4DF "V8DF") (V8SF "V16SF")
(V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])
;; Number of scalar elements in each vector type
(define_mode_attr ssescalarnum
[(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
(V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])
;; Mapping for AVX
(define_mode_attr avxvecmode
[(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
(V4SF "V4SF") (V8SF "V8SF") (V2DF "V2DF") (V4DF "V4DF")
(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")])
(define_mode_attr avxvecpsmode
[(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
(V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
(define_mode_attr avxhalfvecmode
[(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
(V8SF "V4SF") (V4DF "V2DF")
(V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V4SF "V2SF")])
(define_mode_attr avxscalarmode
[(V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") (V4SF "SF") (V2DF "DF")
(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") (V8SF "SF") (V4DF "DF")])
(define_mode_attr avxcvtvecmode
[(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
(define_mode_attr avxpermvecmode
[(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
(define_mode_attr avxmodesuffixf2c
[(V4SF "s") (V2DF "d") (V8SI "s") (V8SF "s") (V4DI "d") (V4DF "d")])
(define_mode_attr avxmodesuffixp
[(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
(V4DF "pd")])
(define_mode_attr avxmodesuffix
[(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
(V8SI "256") (V8SF "256") (V4DF "256")])
;; Mapping of immediate bits for blend instructions
(define_mode_attr blendbits
[(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
;; Mapping of immediate bits for pinsr instructions
(define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Move patterns
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define_expand "mov"
[(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
(match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
"TARGET_AVX"
{
ix86_expand_vector_move (mode, operands);
DONE;
})
(define_insn "*avx_mov_internal"
[(set (match_operand:AVXMODE16 0 "nonimmediate_operand" "=x,x ,m")
(match_operand:AVXMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
"TARGET_AVX
&& (register_operand (operands[0], mode)
|| register_operand (operands[1], mode))"
{
switch (which_alternative)
{
case 0:
return standard_sse_constant_opcode (insn, operands[1]);
case 1:
case 2:
switch (get_attr_mode (insn))
{
case MODE_V8SF:
case MODE_V4SF:
return "vmovaps\t{%1, %0|%0, %1}";
case MODE_V4DF:
case MODE_V2DF:
return "vmovapd\t{%1, %0|%0, %1}";
default:
return "vmovdqa\t{%1, %0|%0, %1}";
}
default:
gcc_unreachable ();
}
}
[(set_attr "type" "sselog1,ssemov,ssemov")
(set_attr "prefix" "vex")
(set_attr "mode" "")])
;; All of these patterns are enabled for SSE1 as well as SSE2.
;; This is essential for maintaining stable calling conventions.
(define_expand "mov"
[(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
(match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
"TARGET_SSE"
{
ix86_expand_vector_move (mode, operands);
DONE;
})
(define_insn "*mov_internal"
[(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "=x,x ,m")
(match_operand:SSEMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
"TARGET_SSE
&& (register_operand (operands[0], mode)
|| register_operand (operands[1], mode))"
{
switch (which_alternative)
{
case 0:
return standard_sse_constant_opcode (insn, operands[1]);
case 1:
case 2:
switch (get_attr_mode (insn))
{
case MODE_V4SF:
return "movaps\t{%1, %0|%0, %1}";
case MODE_V2DF:
return "movapd\t{%1, %0|%0, %1}";
default:
return "movdqa\t{%1, %0|%0, %1}";
}
default:
gcc_unreachable ();
}
}
[(set_attr "type" "sselog1,ssemov,ssemov")
(set (attr "mode")
(cond [(ior (ior (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
(eq (symbol_ref "TARGET_SSE2") (const_int 0)))
(and (eq_attr "alternative" "2")
(ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
(const_int 0))))
(const_string "V4SF")
(eq (const_string "mode") (const_string "V4SFmode"))
(const_string "V4SF")
(eq (const_string "mode") (const_string "V2DFmode"))
(const_string "V2DF")
]
(const_string "TI")))])
;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
;; from memory, we'd prefer to load the memory directly into the %xmm
;; register. To facilitate this happy circumstance, this pattern won't
;; split until after register allocation. If the 64-bit value didn't
;; come from memory, this is the best we can do. This is much better
;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; from there.
(define_insn_and_split "movdi_to_sse"
[(parallel
[(set (match_operand:V4SI 0 "register_operand" "=?x,x")
(subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
(clobber (match_scratch:V4SI 2 "=&x,X"))])]
"!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
"#"
"&& reload_completed"
[(const_int 0)]
{
if (register_operand (operands[1], DImode))
{
/* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
Assemble the 64-bit DImode value in an xmm register. */
emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
gen_rtx_SUBREG (SImode, operands[1], 0)));
emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
gen_rtx_SUBREG (SImode, operands[1], 4)));
emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
operands[2]));
}
else if (memory_operand (operands[1], DImode))
emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
operands[1], const0_rtx));
else
gcc_unreachable ();
})
(define_split
[(set (match_operand:V4SF 0 "register_operand" "")
(match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
"TARGET_SSE && reload_completed"
[(set (match_dup 0)
(vec_merge:V4SF
(vec_duplicate:V4SF (match_dup 1))
(match_dup 2)
(const_int 1)))]
{
operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
operands[2] = CONST0_RTX (V4SFmode);
})
(define_split
[(set (match_operand:V2DF 0 "register_operand" "")
(match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
"TARGET_SSE2 && reload_completed"
[(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
{
operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
operands[2] = CONST0_RTX (DFmode);
})
(define_expand "push1"
[(match_operand:AVX256MODE 0 "register_operand" "")]
"TARGET_AVX"
{
ix86_expand_push (mode, operands[0]);
DONE;
})
(define_expand "push1"
[(match_operand:SSEMODE16 0 "register_operand" "")]
"TARGET_SSE"
{
ix86_expand_push (mode, operands[0]);
DONE;
})
(define_expand "movmisalign"
[(set (match_operand:AVX256MODE 0 "nonimmediate_operand" "")
(match_operand:AVX256MODE 1 "nonimmediate_operand" ""))]
"TARGET_AVX"
{
ix86_expand_vector_move_misalign (mode, operands);
DONE;
})
(define_expand "movmisalign"
[(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
(match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
"TARGET_SSE"
{
ix86_expand_vector_move_misalign (mode, operands);
DONE;
})
(define_insn "avx_movup"
[(set (match_operand:AVXMODEF2P 0 "nonimmediate_operand" "=x,m")
(unspec:AVXMODEF2P
[(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm,x")]
UNSPEC_MOVU))]
"AVX_VEC_FLOAT_MODE_P (mode)
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"vmovup\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
(set_attr "prefix" "vex")
(set_attr "mode" "")])
(define_insn "sse2_movq128"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(vec_concat:V2DI
(vec_select:DI
(match_operand:V2DI 1 "nonimmediate_operand" "xm")
(parallel [(const_int 0)]))
(const_int 0)))]
"TARGET_SSE2"
"%vmovq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
(define_insn "_movup"
[(set (match_operand:SSEMODEF2P 0 "nonimmediate_operand" "=x,m")
(unspec:SSEMODEF2P
[(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm,x")]
UNSPEC_MOVU))]
"SSE_VEC_FLOAT_MODE_P (mode)
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"movup\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
(set_attr "mode" "")])
(define_insn "avx_movdqu"
[(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
(unspec:AVXMODEQI
[(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
UNSPEC_MOVU))]
"TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"vmovdqu\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
(set_attr "prefix" "vex")
(set_attr "mode" "")])
(define_insn "sse2_movdqu"
[(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
(unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
UNSPEC_MOVU))]
"TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"movdqu\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "avx_movnt"
[(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
(unspec:AVXMODEF2P
[(match_operand:AVXMODEF2P 1 "register_operand" "x")]
UNSPEC_MOVNT))]
"AVX_VEC_FLOAT_MODE_P (mode)"
"vmovntp\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "vex")
(set_attr "mode" "")])
(define_insn "_movnt"
[(set (match_operand:SSEMODEF2P 0 "memory_operand" "=m")
(unspec:SSEMODEF2P
[(match_operand:SSEMODEF2P 1 "register_operand" "x")]
UNSPEC_MOVNT))]
"SSE_VEC_FLOAT_MODE_P (mode)"
"movntp\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "mode" "")])
(define_insn "avx_movnt"
[(set (match_operand:AVXMODEDI 0 "memory_operand" "=m")
(unspec:AVXMODEDI
[(match_operand:AVXMODEDI 1 "register_operand" "x")]
UNSPEC_MOVNT))]
"TARGET_AVX"
"vmovntdq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "vex")
(set_attr "mode" "")])
(define_insn "sse2_movntv2di"
[(set (match_operand:V2DI 0 "memory_operand" "=m")
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
UNSPEC_MOVNT))]
"TARGET_SSE2"
"movntdq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "sse2_movntsi"
[(set (match_operand:SI 0 "memory_operand" "=m")
(unspec:SI [(match_operand:SI 1 "register_operand" "r")]
UNSPEC_MOVNT))]
"TARGET_SSE2"
"movnti\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix_data16" "0")
(set_attr "mode" "V2DF")])
(define_insn "avx_lddqu"
[(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
(unspec:AVXMODEQI
[(match_operand:AVXMODEQI 1 "memory_operand" "m")]
UNSPEC_LDDQU))]
"TARGET_AVX"
"vlddqu\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "movu" "1")
(set_attr "prefix" "vex")
(set_attr "mode" "")])
(define_insn "sse3_lddqu"
[(set (match_operand:V16QI 0 "register_operand" "=x")
(unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
UNSPEC_LDDQU))]
"TARGET_SSE3"
"lddqu\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "movu" "1")
(set_attr "prefix_data16" "0")
(set_attr "prefix_rep" "1")
(set_attr "mode" "TI")])
; Expand patterns for non-temporal stores. At the moment, only those
; that directly map to insns are defined; it would be possible to
; define patterns for other modes that would expand to several insns.
(define_expand "storent"
[(set (match_operand:AVX256MODEF2P 0 "memory_operand" "")
(unspec:AVX256MODEF2P
[(match_operand:AVX256MODEF2P 1 "register_operand" "")]
UNSPEC_MOVNT))]
"AVX256_VEC_FLOAT_MODE_P (mode)"
"")
(define_expand "storent"
[(set (match_operand:SSEMODEF2P 0 "memory_operand" "")
(unspec:SSEMODEF2P
[(match_operand:SSEMODEF2P 1 "register_operand" "")]
UNSPEC_MOVNT))]
"SSE_VEC_FLOAT_MODE_P (mode)"
"")
(define_expand "storent"
[(set (match_operand:MODEF 0 "memory_operand" "")
(unspec:MODEF
[(match_operand:MODEF 1 "register_operand" "")]
UNSPEC_MOVNT))]
"TARGET_SSE4A"
"")
(define_expand "storentv2di"
[(set (match_operand:V2DI 0 "memory_operand" "")
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "")]
UNSPEC_MOVNT))]
"TARGET_SSE2"
"")
(define_expand "storentsi"
[(set (match_operand:SI 0 "memory_operand" "")
(unspec:SI [(match_operand:SI 1 "register_operand" "")]
UNSPEC_MOVNT))]
"TARGET_SSE2"
"")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel floating point arithmetic
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define_expand "2"
[(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
(absneg:AVX256MODEF2P
(match_operand:AVX256MODEF2P 1 "register_operand" "")))]
"AVX256_VEC_FLOAT_MODE_P (mode)"
"ix86_expand_fp_absneg_operator (, mode, operands); DONE;")
(define_expand "2"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "")
(absneg:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "register_operand" "")))]
"SSE_VEC_FLOAT_MODE_P (mode)"
"ix86_expand_fp_absneg_operator (, mode, operands); DONE;")
(define_expand "3"
[(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
(plusminus:AVX256MODEF2P
(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
(match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
"AVX256_VEC_FLOAT_MODE_P (mode)"
"ix86_fixup_binary_operands_no_copy (, mode, operands);")
(define_insn "*avx_3"
[(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
(plusminus:AVXMODEF2P
(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "x")
(match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
"AVX_VEC_FLOAT_MODE_P (mode)
&& ix86_binary_operator_ok (, mode, operands)"
"vp\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
(set_attr "prefix" "vex")
(set_attr "mode" "")])
(define_expand "3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "")
(plusminus:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
"SSE_VEC_FLOAT_MODE_P (mode)"
"ix86_fixup_binary_operands_no_copy (, mode, operands);")
(define_insn "*3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(plusminus:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0")
(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
"SSE_VEC_FLOAT_MODE_P (mode)
&& ix86_binary_operator_ok (, mode, operands)"
"p\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
(set_attr "mode" "")])
(define_insn "*avx_vm3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(vec_merge:SSEMODEF2P
(plusminus:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "register_operand" "x")
(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
"AVX128_VEC_FLOAT_MODE_P (mode)"
"vs\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
(set_attr "prefix" "vex")
(set_attr "mode" "")])
(define_insn "_vm3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(vec_merge:SSEMODEF2P
(plusminus:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "register_operand" "0")
(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
"SSE_VEC_FLOAT_MODE_P (mode)"
"s\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
(set_attr "mode" "")])
(define_expand "mul3"
[(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
(mult:AVX256MODEF2P
(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
(match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
"AVX256_VEC_FLOAT_MODE_P (mode)"
"ix86_fixup_binary_operands_no_copy (MULT, mode, operands);")
(define_insn "*avx_mul3"
[(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
(mult:AVXMODEF2P
(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
(match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
"AVX_VEC_FLOAT_MODE_P (mode)
&& ix86_binary_operator_ok (MULT, mode, operands)"
"vmulp\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssemul")
(set_attr "prefix" "vex")
(set_attr "mode" "")])
(define_expand "mul3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "")
(mult:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
"SSE_VEC_FLOAT_MODE_P (mode)"
"ix86_fixup_binary_operands_no_copy (MULT, mode, operands);")
(define_insn "*mul3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(mult:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
"SSE_VEC_FLOAT_MODE_P (mode)
&& ix86_binary_operator_ok (MULT, mode, operands)"
"mulp\t{%2, %0|%0, %2}"
[(set_attr "type" "ssemul")
(set_attr "mode" "")])
(define_insn "*avx_vmmul3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(vec_merge:SSEMODEF2P
(mult:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "register_operand" "x")
(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
"AVX_VEC_FLOAT_MODE_P (mode)"
"vmuls\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssemul")
(set_attr "prefix" "vex")
(set_attr "mode" "")])
(define_insn "_vmmul3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(vec_merge:SSEMODEF2P
(mult:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "register_operand" "0")
(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
"SSE_VEC_FLOAT_MODE_P (mode)"
"muls\t{%2, %0|%0, %2}"
[(set_attr "type" "ssemul")
(set_attr "mode" "")])
(define_expand "divv8sf3"
[(set (match_operand:V8SF 0 "register_operand" "")
(div:V8SF (match_operand:V8SF 1 "register_operand" "")
(match_operand:V8SF 2 "nonimmediate_operand" "")))]
"TARGET_AVX"
{
ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
&& flag_finite_math_only && !flag_trapping_math
&& flag_unsafe_math_optimizations)
{
ix86_emit_swdivsf (operands[0], operands[1],
operands[2], V8SFmode);
DONE;
}
})
(define_expand "divv4df3"
[(set (match_operand:V4DF 0 "register_operand" "")
(div:V4DF (match_operand:V4DF 1 "register_operand" "")
(match_operand:V4DF 2 "nonimmediate_operand" "")))]
"TARGET_AVX"
"ix86_fixup_binary_operands_no_copy (DIV, V4DFmode, operands);")
(define_insn "avx_div3"
[(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
(div:AVXMODEF2P
(match_operand:AVXMODEF2P 1 "register_operand" "x")
(match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
"AVX_VEC_FLOAT_MODE_P (mode)"
"vdivp\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssediv")
(set_attr "prefix" "vex")
(set_attr "mode" "")])
(define_expand "divv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "")
(div:V4SF (match_operand:V4SF 1 "register_operand" "")
(match_operand:V4SF 2 "nonimmediate_operand" "")))]
"TARGET_SSE"
{
if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
&& flag_finite_math_only && !flag_trapping_math
&& flag_unsafe_math_optimizations)
{
ix86_emit_swdivsf (operands[0], operands[1],
operands[2], V4SFmode);
DONE;
}
})
(define_expand "divv2df3"
[(set (match_operand:V2DF 0 "register_operand" "")
(div:V2DF (match_operand:V2DF 1 "register_operand" "")
(match_operand:V2DF 2 "nonimmediate_operand" "")))]
"TARGET_SSE2"
"")
(define_insn "*avx_div3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(div:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "register_operand" "x")
(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
"AVX128_VEC_FLOAT_MODE_P (mode)"
"vdivp\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssediv")
(set_attr "prefix" "vex")
(set_attr "mode" "")])
(define_insn "_div3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(div:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "register_operand" "0")
(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
"SSE_VEC_FLOAT_MODE_P (mode)"
"divp\t{%2, %0|%0, %2}"
[(set_attr "type" "ssediv")
(set_attr "mode" "")])
(define_insn "*avx_vmdiv3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(vec_merge:SSEMODEF2P
(div:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "register_operand" "x")
(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
"AVX128_VEC_FLOAT_MODE_P (mode)"
"vdivs\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssediv")
(set_attr "prefix" "vex")
(set_attr "mode" "")])
(define_insn "_vmdiv3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(vec_merge:SSEMODEF2P
(div:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "register_operand" "0")
(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
"SSE_VEC_FLOAT_MODE_P (mode)"
"divs\t{%2, %0|%0, %2}"
[(set_attr "type" "ssediv")
(set_attr "mode" "")])
(define_insn "avx_rcpv8sf2"
[(set (match_operand:V8SF 0 "register_operand" "=x")
(unspec:V8SF
[(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
"TARGET_AVX"
"vrcpps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
(define_insn "sse_rcpv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(unspec:V4SF
[(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
"TARGET_SSE"
"%vrcpps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
(set_attr "atom_sse_attr" "rcp")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "V4SF")])
(define_insn "*avx_vmrcpv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
(unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
UNSPEC_RCP)
(match_operand:V4SF 2 "register_operand" "x")
(const_int 1)))]
"TARGET_AVX"
"vrcpss\t{%1, %2, %0|%0, %2, %1}"
[(set_attr "type" "sse")
(set_attr "prefix" "vex")
(set_attr "mode" "SF")])
(define_insn "sse_vmrcpv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
(unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
UNSPEC_RCP)
(match_operand:V4SF 2 "register_operand" "0")
(const_int 1)))]
"TARGET_SSE"
"rcpss\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
(set_attr "atom_sse_attr" "rcp")
(set_attr "mode" "SF")])
(define_expand "sqrtv8sf2"
[(set (match_operand:V8SF 0 "register_operand" "")
(sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
"TARGET_AVX"
{
if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
&& flag_finite_math_only && !flag_trapping_math
&& flag_unsafe_math_optimizations)
{
ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
DONE;
}
})
(define_insn "avx_sqrtv8sf2"
[(set (match_operand:V8SF 0 "register_operand" "=x")
(sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
"TARGET_AVX"
"vsqrtps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
(define_expand "sqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "")
(sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
"TARGET_SSE"
{
if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
&& flag_finite_math_only && !flag_trapping_math
&& flag_unsafe_math_optimizations)
{
ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 0);
DONE;
}
})
(define_insn "sse_sqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
"TARGET_SSE"
"%vsqrtps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
(set_attr "atom_sse_attr" "sqrt")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "V4SF")])
(define_insn "sqrtv4df2"
[(set (match_operand:V4DF 0 "register_operand" "=x")
(sqrt:V4DF (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
"TARGET_AVX"
"vsqrtpd\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
(define_insn "sqrtv2df2"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
"%vsqrtpd\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "V2DF")])
(define_insn "*avx_vmsqrt2"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(vec_merge:SSEMODEF2P
(sqrt:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
(match_operand:SSEMODEF2P 2 "register_operand" "x")
(const_int 1)))]
"AVX_VEC_FLOAT_MODE_P (mode)"
"vsqrts\t{%1, %2, %0|%0, %2, %1}"
[(set_attr "type" "sse")
(set_attr "prefix" "vex")
(set_attr "mode" "")])
(define_insn "_vmsqrt2"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(vec_merge:SSEMODEF2P
(sqrt:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm"))
(match_operand:SSEMODEF2P 2 "register_operand" "0")
(const_int 1)))]
"SSE_VEC_FLOAT_MODE_P (mode)"
"sqrts\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
(set_attr "atom_sse_attr" "sqrt")
(set_attr "mode" "")])
(define_expand "rsqrtv8sf2"
[(set (match_operand:V8SF 0 "register_operand" "")
(unspec:V8SF
[(match_operand:V8SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
"TARGET_AVX && TARGET_SSE_MATH"
{
ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 1);
DONE;
})
(define_insn "avx_rsqrtv8sf2"
[(set (match_operand:V8SF 0 "register_operand" "=x")
(unspec:V8SF
[(match_operand:V8SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
"TARGET_AVX"
"vrsqrtps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
(define_expand "rsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "")
(unspec:V4SF
[(match_operand:V4SF 1 "nonimmediate_operand" "")] UNSPEC_RSQRT))]
"TARGET_SSE_MATH"
{
ix86_emit_swsqrtsf (operands[0], operands[1], V4SFmode, 1);
DONE;
})
(define_insn "sse_rsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(unspec:V4SF
[(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
"TARGET_SSE"
"%vrsqrtps\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "V4SF")])
(define_insn "*avx_vmrsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
(unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
UNSPEC_RSQRT)
(match_operand:V4SF 2 "register_operand" "x")
(const_int 1)))]
"TARGET_AVX"
"vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
[(set_attr "type" "sse")
(set_attr "prefix" "vex")
(set_attr "mode" "SF")])
(define_insn "sse_vmrsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
(unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
UNSPEC_RSQRT)
(match_operand:V4SF 2 "register_operand" "0")
(const_int 1)))]
"TARGET_SSE"
"rsqrtss\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
(set_attr "mode" "SF")])
;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
;; isn't really correct, as those rtl operators aren't defined when
;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
(define_expand "3"
[(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
(smaxmin:AVX256MODEF2P
(match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
(match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
"AVX256_VEC_FLOAT_MODE_P (mode)"
{
if (!flag_finite_math_only)
operands[1] = force_reg (mode, operands[1]);
ix86_fixup_binary_operands_no_copy (, mode, operands);
})
(define_expand "3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "")
(smaxmin:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
"SSE_VEC_FLOAT_MODE_P (mode)"
{
if (!flag_finite_math_only)
operands[1] = force_reg (mode, operands[1]);
ix86_fixup_binary_operands_no_copy (, mode, operands);
})
(define_insn "*avx_3_finite"
[(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
(smaxmin:AVXMODEF2P
(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
(match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
"AVX_VEC_FLOAT_MODE_P (mode) && flag_finite_math_only
&& ix86_binary_operator_ok (, mode, operands)"
"vp\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
(set_attr "prefix" "vex")
(set_attr "mode" "")])
(define_insn "*3_finite"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(smaxmin:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
"SSE_VEC_FLOAT_MODE_P (mode) && flag_finite_math_only
&& ix86_binary_operator_ok (, mode, operands)"
"p\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
(set_attr "mode" "")])
(define_insn "*avx_3"
[(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
(smaxmin:AVXMODEF2P
(match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
(match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
"AVX_VEC_FLOAT_MODE_P (mode)"
"vp\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
(set_attr "prefix" "vex")
(set_attr "mode" "")])
(define_insn "*3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(smaxmin:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "register_operand" "0")
(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
"SSE_VEC_FLOAT_MODE_P (mode)"
"p\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
(set_attr "mode" "")])
(define_insn "*avx_vm3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(vec_merge:SSEMODEF2P
(smaxmin:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "register_operand" "x")
(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
"AVX128_VEC_FLOAT_MODE_P (mode)"
"vs\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sse")
(set_attr "prefix" "vex")
(set_attr "mode" "")])
(define_insn "_vm3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(vec_merge:SSEMODEF2P
(smaxmin:SSEMODEF2P
(match_operand:SSEMODEF2P 1 "register_operand" "0")
(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
"SSE_VEC_FLOAT_MODE_P (mode)"
"s\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
(set_attr "mode" "")])
;; These versions of the min/max patterns implement exactly the operations
;; min = (op1 < op2 ? op1 : op2)
;; max = (!(op1 < op2) ? op1 : op2)
;; Their operands are not commutative, and thus they may be used in the
;; presence of -0.0 and NaN.
(define_insn "*avx_ieee_smin3"
[(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
(unspec:AVXMODEF2P
[(match_operand:AVXMODEF2P 1 "register_operand" "x")
(match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
UNSPEC_IEEE_MIN))]
"AVX_VEC_FLOAT_MODE_P (mode)"
"vminp\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
(set_attr "prefix" "vex")
(set_attr "mode" "")])
(define_insn "*avx_ieee_smax3"
[(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
(unspec:AVXMODEF2P
[(match_operand:AVXMODEF2P 1 "register_operand" "x")
(match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]
UNSPEC_IEEE_MAX))]
"AVX_VEC_FLOAT_MODE_P (mode)"
"vmaxp\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
(set_attr "prefix" "vex")
(set_attr "mode" "")])
(define_insn "*ieee_smin3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(unspec:SSEMODEF2P
[(match_operand:SSEMODEF2P 1 "register_operand" "0")
(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
UNSPEC_IEEE_MIN))]
"SSE_VEC_FLOAT_MODE_P (mode)"
"minp\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
(set_attr "mode" "")])
(define_insn "*ieee_smax3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
(unspec:SSEMODEF2P
[(match_operand:SSEMODEF2P 1 "register_operand" "0")
(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
UNSPEC_IEEE_MAX))]
"SSE_VEC_FLOAT_MODE_P (mode)"
"maxp\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
(set_attr "mode" "")])
(define_insn "avx_addsubv8sf3"
[(set (match_operand:V8SF 0 "register_operand" "=x")
(vec_merge:V8SF
(plus:V8SF
(match_operand:V8SF 1 "register_operand" "x")
(match_operand:V8SF 2 "nonimmediate_operand" "xm"))
(minus:V8SF (match_dup 1) (match_dup 2))
(const_int 170)))]
"TARGET_AVX"
"vaddsubps\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
(define_insn "avx_addsubv4df3"
[(set (match_operand:V4DF 0 "register_operand" "=x")
(vec_merge:V4DF
(plus:V4DF
(match_operand:V4DF 1 "register_operand" "x")
(match_operand:V4DF 2 "nonimmediate_operand" "xm"))
(minus:V4DF (match_dup 1) (match_dup 2))
(const_int 10)))]
"TARGET_AVX"
"vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
(define_insn "*avx_addsubv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
(plus:V4SF
(match_operand:V4SF 1 "register_operand" "x")
(match_operand:V4SF 2 "nonimmediate_operand" "xm"))
(minus:V4SF (match_dup 1) (match_dup 2))
(const_int 10)))]
"TARGET_AVX"
"vaddsubps\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
(set_attr "prefix" "vex")
(set_attr "mode" "V4SF")])
(define_insn "sse3_addsubv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
(plus:V4SF
(match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "nonimmediate_operand" "xm"))
(minus:V4SF (match_dup 1) (match_dup 2))
(const_int 10)))]
"TARGET_SSE3"
"addsubps\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
(set_attr "prefix_rep" "1")
(set_attr "mode" "V4SF")])
(define_insn "*avx_addsubv2df3"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(vec_merge:V2DF
(plus:V2DF
(match_operand:V2DF 1 "register_operand" "x")
(match_operand:V2DF 2 "nonimmediate_operand" "xm"))
(minus:V2DF (match_dup 1) (match_dup 2))
(const_int 2)))]
"TARGET_AVX"
"vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
(set_attr "prefix" "vex")
(set_attr "mode" "V2DF")])
(define_insn "sse3_addsubv2df3"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(vec_merge:V2DF
(plus:V2DF
(match_operand:V2DF 1 "register_operand" "0")
(match_operand:V2DF 2 "nonimmediate_operand" "xm"))
(minus:V2DF (match_dup 1) (match_dup 2))
(const_int 2)))]
"TARGET_SSE3"
"addsubpd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
(set_attr "atom_unit" "complex")
(set_attr "mode" "V2DF")])
(define_insn "avx_hv4df3"
[(set (match_operand:V4DF 0 "register_operand" "=x")
(vec_concat:V4DF
(vec_concat:V2DF
(plusminus:DF
(vec_select:DF
(match_operand:V4DF 1 "register_operand" "x")
(parallel [(const_int 0)]))
(vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
(plusminus:DF
(vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
(vec_select:DF (match_dup 1) (parallel [(const_int 3)]))))
(vec_concat:V2DF
(plusminus:DF
(vec_select:DF
(match_operand:V4DF 2 "nonimmediate_operand" "xm")
(parallel [(const_int 0)]))
(vec_select:DF (match_dup 2) (parallel [(const_int 1)])))
(plusminus:DF
(vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
(vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
"TARGET_AVX"
"vhpd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
(define_insn "avx_hv8sf3"
[(set (match_operand:V8SF 0 "register_operand" "=x")
(vec_concat:V8SF
(vec_concat:V4SF
(vec_concat:V2SF
(plusminus:SF
(vec_select:SF
(match_operand:V8SF 1 "register_operand" "x")
(parallel [(const_int 0)]))
(vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
(plusminus:SF
(vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
(vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
(vec_concat:V2SF
(plusminus:SF
(vec_select:SF
(match_operand:V8SF 2 "nonimmediate_operand" "xm")
(parallel [(const_int 0)]))
(vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
(plusminus:SF
(vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
(vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
(vec_concat:V4SF
(vec_concat:V2SF
(plusminus:SF
(vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
(vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
(plusminus:SF
(vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
(vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
(vec_concat:V2SF
(plusminus:SF
(vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
(vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
(plusminus:SF
(vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
(vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
"TARGET_AVX"
"vhps\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
(define_insn "*avx_hv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_concat:V4SF
(vec_concat:V2SF
(plusminus:SF
(vec_select:SF
(match_operand:V4SF 1 "register_operand" "x")
(parallel [(const_int 0)]))
(vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
(plusminus:SF
(vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
(vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
(vec_concat:V2SF
(plusminus:SF
(vec_select:SF
(match_operand:V4SF 2 "nonimmediate_operand" "xm")
(parallel [(const_int 0)]))
(vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
(plusminus:SF
(vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
(vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
"TARGET_AVX"
"vhps\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
(set_attr "prefix" "vex")
(set_attr "mode" "V4SF")])
(define_insn "sse3_hv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_concat:V4SF
(vec_concat:V2SF
(plusminus:SF
(vec_select:SF
(match_operand:V4SF 1 "register_operand" "0")
(parallel [(const_int 0)]))
(vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
(plusminus:SF
(vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
(vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
(vec_concat:V2SF
(plusminus:SF
(vec_select:SF
(match_operand:V4SF 2 "nonimmediate_operand" "xm")
(parallel [(const_int 0)]))
(vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
(plusminus:SF
(vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
(vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
"TARGET_SSE3"
"hps\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
(set_attr "atom_unit" "complex")
(set_attr "prefix_rep" "1")
(set_attr "mode" "V4SF")])
(define_insn "*avx_hv2df3"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(vec_concat:V2DF
(plusminus:DF
(vec_select:DF
(match_operand:V2DF 1 "register_operand" "x")
(parallel [(const_int 0)]))
(vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
(plusminus:DF
(vec_select:DF
(match_operand:V2DF 2 "nonimmediate_operand" "xm")
(parallel [(const_int 0)]))
(vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
"TARGET_AVX"
"vhpd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
(set_attr "prefix" "vex")
(set_attr "mode" "V2DF")])
(define_insn "sse3_hv2df3"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(vec_concat:V2DF
(plusminus:DF
(vec_select:DF
(match_operand:V2DF 1 "register_operand" "0")
(parallel [(const_int 0)]))
(vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
(plusminus:DF
(vec_select:DF
(match_operand:V2DF 2 "nonimmediate_operand" "xm")
(parallel [(const_int 0)]))
(vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
"TARGET_SSE3"
"hpd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
(set_attr "mode" "V2DF")])
(define_expand "reduc_splus_v8sf"
[(match_operand:V8SF 0 "register_operand" "")
(match_operand:V8SF 1 "register_operand" "")]
"TARGET_AVX"
{
rtx tmp = gen_reg_rtx (V8SFmode);
rtx tmp2 = gen_reg_rtx (V8SFmode);
emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
emit_insn (gen_avx_haddv8sf3 (tmp2, operands[1], operands[1]));
emit_insn (gen_avx_haddv8sf3 (operands[0], tmp2, tmp2));
DONE;
})
(define_expand "reduc_splus_v4sf"
[(match_operand:V4SF 0 "register_operand" "")
(match_operand:V4SF 1 "register_operand" "")]
"TARGET_SSE"
{
if (TARGET_SSE3)
{
rtx tmp = gen_reg_rtx (V4SFmode);
emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
}
else
ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
DONE;
})
(define_expand "reduc_splus_v4df"
[(match_operand:V4DF 0 "register_operand" "")
(match_operand:V4DF 1 "register_operand" "")]
"TARGET_AVX"
{
rtx tmp = gen_reg_rtx (V4DFmode);
emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
emit_insn (gen_avx_haddv4df3 (operands[0], tmp, tmp));
DONE;
})
(define_expand "reduc_splus_v2df"
[(match_operand:V2DF 0 "register_operand" "")
(match_operand:V2DF 1 "register_operand" "")]
"TARGET_SSE3"
{
emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
DONE;
})
(define_expand "reduc_smax_v4sf"
[(match_operand:V4SF 0 "register_operand" "")
(match_operand:V4SF 1 "register_operand" "")]
"TARGET_SSE"
{
ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
DONE;
})
(define_expand "reduc_smin_v4sf"
[(match_operand:V4SF 0 "register_operand" "")
(match_operand:V4SF 1 "register_operand" "")]
"TARGET_SSE"
{
ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
DONE;
})
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel floating point comparisons
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define_insn "avx_cmpp3"
[(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
(unspec:AVXMODEF2P
[(match_operand:AVXMODEF2P 1 "register_operand" "x")
(match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")
(match_operand:SI 3 "const_0_to_31_operand" "n")]
UNSPEC_PCMP))]
"TARGET_AVX"
"vcmpp\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ssecmp")
(set_attr "length_immediate" "1")
(set_attr "prefix" "vex")
(set_attr "mode" "")])
(define_insn "avx_cmps3"
[(set (match_operand:SSEMODEF2P 0 "register_operand" "")
(vec_merge:SSEMODEF2P
(unspec:SSEMODEF2P
[(match_operand:SSEMODEF2P 1 "register_operand" "x")
(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
(match_operand:SI 3 "const_0_to_31_operand" "n")]
UNSPEC_PCMP)
(match_dup 1)
(const_int 1)))]
"TARGET_AVX"
"vcmps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ssecmp")
(set_attr "length_immediate" "1")
(set_attr "prefix" "vex")
(set_attr "mode" "")])
;; We don't promote 128bit vector compare intrinsics. But vectorizer
;; may generate 256bit vector compare instructions.
(define_insn "*avx_maskcmp3"
[(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
(match_operator:AVXMODEF2P 3 "avx_comparison_float_operator"
[(match_operand:AVXMODEF2P 1 "register_operand" "x")
(match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
"AVX_VEC_FLOAT_MODE_P (mode)"
"vcmp%D3p\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssecmp")
(set_attr "prefix" "vex")
(set_attr "length_immediate" "1")
(set_attr "mode" "")])
(define_insn "_maskcmp3"
[(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
(match_operator:SSEMODEF4 3 "sse_comparison_operator"
[(match_operand:SSEMODEF4 1 "register_operand" "0")
(match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
"!TARGET_XOP
&& (SSE_FLOAT_MODE_P (mode) || SSE_VEC_FLOAT_MODE_P (