Diffstat (limited to 'compiler/nativeGen/X86')
-rw-r--r--  compiler/nativeGen/X86/CodeGen.hs | 31
-rw-r--r--  compiler/nativeGen/X86/Instr.hs   | 20
-rw-r--r--  compiler/nativeGen/X86/Ppr.hs     |  9
3 files changed, 57 insertions(+), 3 deletions(-)
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index 73cfb28d46..fe6d4d0499 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -284,11 +284,13 @@ data ChildCode64
 data Register
        = Fixed Format Reg InstrBlock
        | Any   Format (Reg -> InstrBlock)
+       | AnyV  (Reg -> InstrBlock)

 swizzleRegisterRep :: Register -> Format -> Register
 swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code
 swizzleRegisterRep (Any _ codefn) format = Any format codefn
+swizzleRegisterRep r@(AnyV{}) _ = r
-- | Grab the Reg for a CmmReg
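
The new AnyV constructor deliberately drops the Format argument: a vector
result always lands in an SSE-class register, so there is no format to
consult when choosing one. A minimal sketch of how a consumer discharges the
three constructors (useRegister is an illustrative name; getSomeReg below
does exactly this for real):

    -- Sketch, not part of the patch: choose a destination register for
    -- each Register shape and materialise its instruction block.
    useRegister :: Register -> NatM (Reg, InstrBlock)
    useRegister (Fixed _ reg code) = return (reg, code)
    useRegister (Any fmt codefn) = do
        tmp <- getNewRegNat fmt          -- format-directed scalar virtual
        return (tmp, codefn tmp)
    useRegister (AnyV codefn) = do
        tmp <- getVectorReg              -- SSE-class virtual, no format needed
        return (tmp, codefn tmp)
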
@@ -368,6 +370,9 @@ getSomeReg expr = do
     Any rep code -> do
         tmp <- getNewRegNat rep
         return (tmp, code tmp)
+    AnyV code -> do
+        tmp <- getVectorReg
+        return (tmp, code tmp)
     Fixed _ reg code ->
         return (reg, code)
@@ -484,6 +489,16 @@ iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
r_dst_lo
)
+iselExpr64 (CmmMachOp (MO_V_Add len width) [expr1, expr2]) = do
+     r_dst <- getVectorReg
+     ChildCode64 code1 r1 <- iselExpr64 expr1
+     ChildCode64 code2 r2 <- iselExpr64 expr2
+     let fmt = VecFormat len width FmtInt
+     return $ ChildCode64 (code1 `appOL` code2 `appOL`
+        toOL [ V_MOV fmt (OpReg r1) (OpReg r_dst)   -- r_dst := r1 (src-then-dst, as in reg2reg)
+             , V_ADD fmt (OpReg r2) (OpReg r_dst)   -- r_dst := r_dst + r2
+             ]) r_dst
+
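
For a concrete picture of what the new MO_V_Add clause emits, here is the
instruction pair it builds for a hypothetical 2 x W64 integer add
(emittedVecAdd and the register names are illustrative, not part of the
patch):

    -- Sketch: the OrdList appended after the operands' own code.
    emittedVecAdd :: Reg -> Reg -> Reg -> OrdList Instr
    emittedVecAdd r1 r2 r_dst = toOL
        [ V_MOV fmt (OpReg r1) (OpReg r_dst)  -- r_dst := r1         (movdqa)
        , V_ADD fmt (OpReg r2) (OpReg r_dst)  -- r_dst := r_dst + r2 (paddq)
        ]
      where fmt = VecFormat 2 W64 FmtInt
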
iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do
fn <- getAnyReg expr
r_dst_lo <- getNewRegNat II32
@@ -501,6 +516,8 @@ iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do
iselExpr64 expr
= pprPanic "iselExpr64(i386)" (ppr expr)
+getVectorReg :: NatM Reg
+getVectorReg = RegVirtual . VirtualRegSSE <$> getUniqueM
--------------------------------------------------------------------------------
getRegister :: CmmExpr -> NatM Register
@@ -822,7 +839,7 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
MO_V_Insert {} -> needLlvm
MO_V_Extract {} -> needLlvm
- MO_V_Add {} -> needLlvm
+ MO_V_Add {} -> triv_op W512 V_ADD
MO_V_Sub {} -> needLlvm
MO_V_Mul {} -> needLlvm
MO_VS_Quot {} -> needLlvm
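
Note that triv_op hands its instruction constructor a scalar Format (it goes
through trivialCode and intFormat), while V_ADD's first argument is a
VecFormat, so this line presumably relies on further changes to triv_op or
trivialCode that this hunk does not show. For reference, the helper has
roughly this shape in the unmodified file (reconstructed, an assumption):

    -- triv_op width instr = trivialCode width op (Just op) x y
    --   where op = instr (intFormat width)
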
@@ -985,6 +1002,12 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
getRegister' _ _ (CmmLoad mem pk)
+  | isVecType pk
+  = do
+      code <- vecLoadCode (V_MOV format) mem
+      return (AnyV code)
+
+getRegister' _ _ (CmmLoad mem pk)
| isFloatType pk
= do
Amode addr mem_code <- getAmode mem
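
Two names in this new clause, format and vecLoadCode, are not bound in the
lines shown. Presumably format is derived from the vector type pk and
vecLoadCode builds the load in the shape AnyV expects; a sketch under those
assumptions (both helpers are illustrative, not confirmed parts of the
patch):

    -- Assumed: derive a VecFormat from a Cmm vector type, using the
    -- CmmType accessors vecLength, vecElemType and typeWidth.
    vecFormatOf :: CmmType -> VecFormat
    vecFormatOf pk
        | isFloatType elemTy = VecFormat len (typeWidth elemTy) FmtFloat
        | otherwise          = VecFormat len (typeWidth elemTy) FmtInt
      where
        len    = vecLength pk
        elemTy = vecElemType pk

    -- Assumed: build the load code; the destination register is supplied
    -- later by whoever discharges the AnyV.
    vecLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr -> NatM (Reg -> InstrBlock)
    vecLoadCode mkLoad mem = do
        Amode addr mem_code <- getAmode mem
        return (\dst -> mem_code `snocOL` mkLoad (OpAddr addr) (OpReg dst))
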
@@ -1096,6 +1119,9 @@ getByteReg expr = do
-- ToDo: could optimise slightly by checking for
-- byte-addressable real registers, but that will
-- happen very rarely if at all.
+        AnyV code -> do
+            tmp <- getVectorReg
+            return (tmp, code tmp)
else getSomeReg expr -- all regs are byte-addressable on x86_64
-- Another variant: this time we want the result in a register that cannot
@@ -1116,6 +1142,9 @@ getNonClobberedReg expr = do
             return (tmp, code `snocOL` reg2reg rep reg tmp)
         | otherwise ->
             return (reg, code)
+    AnyV code -> do
+        tmp <- getVectorReg
+        return (tmp, code tmp)
reg2reg :: Format -> Reg -> Reg -> Instr
reg2reg format src dst = MOV format (OpReg src) (OpReg dst)
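
reg2reg itself remains scalar-only; a vector analogue, if one becomes
necessary, would presumably route through V_MOV (illustrative sketch,
mirroring reg2reg's src-then-dst operand order):

    vecReg2Reg :: VecFormat -> Reg -> Reg -> Instr
    vecReg2Reg fmt src dst = V_MOV fmt (OpReg src) (OpReg dst)
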
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index 6e5d656beb..e59594365b 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -8,8 +8,9 @@
--
-----------------------------------------------------------------------------
-module X86.Instr (Instr(..), Operand(..), PrefetchVariant(..), JumpDest(..),
- getJumpDestBlockId, canShortcut, shortcutStatics,
+module X86.Instr (Instr(..), Operand(..), PrefetchVariant(..),
+ VecFormat(..), ScalarFormat(..),
+ JumpDest(..), getJumpDestBlockId, canShortcut, shortcutStatics,
shortcutJump, allocMoreStack,
maxSpillSlots, archWordFormat )
where
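
Exporting VecFormat(..) and ScalarFormat(..) lets downstream modules such as
the pretty-printer pattern-match on the new formats directly, e.g.
(illustrative import):

    import X86.Instr (Instr(..), Operand(..), VecFormat(..), ScalarFormat(..))
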
@@ -71,6 +72,19 @@ instance Instruction Instr where
mkStackAllocInstr = x86_mkStackAllocInstr
mkStackDeallocInstr = x86_mkStackDeallocInstr
+data VecFormat = VecFormat { vecLength :: !Length
+                           , vecWidth  :: !Width
+                           , vecScalar :: !ScalarFormat
+                           }
+
+instance Outputable VecFormat where
+  ppr (VecFormat l w f) = ppr (l, w, f)
+
+data ScalarFormat = FmtFloat | FmtInt
+
+instance Outputable ScalarFormat where
+  ppr FmtFloat = text "float"
+  ppr FmtInt   = text "int"
-- -----------------------------------------------------------------------------
-- Intel x86 instructions
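
Example values of the new format type, matching the two shapes the Ppr
changes below know how to print (the names are illustrative):

    packedSingles, packedQuadInts :: VecFormat
    packedSingles  = VecFormat { vecLength = 4, vecWidth = W32, vecScalar = FmtFloat }
    packedQuadInts = VecFormat { vecLength = 2, vecWidth = W64, vecScalar = FmtInt }
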
@@ -193,6 +207,7 @@ data Instr
-- Moves.
| MOV Format Operand Operand
+ | V_MOV VecFormat Operand Operand
| CMOV Cond Format Operand Reg
| MOVZxL Format Operand Operand -- format is the size of operand 1
| MOVSxL Format Operand Operand -- format is the size of operand 1
@@ -205,6 +220,7 @@ data Instr
-- Int Arithmetic.
| ADD Format Operand Operand
+ | V_ADD VecFormat Operand Operand
| ADC Format Operand Operand
| SUB Format Operand Operand
| SBB Format Operand Operand
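
New Instr constructors normally also need cases in the register-allocator
hooks (regUsageOfInstr, patchRegsOfInstr); those hunks are not shown here.
Following the scalar MOV and ADD cases, they would presumably look like this
(sketch, an assumption):

    -- regUsageOfInstr fragments: V_MOV reads src and writes dst;
    -- V_ADD reads src and both reads and modifies dst.
    --   V_MOV _ src dst -> usageRW src dst
    --   V_ADD _ src dst -> usageRM src dst
    --
    -- patchRegsOfInstr fragments, via the local patch2 helper:
    --   V_MOV fmt src dst -> patch2 (V_MOV fmt) src dst
    --   V_ADD fmt src dst -> patch2 (V_ADD fmt) src dst
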
diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs
index 095d9eba7c..7a799cfabd 100644
--- a/compiler/nativeGen/X86/Ppr.hs
+++ b/compiler/nativeGen/X86/Ppr.hs
@@ -833,6 +833,15 @@ pprInstr (XADD format src dst) = pprFormatOpOp (sLit "xadd") format src dst
pprInstr (CMPXCHG format src dst)
= pprFormatOpOp (sLit "cmpxchg") format src dst
+-- SIMD
+pprInstr (V_ADD format src dst)
+  | VecFormat _ W32 FmtFloat <- format = pprVecFormatOpOp (sLit "addps") format src dst
+  | VecFormat _ W64 FmtFloat <- format = pprVecFormatOpOp (sLit "addpd") format src dst
+  | VecFormat _ W64 FmtInt   <- format = pprVecFormatOpOp (sLit "paddq") format src dst
+pprInstr (V_MOV format src dst)
+  | VecFormat _ W32 FmtFloat <- format = pprVecFormatOpOp (sLit "movaps") format src dst
+  | VecFormat _ W64 FmtFloat <- format = pprVecFormatOpOp (sLit "movapd") format src dst
+  | VecFormat _ _   FmtInt   <- format = pprVecFormatOpOp (sLit "movdqa") format src dst
--------------------------
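
The integer cases above cover only what the patch exercises; a total version
of the V_ADD mnemonic choice would also handle 32-bit lanes via paddd
(sketch; vaddMnemonic is an illustrative name, and addps/addpd/paddd/paddq
are the SSE/SSE2 packed adds):

    vaddMnemonic :: VecFormat -> LitString
    vaddMnemonic (VecFormat _ W32 FmtFloat) = sLit "addps"  -- packed single float
    vaddMnemonic (VecFormat _ W64 FmtFloat) = sLit "addpd"  -- packed double float
    vaddMnemonic (VecFormat _ W32 FmtInt)   = sLit "paddd"  -- packed 32-bit int
    vaddMnemonic (VecFormat _ W64 FmtInt)   = sLit "paddq"  -- packed 64-bit int
    vaddMnemonic _ = panic "vaddMnemonic: unsupported vector format"
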