summary | refs | log | tree | commit | diff
path: root/compiler/codeGen
diff options
context:
space:
mode:
author: Geoffrey Mainland <gmainlan@microsoft.com> 2012-10-31 15:42:01 +0000
committer: Geoffrey Mainland <gmainlan@microsoft.com> 2013-02-01 22:00:24 +0000
commit: 33bfc6a700eaab9bc06974d6f71a80e61d9177c9 (patch)
tree: 842bf82b7dbdafe04820349e5e991800a3cf0646 /compiler/codeGen
parent: 1811440833da92eefd7b7255915855fddc64994c (diff)
download: haskell-33bfc6a700eaab9bc06974d6f71a80e61d9177c9.tar.gz
Add support for passing SSE vectors in registers.
This patch adds support for 6 XMM registers on x86-64 which overlap with the F and D registers and may hold 128-bit wide SIMD vectors. Because there is not a good way to attach type information to STG registers, we aggressively bitcast in the LLVM back-end.
Diffstat (limited to 'compiler/codeGen')
-rw-r--r-- compiler/codeGen/CgUtils.hs 7
-rw-r--r-- compiler/codeGen/StgCmmPrim.hs 18
2 files changed, 20 insertions, 5 deletions
diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs
index bdb7f69b11..c06dd60cb1 100644
--- a/compiler/codeGen/CgUtils.hs
+++ b/compiler/codeGen/CgUtils.hs
@@ -49,6 +49,13 @@ baseRegOffset dflags (DoubleReg 4) = oFFSET_StgRegTable_rD4 dflags
baseRegOffset dflags (DoubleReg 5) = oFFSET_StgRegTable_rD5 dflags
baseRegOffset dflags (DoubleReg 6) = oFFSET_StgRegTable_rD6 dflags
baseRegOffset _ (DoubleReg n) = panic ("Registers above D6 are not supported (tried to use D" ++ show n ++ ")")
+baseRegOffset dflags (XmmReg 1) = oFFSET_StgRegTable_rXMM1 dflags
+baseRegOffset dflags (XmmReg 2) = oFFSET_StgRegTable_rXMM2 dflags
+baseRegOffset dflags (XmmReg 3) = oFFSET_StgRegTable_rXMM3 dflags
+baseRegOffset dflags (XmmReg 4) = oFFSET_StgRegTable_rXMM4 dflags
+baseRegOffset dflags (XmmReg 5) = oFFSET_StgRegTable_rXMM5 dflags
+baseRegOffset dflags (XmmReg 6) = oFFSET_StgRegTable_rXMM6 dflags
+baseRegOffset _ (XmmReg n) = panic ("Registers above XMM6 are not supported (tried to use XMM" ++ show n ++ ")")
baseRegOffset dflags Sp = oFFSET_StgRegTable_rSp dflags
baseRegOffset dflags SpLim = oFFSET_StgRegTable_rSpLim dflags
baseRegOffset dflags (LongReg 1) = oFFSET_StgRegTable_rL1 dflags
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index 10a514b6e1..4e0d773097 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -1183,8 +1183,11 @@ doVecPackOp maybe_pre_write_cast ty z es res = do
vecPack src (e : es) i = do
dst <- newTemp ty
- emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
- [CmmReg (CmmLocal src), cast e, iLit])
+ if isFloatType (vecElemType ty)
+ then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
+ [CmmReg (CmmLocal src), cast e, iLit])
+ else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
+ [CmmReg (CmmLocal src), cast e, iLit])
vecPack dst es (i + 1)
where
-- vector indices are always 32-bits
@@ -1214,8 +1217,11 @@ doVecUnpackOp maybe_post_read_cast ty e res =
return ()
vecUnpack (r : rs) i = do
- emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
- [e, iLit]))
+ if isFloatType (vecElemType ty)
+ then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
+ [e, iLit]))
+ else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
+ [e, iLit]))
vecUnpack rs (i + 1)
where
-- vector indices are always 32-bits
@@ -1244,7 +1250,9 @@ doVecInsertOp maybe_pre_write_cast ty src e idx res = do
-- vector indices are always 32-bits
let idx' :: CmmExpr
idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
- emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
+ if isFloatType (vecElemType ty)
+ then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
+ else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
where
cast :: CmmExpr -> CmmExpr
cast val = case maybe_pre_write_cast of