diff options
author | Geoffrey Mainland <gmainlan@microsoft.com> | 2012-10-31 15:42:01 +0000 |
---|---|---|
committer | Geoffrey Mainland <gmainlan@microsoft.com> | 2013-02-01 22:00:24 +0000 |
commit | 33bfc6a700eaab9bc06974d6f71a80e61d9177c9 (patch) | |
tree | 842bf82b7dbdafe04820349e5e991800a3cf0646 /compiler/codeGen | |
parent | 1811440833da92eefd7b7255915855fddc64994c (diff) | |
download | haskell-33bfc6a700eaab9bc06974d6f71a80e61d9177c9.tar.gz |
Add support for passing SSE vectors in registers.
This patch adds support for 6 XMM registers on x86-64, which overlap with the F
and D registers and may hold 128-bit-wide SIMD vectors. Because there is no
good way to attach type information to STG registers, we aggressively bitcast in
the LLVM back-end.
Diffstat (limited to 'compiler/codeGen')
-rw-r--r-- | compiler/codeGen/CgUtils.hs | 7 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmPrim.hs | 18 |
2 files changed, 20 insertions, 5 deletions
diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs index bdb7f69b11..c06dd60cb1 100644 --- a/compiler/codeGen/CgUtils.hs +++ b/compiler/codeGen/CgUtils.hs @@ -49,6 +49,13 @@ baseRegOffset dflags (DoubleReg 4) = oFFSET_StgRegTable_rD4 dflags baseRegOffset dflags (DoubleReg 5) = oFFSET_StgRegTable_rD5 dflags baseRegOffset dflags (DoubleReg 6) = oFFSET_StgRegTable_rD6 dflags baseRegOffset _ (DoubleReg n) = panic ("Registers above D6 are not supported (tried to use D" ++ show n ++ ")") +baseRegOffset dflags (XmmReg 1) = oFFSET_StgRegTable_rXMM1 dflags +baseRegOffset dflags (XmmReg 2) = oFFSET_StgRegTable_rXMM2 dflags +baseRegOffset dflags (XmmReg 3) = oFFSET_StgRegTable_rXMM3 dflags +baseRegOffset dflags (XmmReg 4) = oFFSET_StgRegTable_rXMM4 dflags +baseRegOffset dflags (XmmReg 5) = oFFSET_StgRegTable_rXMM5 dflags +baseRegOffset dflags (XmmReg 6) = oFFSET_StgRegTable_rXMM6 dflags +baseRegOffset _ (XmmReg n) = panic ("Registers above XMM6 are not supported (tried to use XMM" ++ show n ++ ")") baseRegOffset dflags Sp = oFFSET_StgRegTable_rSp dflags baseRegOffset dflags SpLim = oFFSET_StgRegTable_rSpLim dflags baseRegOffset dflags (LongReg 1) = oFFSET_StgRegTable_rL1 dflags diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs index 10a514b6e1..4e0d773097 100644 --- a/compiler/codeGen/StgCmmPrim.hs +++ b/compiler/codeGen/StgCmmPrim.hs @@ -1183,8 +1183,11 @@ doVecPackOp maybe_pre_write_cast ty z es res = do vecPack src (e : es) i = do dst <- newTemp ty - emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid) - [CmmReg (CmmLocal src), cast e, iLit]) + if isFloatType (vecElemType ty) + then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid) + [CmmReg (CmmLocal src), cast e, iLit]) + else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid) + [CmmReg (CmmLocal src), cast e, iLit]) vecPack dst es (i + 1) where -- vector indices are always 32-bits @@ -1214,8 +1217,11 @@ doVecUnpackOp maybe_post_read_cast ty e res = 
return () vecUnpack (r : rs) i = do - emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid) - [e, iLit])) + if isFloatType (vecElemType ty) + then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid) + [e, iLit])) + else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid) + [e, iLit])) vecUnpack rs (i + 1) where -- vector indices are always 32-bits @@ -1244,7 +1250,9 @@ doVecInsertOp maybe_pre_write_cast ty src e idx res = do -- vector indices are always 32-bits let idx' :: CmmExpr idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx] - emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx']) + if isFloatType (vecElemType ty) + then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx']) + else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx']) where cast :: CmmExpr -> CmmExpr cast val = case maybe_pre_write_cast of |