summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compiler/cmm/CmmCallConv.hs4
-rw-r--r--compiler/cmm/CmmExpr.hs10
-rw-r--r--compiler/cmm/CmmMachOp.hs46
-rw-r--r--compiler/cmm/PprC.hs9
-rw-r--r--compiler/cmm/PprCmmExpr.hs1
-rw-r--r--compiler/codeGen/CgUtils.hs7
-rw-r--r--compiler/codeGen/StgCmmPrim.hs18
-rw-r--r--compiler/llvmGen/LlvmCodeGen/Base.hs11
-rw-r--r--compiler/llvmGen/LlvmCodeGen/CodeGen.hs121
-rw-r--r--compiler/llvmGen/LlvmCodeGen/Regs.hs7
-rw-r--r--compiler/nativeGen/X86/CodeGen.hs88
-rw-r--r--includes/CodeGen.Platform.hs26
-rw-r--r--includes/stg/MachRegs.h24
-rw-r--r--includes/stg/Regs.h42
-rw-r--r--includes/stg/Types.h2
-rw-r--r--utils/deriveConstants/DeriveConstants.hs6
16 files changed, 308 insertions, 114 deletions
diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs
index dd4d6a6c1a..913f15d436 100644
--- a/compiler/cmm/CmmCallConv.hs
+++ b/compiler/cmm/CmmCallConv.hs
@@ -70,7 +70,9 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
assign_regs assts (r:rs) regs | isVecType ty = vec
| isFloatType ty = float
| otherwise = int
- where vec = (assts, (r:rs))
+ where vec = case (w, regs) of
+ (W128, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss))
+ _ -> (assts, (r:rs))
float = case (w, regs) of
(W32, (vs, fs, ds, ls, s:ss)) -> k (RegisterParam (FloatReg s), (vs, fs, ds, ls, ss))
(W32, (vs, f:fs, ds, ls, ss))
diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs
index dce962443b..1df8e848b8 100644
--- a/compiler/cmm/CmmExpr.hs
+++ b/compiler/cmm/CmmExpr.hs
@@ -336,6 +336,9 @@ data GlobalReg
| LongReg -- long int registers (64-bit, really)
{-# UNPACK #-} !Int -- its number
+ | XmmReg -- 128-bit SIMD vector register
+ {-# UNPACK #-} !Int -- its number
+
-- STG registers
| Sp -- Stack ptr; points to last occupied stack location.
| SpLim -- Stack limit
@@ -371,6 +374,7 @@ instance Eq GlobalReg where
FloatReg i == FloatReg j = i==j
DoubleReg i == DoubleReg j = i==j
LongReg i == LongReg j = i==j
+ XmmReg i == XmmReg j = i==j
Sp == Sp = True
SpLim == SpLim = True
Hp == Hp = True
@@ -392,6 +396,7 @@ instance Ord GlobalReg where
compare (FloatReg i) (FloatReg j) = compare i j
compare (DoubleReg i) (DoubleReg j) = compare i j
compare (LongReg i) (LongReg j) = compare i j
+ compare (XmmReg i) (XmmReg j) = compare i j
compare Sp Sp = EQ
compare SpLim SpLim = EQ
compare Hp Hp = EQ
@@ -413,6 +418,8 @@ instance Ord GlobalReg where
compare _ (DoubleReg _) = GT
compare (LongReg _) _ = LT
compare _ (LongReg _) = GT
+ compare (XmmReg _) _ = LT
+ compare _ (XmmReg _) = GT
compare Sp _ = LT
compare _ Sp = GT
compare SpLim _ = LT
@@ -455,6 +462,8 @@ globalRegType dflags (VanillaReg _ VNonGcPtr) = bWord dflags
globalRegType _ (FloatReg _) = cmmFloat W32
globalRegType _ (DoubleReg _) = cmmFloat W64
globalRegType _ (LongReg _) = cmmBits W64
+globalRegType _ (XmmReg _) = cmmVec 4 (cmmBits W32)
+
globalRegType dflags Hp = gcWord dflags
-- The initialiser for all
-- dynamically allocated closures
@@ -465,4 +474,5 @@ isArgReg (VanillaReg {}) = True
isArgReg (FloatReg {}) = True
isArgReg (DoubleReg {}) = True
isArgReg (LongReg {}) = True
+isArgReg (XmmReg {}) = True
isArgReg _ = False
diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs
index 4e38cd42b9..0f18029d20 100644
--- a/compiler/cmm/CmmMachOp.hs
+++ b/compiler/cmm/CmmMachOp.hs
@@ -118,6 +118,10 @@ data MachOp
| MO_VS_Rem Length Width
| MO_VS_Neg Length Width
+ -- Floting point vector element insertion and extraction operations
+ | MO_VF_Insert Length Width -- Insert scalar into vector
+ | MO_VF_Extract Length Width -- Extract scalar from vector
+
-- Floating point vector operations
| MO_VF_Add Length Width
| MO_VF_Sub Length Width
@@ -360,22 +364,25 @@ machOpResultType dflags mop tys =
MO_SF_Conv _ to -> cmmFloat to
MO_FF_Conv _ to -> cmmFloat to
- MO_V_Insert {} -> ty1
- MO_V_Extract {} -> vecElemType ty1
-
- MO_V_Add {} -> ty1
- MO_V_Sub {} -> ty1
- MO_V_Mul {} -> ty1
-
- MO_VS_Quot {} -> ty1
- MO_VS_Rem {} -> ty1
- MO_VS_Neg {} -> ty1
-
- MO_VF_Add {} -> ty1
- MO_VF_Sub {} -> ty1
- MO_VF_Mul {} -> ty1
- MO_VF_Quot {} -> ty1
- MO_VF_Neg {} -> ty1
+ MO_V_Insert l w -> cmmVec l (cmmBits w)
+ MO_V_Extract _ w -> cmmBits w
+
+ MO_V_Add l w -> cmmVec l (cmmBits w)
+ MO_V_Sub l w -> cmmVec l (cmmBits w)
+ MO_V_Mul l w -> cmmVec l (cmmBits w)
+
+ MO_VS_Quot l w -> cmmVec l (cmmBits w)
+ MO_VS_Rem l w -> cmmVec l (cmmBits w)
+ MO_VS_Neg l w -> cmmVec l (cmmBits w)
+
+ MO_VF_Insert l w -> cmmVec l (cmmFloat w)
+ MO_VF_Extract _ w -> cmmFloat w
+
+ MO_VF_Add l w -> cmmVec l (cmmFloat w)
+ MO_VF_Sub l w -> cmmVec l (cmmFloat w)
+ MO_VF_Mul l w -> cmmVec l (cmmFloat w)
+ MO_VF_Quot l w -> cmmVec l (cmmFloat w)
+ MO_VF_Neg l w -> cmmVec l (cmmFloat w)
where
(ty1:_) = tys
@@ -443,8 +450,8 @@ machOpArgReps dflags op =
MO_FS_Conv from _ -> [from]
MO_FF_Conv from _ -> [from]
- MO_V_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth dflags]
- MO_V_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth dflags]
+ MO_V_Insert l r -> [typeWidth (vec l (cmmBits r)),r,wordWidth dflags]
+ MO_V_Extract l r -> [typeWidth (vec l (cmmBits r)),wordWidth dflags]
MO_V_Add _ r -> [r,r]
MO_V_Sub _ r -> [r,r]
@@ -454,6 +461,9 @@ machOpArgReps dflags op =
MO_VS_Rem _ r -> [r,r]
MO_VS_Neg _ r -> [r]
+ MO_VF_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth dflags]
+ MO_VF_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth dflags]
+
MO_VF_Add _ r -> [r,r]
MO_VF_Sub _ r -> [r,r]
MO_VF_Mul _ r -> [r,r]
diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs
index 8712d5fb5c..cda68ef39e 100644
--- a/compiler/cmm/PprC.hs
+++ b/compiler/cmm/PprC.hs
@@ -661,6 +661,15 @@ pprMachOp_for_C mop = case mop of
(panic $ "PprC.pprMachOp_for_C: MO_VS_Neg"
++ " should have been handled earlier!")
+ MO_VF_Insert {} -> pprTrace "offending mop:"
+ (ptext $ sLit "MO_VF_Insert")
+ (panic $ "PprC.pprMachOp_for_C: MO_VF_Insert"
+ ++ " should have been handled earlier!")
+ MO_VF_Extract {} -> pprTrace "offending mop:"
+ (ptext $ sLit "MO_VF_Extract")
+ (panic $ "PprC.pprMachOp_for_C: MO_VF_Extract"
+ ++ " should have been handled earlier!")
+
MO_VF_Add {} -> pprTrace "offending mop:"
(ptext $ sLit "MO_VF_Add")
(panic $ "PprC.pprMachOp_for_C: MO_VF_Add"
diff --git a/compiler/cmm/PprCmmExpr.hs b/compiler/cmm/PprCmmExpr.hs
index 3c9fa063ff..d1128b07d3 100644
--- a/compiler/cmm/PprCmmExpr.hs
+++ b/compiler/cmm/PprCmmExpr.hs
@@ -255,6 +255,7 @@ pprGlobalReg gr
FloatReg n -> char 'F' <> int n
DoubleReg n -> char 'D' <> int n
LongReg n -> char 'L' <> int n
+ XmmReg n -> ptext (sLit "XMM") <> int n
Sp -> ptext (sLit "Sp")
SpLim -> ptext (sLit "SpLim")
Hp -> ptext (sLit "Hp")
diff --git a/compiler/codeGen/CgUtils.hs b/compiler/codeGen/CgUtils.hs
index bdb7f69b11..c06dd60cb1 100644
--- a/compiler/codeGen/CgUtils.hs
+++ b/compiler/codeGen/CgUtils.hs
@@ -49,6 +49,13 @@ baseRegOffset dflags (DoubleReg 4) = oFFSET_StgRegTable_rD4 dflags
baseRegOffset dflags (DoubleReg 5) = oFFSET_StgRegTable_rD5 dflags
baseRegOffset dflags (DoubleReg 6) = oFFSET_StgRegTable_rD6 dflags
baseRegOffset _ (DoubleReg n) = panic ("Registers above D6 are not supported (tried to use D" ++ show n ++ ")")
+baseRegOffset dflags (XmmReg 1) = oFFSET_StgRegTable_rXMM1 dflags
+baseRegOffset dflags (XmmReg 2) = oFFSET_StgRegTable_rXMM2 dflags
+baseRegOffset dflags (XmmReg 3) = oFFSET_StgRegTable_rXMM3 dflags
+baseRegOffset dflags (XmmReg 4) = oFFSET_StgRegTable_rXMM4 dflags
+baseRegOffset dflags (XmmReg 5) = oFFSET_StgRegTable_rXMM5 dflags
+baseRegOffset dflags (XmmReg 6) = oFFSET_StgRegTable_rXMM6 dflags
+baseRegOffset _ (XmmReg n) = panic ("Registers above XMM6 are not supported (tried to use XMM" ++ show n ++ ")")
baseRegOffset dflags Sp = oFFSET_StgRegTable_rSp dflags
baseRegOffset dflags SpLim = oFFSET_StgRegTable_rSpLim dflags
baseRegOffset dflags (LongReg 1) = oFFSET_StgRegTable_rL1 dflags
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index 10a514b6e1..4e0d773097 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -1183,8 +1183,11 @@ doVecPackOp maybe_pre_write_cast ty z es res = do
vecPack src (e : es) i = do
dst <- newTemp ty
- emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
- [CmmReg (CmmLocal src), cast e, iLit])
+ if isFloatType (vecElemType ty)
+ then emitAssign (CmmLocal dst) (CmmMachOp (MO_VF_Insert len wid)
+ [CmmReg (CmmLocal src), cast e, iLit])
+ else emitAssign (CmmLocal dst) (CmmMachOp (MO_V_Insert len wid)
+ [CmmReg (CmmLocal src), cast e, iLit])
vecPack dst es (i + 1)
where
-- vector indices are always 32-bits
@@ -1214,8 +1217,11 @@ doVecUnpackOp maybe_post_read_cast ty e res =
return ()
vecUnpack (r : rs) i = do
- emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
- [e, iLit]))
+ if isFloatType (vecElemType ty)
+ then emitAssign (CmmLocal r) (cast (CmmMachOp (MO_VF_Extract len wid)
+ [e, iLit]))
+ else emitAssign (CmmLocal r) (cast (CmmMachOp (MO_V_Extract len wid)
+ [e, iLit]))
vecUnpack rs (i + 1)
where
-- vector indices are always 32-bits
@@ -1244,7 +1250,9 @@ doVecInsertOp maybe_pre_write_cast ty src e idx res = do
-- vector indices are always 32-bits
let idx' :: CmmExpr
idx' = CmmMachOp (MO_SS_Conv (wordWidth dflags) W32) [idx]
- emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
+ if isFloatType (vecElemType ty)
+ then emitAssign (CmmLocal res) (CmmMachOp (MO_VF_Insert len wid) [src, cast e, idx'])
+ else emitAssign (CmmLocal res) (CmmMachOp (MO_V_Insert len wid) [src, cast e, idx'])
where
cast :: CmmExpr -> CmmExpr
cast val = case maybe_pre_write_cast of
diff --git a/compiler/llvmGen/LlvmCodeGen/Base.hs b/compiler/llvmGen/LlvmCodeGen/Base.hs
index 1457efe3cb..bcfce3401e 100644
--- a/compiler/llvmGen/LlvmCodeGen/Base.hs
+++ b/compiler/llvmGen/LlvmCodeGen/Base.hs
@@ -131,11 +131,12 @@ llvmFunArgs :: DynFlags -> LiveGlobalRegs -> [LlvmVar]
llvmFunArgs dflags live =
map (lmGlobalRegArg dflags) (filter isPassed (activeStgRegs platform))
where platform = targetPlatform dflags
- isLive r = not (isFloat r) || r `elem` alwaysLive || r `elem` live
- isPassed r = not (isFloat r) || isLive r
- isFloat (FloatReg _) = True
- isFloat (DoubleReg _) = True
- isFloat _ = False
+ isLive r = not (isSSE r) || r `elem` alwaysLive || r `elem` live
+ isPassed r = not (isSSE r) || isLive r
+ isSSE (FloatReg _) = True
+ isSSE (DoubleReg _) = True
+ isSSE (XmmReg _) = True
+ isSSE _ = False
-- | Llvm standard fun attributes
llvmStdFunAttrs :: [LlvmFuncAttr]
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
index efa7e9a706..969bca8ec0 100644
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
@@ -470,6 +470,7 @@ castVar dflags v t
(vt, _) | isInt vt && isPointer t -> LM_Inttoptr
(vt, _) | isPointer vt && isInt t -> LM_Ptrtoint
(vt, _) | isPointer vt && isPointer t -> LM_Bitcast
+ (vt, _) | isVector vt && isVector t -> LM_Bitcast
(vt, _) -> panic $ "castVars: Can't cast this type ("
++ show vt ++ ") to (" ++ show t ++ ")"
@@ -582,16 +583,21 @@ genAssign env reg val = do
let stmts = stmts1 `appOL` stmts2
let ty = (pLower . getVarType) vreg
- case isPointer ty && getVarType vval == llvmWord dflags of
- -- Some registers are pointer types, so need to cast value to pointer
- True -> do
- (v, s1) <- doExpr ty $ Cast LM_Inttoptr vval ty
- let s2 = Store v vreg
- return (env2, stmts `snocOL` s1 `snocOL` s2, top1 ++ top2)
+ case ty of
+ -- Some registers are pointer types, so need to cast value to pointer
+ LMPointer _ | getVarType vval == llvmWord dflags -> do
+ (v, s1) <- doExpr ty $ Cast LM_Inttoptr vval ty
+ let s2 = Store v vreg
+ return (env2, stmts `snocOL` s1 `snocOL` s2, top1 ++ top2)
- False -> do
- let s1 = Store vval vreg
- return (env2, stmts `snocOL` s1, top1 ++ top2)
+ LMVector _ _ -> do
+ (v, s1) <- doExpr ty $ Cast LM_Bitcast vval ty
+ let s2 = Store v vreg
+ return (env2, stmts `snocOL` s1 `snocOL` s2, top1 ++ top2)
+
+ _ -> do
+ let s1 = Store vval vreg
+ return (env2, stmts `snocOL` s1, top1 ++ top2)
-- | CmmStore operation
@@ -884,14 +890,14 @@ genMachOp env _ op [x] = case op of
vecty = LMVector len ty
all0 = LMIntLit (-0) ty
all0s = LMLitVar $ LMVectorLit (replicate len all0)
- in negate vecty all0s LM_MO_Sub
+ in negateVec vecty all0s LM_MO_Sub
MO_VF_Neg len w ->
let ty = widthToLlvmFloat w
vecty = LMVector len ty
all0 = LMFloatLit (-0) ty
all0s = LMLitVar $ LMVectorLit (replicate len all0)
- in negate vecty all0s LM_MO_FSub
+ in negateVec vecty all0s LM_MO_FSub
-- Handle unsupported cases explicitly so we get a warning
-- of missing case when new MachOps added
@@ -943,6 +949,9 @@ genMachOp env _ op [x] = case op of
MO_VS_Quot _ _ -> panicOp
MO_VS_Rem _ _ -> panicOp
+
+ MO_VF_Insert _ _ -> panicOp
+ MO_VF_Extract _ _ -> panicOp
MO_VF_Add _ _ -> panicOp
MO_VF_Sub _ _ -> panicOp
@@ -957,6 +966,12 @@ genMachOp env _ op [x] = case op of
(v1, s1) <- doExpr ty $ LlvmOp negOp v2 vx
return (env', v1, stmts `snocOL` s1, top)
+ negateVec ty v2 negOp = do
+ (env', vx, stmts1, top) <- exprToVar env x
+ ([vx'], stmts2) <- castVars dflags [(vx, ty)]
+ (v1, s1) <- doExpr ty $ LlvmOp negOp v2 vx'
+ return (env', v1, stmts1 `appOL` stmts2 `snocOL` s1, top)
+
fiConv ty convOp = do
(env', vx, stmts, top) <- exprToVar env x
(v1, s1) <- doExpr ty $ Cast convOp vx ty
@@ -1014,22 +1029,50 @@ genMachOp_fast env opt op r n e
genMachOp_slow :: LlvmEnv -> EOption -> MachOp -> [CmmExpr] -> UniqSM ExprData
-- Element extraction
-genMachOp_slow env _ (MO_V_Extract {}) [val, idx] = do
+genMachOp_slow env _ (MO_V_Extract l w) [val, idx] = do
+ (env1, vval, stmts1, top1) <- exprToVar env val
+ (env2, vidx, stmts2, top2) <- exprToVar env1 idx
+ ([vval'], stmts3) <- castVars dflags [(vval, LMVector l ty)]
+ (v1, s1) <- doExpr ty $ Extract vval' vidx
+ return (env2, v1, stmts1 `appOL` stmts2 `appOL` stmts3 `snocOL` s1, top1 ++ top2)
+ where
+ dflags = getDflags env
+ ty = widthToLlvmInt w
+
+genMachOp_slow env _ (MO_VF_Extract l w) [val, idx] = do
(env1, vval, stmts1, top1) <- exprToVar env val
(env2, vidx, stmts2, top2) <- exprToVar env1 idx
- let (LMVector _ ty) = getVarType vval
- (v1, s1) <- doExpr ty $ Extract vval vidx
- return (env2, v1, stmts1 `appOL` stmts2 `snocOL` s1, top1 ++ top2)
+ ([vval'], stmts3) <- castVars dflags [(vval, LMVector l ty)]
+ (v1, s1) <- doExpr ty $ Extract vval' vidx
+ return (env2, v1, stmts1 `appOL` stmts2 `appOL` stmts3 `snocOL` s1, top1 ++ top2)
+ where
+ dflags = getDflags env
+ ty = widthToLlvmFloat w
-- Element insertion
-genMachOp_slow env _ (MO_V_Insert {}) [val, elt, idx] = do
+genMachOp_slow env _ (MO_V_Insert l w) [val, elt, idx] = do
+ (env1, vval, stmts1, top1) <- exprToVar env val
+ (env2, velt, stmts2, top2) <- exprToVar env1 elt
+ (env3, vidx, stmts3, top3) <- exprToVar env2 idx
+ ([vval'], stmts4) <- castVars dflags [(vval, ty)]
+ (v1, s1) <- doExpr ty $ Insert vval' velt vidx
+ return (env3, v1, stmts1 `appOL` stmts2 `appOL` stmts3 `appOL` stmts4 `snocOL` s1,
+ top1 ++ top2 ++ top3)
+ where
+ dflags = getDflags env
+ ty = LMVector l (widthToLlvmInt w)
+
+genMachOp_slow env _ (MO_VF_Insert l w) [val, elt, idx] = do
(env1, vval, stmts1, top1) <- exprToVar env val
(env2, velt, stmts2, top2) <- exprToVar env1 elt
(env3, vidx, stmts3, top3) <- exprToVar env2 idx
- let ty = getVarType vval
- (v1, s1) <- doExpr ty $ Insert vval velt vidx
- return (env3, v1, stmts1 `appOL` stmts2 `appOL` stmts3 `snocOL` s1,
+ ([vval'], stmts4) <- castVars dflags [(vval, ty)]
+ (v1, s1) <- doExpr ty $ Insert vval' velt vidx
+ return (env3, v1, stmts1 `appOL` stmts2 `appOL` stmts3 `appOL` stmts4 `snocOL` s1,
top1 ++ top2 ++ top3)
+ where
+ dflags = getDflags env
+ ty = LMVector l (widthToLlvmFloat w)
-- Binary MachOp
genMachOp_slow env opt op [x, y] = case op of
@@ -1080,17 +1123,17 @@ genMachOp_slow env opt op [x, y] = case op of
MO_U_Shr _ -> genBinMach LM_MO_LShr
MO_S_Shr _ -> genBinMach LM_MO_AShr
- MO_V_Add _ _ -> genBinMach LM_MO_Add
- MO_V_Sub _ _ -> genBinMach LM_MO_Sub
- MO_V_Mul _ _ -> genBinMach LM_MO_Mul
+ MO_V_Add l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Add
+ MO_V_Sub l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Sub
+ MO_V_Mul l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_Mul
- MO_VS_Quot _ _ -> genBinMach LM_MO_SDiv
- MO_VS_Rem _ _ -> genBinMach LM_MO_SRem
+ MO_VS_Quot l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_SDiv
+ MO_VS_Rem l w -> genCastBinMach (LMVector l (widthToLlvmInt w)) LM_MO_SRem
- MO_VF_Add _ _ -> genBinMach LM_MO_FAdd
- MO_VF_Sub _ _ -> genBinMach LM_MO_FSub
- MO_VF_Mul _ _ -> genBinMach LM_MO_FMul
- MO_VF_Quot _ _ -> genBinMach LM_MO_FDiv
+ MO_VF_Add l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FAdd
+ MO_VF_Sub l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FSub
+ MO_VF_Mul l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FMul
+ MO_VF_Quot l w -> genCastBinMach (LMVector l (widthToLlvmFloat w)) LM_MO_FDiv
MO_Not _ -> panicOp
MO_S_Neg _ -> panicOp
@@ -1107,6 +1150,9 @@ genMachOp_slow env opt op [x, y] = case op of
MO_VS_Neg {} -> panicOp
+ MO_VF_Insert {} -> panicOp
+ MO_VF_Extract {} -> panicOp
+
MO_VF_Neg {} -> panicOp
where
@@ -1134,6 +1180,14 @@ genMachOp_slow env opt op [x, y] = case op of
`snocOL` dy `snocOL` s1
return (env2, v1, allStmts, top1 ++ top2)
+ binCastLlvmOp ty binOp = do
+ (env1, vx, stmts1, top1) <- exprToVar env x
+ (env2, vy, stmts2, top2) <- exprToVar env1 y
+ ([vx', vy'], stmts3) <- castVars dflags [(vx, ty), (vy, ty)]
+ (v1, s1) <- doExpr ty $ binOp vx' vy'
+ return (env2, v1, stmts1 `appOL` stmts2 `appOL` stmts3 `snocOL` s1,
+ top1 ++ top2)
+
-- | Need to use EOption here as Cmm expects word size results from
-- comparisons while LLVM return i1. Need to extend to llvmWord type
-- if expected. See Note [Literals and branch conditions].
@@ -1152,6 +1206,8 @@ genMachOp_slow env opt op [x, y] = case op of
genBinMach op = binLlvmOp getVarType (LlvmOp op)
+ genCastBinMach ty op = binCastLlvmOp ty (LlvmOp op)
+
-- | Detect if overflow will occur in signed multiply of the two
-- CmmExpr's. This is the LLVM assembly equivalent of the NCG
-- implementation. Its much longer due to type information/safety.
@@ -1427,10 +1483,11 @@ funEpilogue env live = do
dflags = getDflags env
platform = targetPlatform dflags
isLive r = r `elem` alwaysLive || r `elem` live
- isPassed r = not (isFloat r) || isLive r
- isFloat (FloatReg _) = True
- isFloat (DoubleReg _) = True
- isFloat _ = False
+ isPassed r = not (isSSE r) || isLive r
+ isSSE (FloatReg _) = True
+ isSSE (DoubleReg _) = True
+ isSSE (XmmReg _) = True
+ isSSE _ = False
loadExpr r | isLive r = do
let reg = lmGlobalRegVar dflags r
(v,s) <- doExpr (pLower $ getVarType reg) $ Load reg
diff --git a/compiler/llvmGen/LlvmCodeGen/Regs.hs b/compiler/llvmGen/LlvmCodeGen/Regs.hs
index e6cfcb2e18..7271c2f3d9 100644
--- a/compiler/llvmGen/LlvmCodeGen/Regs.hs
+++ b/compiler/llvmGen/LlvmCodeGen/Regs.hs
@@ -55,6 +55,12 @@ lmGlobalReg dflags suf reg
DoubleReg 4 -> doubleGlobal $ "D4" ++ suf
DoubleReg 5 -> doubleGlobal $ "D5" ++ suf
DoubleReg 6 -> doubleGlobal $ "D6" ++ suf
+ XmmReg 1 -> xmmGlobal $ "XMM1" ++ suf
+ XmmReg 2 -> xmmGlobal $ "XMM2" ++ suf
+ XmmReg 3 -> xmmGlobal $ "XMM3" ++ suf
+ XmmReg 4 -> xmmGlobal $ "XMM4" ++ suf
+ XmmReg 5 -> xmmGlobal $ "XMM5" ++ suf
+ XmmReg 6 -> xmmGlobal $ "XMM6" ++ suf
_other -> panic $ "LlvmCodeGen.Reg: GlobalReg (" ++ (show reg)
++ ") not supported!"
-- LongReg, HpLim, CCSS, CurrentTSO, CurrentNusery, HpAlloc
@@ -64,6 +70,7 @@ lmGlobalReg dflags suf reg
ptrGlobal name = LMNLocalVar (fsLit name) (llvmWordPtr dflags)
floatGlobal name = LMNLocalVar (fsLit name) LMFloat
doubleGlobal name = LMNLocalVar (fsLit name) LMDouble
+ xmmGlobal name = LMNLocalVar (fsLit name) (LMVector 4 (LMInt 32))
-- | A list of STG Registers that should always be considered alive
alwaysLive :: [GlobalReg]
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index 0df95a2f73..4177cadbf6 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -602,19 +602,21 @@ getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
MO_FS_Conv from to -> coerceFP2Int from to x
MO_SF_Conv from to -> coerceInt2FP from to x
- MO_V_Insert {} -> needLlvm
- MO_V_Extract {} -> needLlvm
- MO_V_Add {} -> needLlvm
- MO_V_Sub {} -> needLlvm
- MO_V_Mul {} -> needLlvm
- MO_VS_Quot {} -> needLlvm
- MO_VS_Rem {} -> needLlvm
- MO_VS_Neg {} -> needLlvm
- MO_VF_Add {} -> needLlvm
- MO_VF_Sub {} -> needLlvm
- MO_VF_Mul {} -> needLlvm
- MO_VF_Quot {} -> needLlvm
- MO_VF_Neg {} -> needLlvm
+ MO_V_Insert {} -> needLlvm
+ MO_V_Extract {} -> needLlvm
+ MO_V_Add {} -> needLlvm
+ MO_V_Sub {} -> needLlvm
+ MO_V_Mul {} -> needLlvm
+ MO_VS_Quot {} -> needLlvm
+ MO_VS_Rem {} -> needLlvm
+ MO_VS_Neg {} -> needLlvm
+ MO_VF_Insert {} -> needLlvm
+ MO_VF_Extract {} -> needLlvm
+ MO_VF_Add {} -> needLlvm
+ MO_VF_Sub {} -> needLlvm
+ MO_VF_Mul {} -> needLlvm
+ MO_VF_Quot {} -> needLlvm
+ MO_VF_Neg {} -> needLlvm
_other -> pprPanic "getRegister" (pprMachOp mop)
where
@@ -708,19 +710,21 @@ getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
MO_U_Shr rep -> shift_code rep SHR x y {-False-}
MO_S_Shr rep -> shift_code rep SAR x y {-False-}
- MO_V_Insert {} -> needLlvm
- MO_V_Extract {} -> needLlvm
- MO_V_Add {} -> needLlvm
- MO_V_Sub {} -> needLlvm
- MO_V_Mul {} -> needLlvm
- MO_VS_Quot {} -> needLlvm
- MO_VS_Rem {} -> needLlvm
- MO_VS_Neg {} -> needLlvm
- MO_VF_Add {} -> needLlvm
- MO_VF_Sub {} -> needLlvm
- MO_VF_Mul {} -> needLlvm
- MO_VF_Quot {} -> needLlvm
- MO_VF_Neg {} -> needLlvm
+ MO_V_Insert {} -> needLlvm
+ MO_V_Extract {} -> needLlvm
+ MO_V_Add {} -> needLlvm
+ MO_V_Sub {} -> needLlvm
+ MO_V_Mul {} -> needLlvm
+ MO_VS_Quot {} -> needLlvm
+ MO_VS_Rem {} -> needLlvm
+ MO_VS_Neg {} -> needLlvm
+ MO_VF_Insert {} -> needLlvm
+ MO_VF_Extract {} -> needLlvm
+ MO_VF_Add {} -> needLlvm
+ MO_VF_Sub {} -> needLlvm
+ MO_VF_Mul {} -> needLlvm
+ MO_VF_Quot {} -> needLlvm
+ MO_VF_Neg {} -> needLlvm
_other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
where
@@ -2722,21 +2726,23 @@ sse2NegCode w x = do
return (Any sz code)
isVecExpr :: CmmExpr -> Bool
-isVecExpr (CmmMachOp (MO_V_Insert {}) _) = True
-isVecExpr (CmmMachOp (MO_V_Extract {}) _) = True
-isVecExpr (CmmMachOp (MO_V_Add {}) _) = True
-isVecExpr (CmmMachOp (MO_V_Sub {}) _) = True
-isVecExpr (CmmMachOp (MO_V_Mul {}) _) = True
-isVecExpr (CmmMachOp (MO_VS_Quot {}) _) = True
-isVecExpr (CmmMachOp (MO_VS_Rem {}) _) = True
-isVecExpr (CmmMachOp (MO_VS_Neg {}) _) = True
-isVecExpr (CmmMachOp (MO_VF_Add {}) _) = True
-isVecExpr (CmmMachOp (MO_VF_Sub {}) _) = True
-isVecExpr (CmmMachOp (MO_VF_Mul {}) _) = True
-isVecExpr (CmmMachOp (MO_VF_Quot {}) _) = True
-isVecExpr (CmmMachOp (MO_VF_Neg {}) _) = True
-isVecExpr (CmmMachOp _ [e]) = isVecExpr e
-isVecExpr _ = False
+isVecExpr (CmmMachOp (MO_V_Insert {}) _) = True
+isVecExpr (CmmMachOp (MO_V_Extract {}) _) = True
+isVecExpr (CmmMachOp (MO_V_Add {}) _) = True
+isVecExpr (CmmMachOp (MO_V_Sub {}) _) = True
+isVecExpr (CmmMachOp (MO_V_Mul {}) _) = True
+isVecExpr (CmmMachOp (MO_VS_Quot {}) _) = True
+isVecExpr (CmmMachOp (MO_VS_Rem {}) _) = True
+isVecExpr (CmmMachOp (MO_VS_Neg {}) _) = True
+isVecExpr (CmmMachOp (MO_VF_Insert {}) _) = True
+isVecExpr (CmmMachOp (MO_VF_Extract {}) _) = True
+isVecExpr (CmmMachOp (MO_VF_Add {}) _) = True
+isVecExpr (CmmMachOp (MO_VF_Sub {}) _) = True
+isVecExpr (CmmMachOp (MO_VF_Mul {}) _) = True
+isVecExpr (CmmMachOp (MO_VF_Quot {}) _) = True
+isVecExpr (CmmMachOp (MO_VF_Neg {}) _) = True
+isVecExpr (CmmMachOp _ [e]) = isVecExpr e
+isVecExpr _ = False
needLlvm :: NatM a
needLlvm =
diff --git a/includes/CodeGen.Platform.hs b/includes/CodeGen.Platform.hs
index 14642bd1c5..beff19601d 100644
--- a/includes/CodeGen.Platform.hs
+++ b/includes/CodeGen.Platform.hs
@@ -388,36 +388,54 @@ activeStgRegs = [
#ifdef REG_D1
,DoubleReg 1
#endif
+#ifdef REG_XMM1
+ ,XmmReg 1
+#endif
#ifdef REG_F2
,FloatReg 2
#endif
#ifdef REG_D2
,DoubleReg 2
#endif
+#ifdef REG_XMM2
+ ,XmmReg 2
+#endif
#ifdef REG_F3
,FloatReg 3
#endif
#ifdef REG_D3
,DoubleReg 3
#endif
+#ifdef REG_XMM3
+ ,XmmReg 3
+#endif
#ifdef REG_F4
,FloatReg 4
#endif
#ifdef REG_D4
,DoubleReg 4
#endif
+#ifdef REG_XMM4
+ ,XmmReg 4
+#endif
#ifdef REG_F5
,FloatReg 5
#endif
#ifdef REG_D5
,DoubleReg 5
#endif
+#ifdef REG_XMM5
+ ,XmmReg 5
+#endif
#ifdef REG_F6
,FloatReg 6
#endif
#ifdef REG_D6
,DoubleReg 6
#endif
+#ifdef REG_XMM6
+ ,XmmReg 6
+#endif
#else /* MAX_REAL_SSE_REG == 0 */
#ifdef REG_F1
,FloatReg 1
@@ -569,6 +587,14 @@ globalRegMaybe (DoubleReg 6) =
Just (RealRegSingle REG_D6)
# endif
# endif
+#if MAX_REAL_SSE_REG != 0
+globalRegMaybe (XmmReg 1) = Just (RealRegSingle REG_XMM1)
+globalRegMaybe (XmmReg 2) = Just (RealRegSingle REG_XMM2)
+globalRegMaybe (XmmReg 3) = Just (RealRegSingle REG_XMM3)
+globalRegMaybe (XmmReg 4) = Just (RealRegSingle REG_XMM4)
+globalRegMaybe (XmmReg 5) = Just (RealRegSingle REG_XMM5)
+globalRegMaybe (XmmReg 6) = Just (RealRegSingle REG_XMM6)
+# endif
# ifdef REG_Sp
globalRegMaybe Sp = Just (RealRegSingle REG_Sp)
# endif
diff --git a/includes/stg/MachRegs.h b/includes/stg/MachRegs.h
index 6dc81f63c1..76bdb1fc21 100644
--- a/includes/stg/MachRegs.h
+++ b/includes/stg/MachRegs.h
@@ -167,12 +167,12 @@
#define REG_D5 xmm5
#define REG_D6 xmm6
-#define REG_SSE1 xmm1
-#define REG_SSE2 xmm2
-#define REG_SSE3 xmm3
-#define REG_SSE4 xmm4
-#define REG_SSE5 xmm5
-#define REG_SSE6 xmm6
+#define REG_XMM1 xmm1
+#define REG_XMM2 xmm2
+#define REG_XMM3 xmm3
+#define REG_XMM4 xmm4
+#define REG_XMM5 xmm5
+#define REG_XMM6 xmm6
#if !defined(mingw32_HOST_OS)
#define CALLER_SAVES_R3
@@ -199,13 +199,13 @@
#define CALLER_SAVES_D6
#endif
-#define CALLER_SAVES_SSE1
-#define CALLER_SAVES_SSE2
-#define CALLER_SAVES_SSE3
-#define CALLER_SAVES_SSE4
-#define CALLER_SAVES_SSE5
+#define CALLER_SAVES_XMM1
+#define CALLER_SAVES_XMM2
+#define CALLER_SAVES_XMM3
+#define CALLER_SAVES_XMM4
+#define CALLER_SAVES_XMM5
#if !defined(mingw32_HOST_OS)
-#define CALLER_SAVES_SSE6
+#define CALLER_SAVES_XMM6
#endif
#define MAX_REAL_VANILLA_REG 6
diff --git a/includes/stg/Regs.h b/includes/stg/Regs.h
index fd1577e71a..10ae2851ac 100644
--- a/includes/stg/Regs.h
+++ b/includes/stg/Regs.h
@@ -81,6 +81,12 @@ typedef struct {
StgDouble rD4;
StgDouble rD5;
StgDouble rD6;
+ StgWord128 rXMM1;
+ StgWord128 rXMM2;
+ StgWord128 rXMM3;
+ StgWord128 rXMM4;
+ StgWord128 rXMM5;
+ StgWord128 rXMM6;
StgWord64 rL1;
StgPtr rSp;
StgPtr rSpLim;
@@ -270,6 +276,42 @@ GLOBAL_REG_DECL(StgDouble,D6,REG_D6)
#define D6 (BaseReg->rD6)
#endif
+#if defined(REG_XMM1) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord128,XMM1,REG_XMM1)
+#else
+#define XMM1 (BaseReg->rXMM1)
+#endif
+
+#if defined(REG_XMM2) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord128,XMM2,REG_XMM2)
+#else
+#define XMM2 (BaseReg->rXMM2)
+#endif
+
+#if defined(REG_XMM3) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord128,XMM3,REG_XMM3)
+#else
+#define XMM3 (BaseReg->rXMM3)
+#endif
+
+#if defined(REG_XMM4) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord128,XMM4,REG_XMM4)
+#else
+#define XMM4 (BaseReg->rXMM4)
+#endif
+
+#if defined(REG_XMM5) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord128,XMM5,REG_XMM5)
+#else
+#define XMM5 (BaseReg->rXMM5)
+#endif
+
+#if defined(REG_XMM6) && !defined(NO_GLOBAL_REG_DECLS)
+GLOBAL_REG_DECL(StgWord128,XMM6,REG_XMM6)
+#else
+#define XMM6 (BaseReg->rXMM6)
+#endif
+
#if defined(REG_L1) && !defined(NO_GLOBAL_REG_DECLS)
GLOBAL_REG_DECL(StgWord64,L1,REG_L1)
#else
diff --git a/includes/stg/Types.h b/includes/stg/Types.h
index d6bdc9042b..ccc06a175b 100644
--- a/includes/stg/Types.h
+++ b/includes/stg/Types.h
@@ -83,6 +83,8 @@ typedef unsigned long long int StgWord64;
#error cannot find a way to define StgInt64
#endif
+typedef struct { StgWord64 h; StgWord64 l; } StgWord128;
+
/*
* Define the standard word size we'll use on this machine: make it
* big enough to hold a pointer.
diff --git a/utils/deriveConstants/DeriveConstants.hs b/utils/deriveConstants/DeriveConstants.hs
index 66c1f0e456..e726bf7e0e 100644
--- a/utils/deriveConstants/DeriveConstants.hs
+++ b/utils/deriveConstants/DeriveConstants.hs
@@ -307,6 +307,12 @@ wanteds = concat
,fieldOffset Both "StgRegTable" "rD4"
,fieldOffset Both "StgRegTable" "rD5"
,fieldOffset Both "StgRegTable" "rD6"
+ ,fieldOffset Both "StgRegTable" "rXMM1"
+ ,fieldOffset Both "StgRegTable" "rXMM2"
+ ,fieldOffset Both "StgRegTable" "rXMM3"
+ ,fieldOffset Both "StgRegTable" "rXMM4"
+ ,fieldOffset Both "StgRegTable" "rXMM5"
+ ,fieldOffset Both "StgRegTable" "rXMM6"
,fieldOffset Both "StgRegTable" "rL1"
,fieldOffset Both "StgRegTable" "rSp"
,fieldOffset Both "StgRegTable" "rSpLim"