summaryrefslogtreecommitdiff
path: root/compiler/cmm
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/cmm')
-rw-r--r--compiler/cmm/CmmCallConv.hs40
-rw-r--r--compiler/cmm/CmmExpr.hs83
-rw-r--r--compiler/cmm/CmmLint.hs10
-rw-r--r--compiler/cmm/CmmMachOp.hs25
-rw-r--r--compiler/cmm/CmmType.hs6
-rw-r--r--compiler/cmm/PprC.hs4
-rw-r--r--compiler/cmm/PprCmmExpr.hs6
7 files changed, 52 insertions, 122 deletions
diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs
index 6df910edfa..4e6a9d293a 100644
--- a/compiler/cmm/CmmCallConv.hs
+++ b/compiler/cmm/CmmCallConv.hs
@@ -64,20 +64,13 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
assign_regs assts (r:rs) regs | isVecType ty = vec
| isFloatType ty = float
| otherwise = int
- where vec = case regs of
- (vs, fs, ds, ls, s:ss)
- | passVectorInReg w dflags
- -> let elt_ty = vecElemType ty
- reg_ty = if isFloatType elt_ty
- then Float else Integer
- reg_class = case w of
- W128 -> XmmReg
- W256 -> YmmReg
- W512 -> ZmmReg
- _ -> panic "CmmCallConv.assignArgumentsPos: Invalid vector width"
- in k (RegisterParam
- (reg_class s (vecLength ty) (typeWidth elt_ty) reg_ty),
- (vs, fs, ds, ls, ss))
+ where vec = case (w, regs) of
+ (W128, (vs, fs, ds, ls, s:ss))
+ | passVectorInReg W128 dflags -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss))
+ (W256, (vs, fs, ds, ls, s:ss))
+ | passVectorInReg W256 dflags -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss))
+ (W512, (vs, fs, ds, ls, s:ss))
+ | passVectorInReg W512 dflags -> k (RegisterParam (ZmmReg s), (vs, fs, ds, ls, ss))
_ -> (assts, (r:rs))
float = case (w, regs) of
(W32, (vs, fs, ds, ls, s:ss))
@@ -96,7 +89,6 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments)
(_, (vs, fs, ds, l:ls, ss)) | widthInBits w > widthInBits (wordWidth dflags)
-> k (RegisterParam l, (vs, fs, ds, ls, ss))
_ -> (assts, (r:rs))
-
k (asst, regs') = assign_regs ((r, asst) : assts) rs regs'
ty = arg_ty r
w = typeWidth ty
@@ -210,13 +202,11 @@ nodeOnly = ([VanillaReg 1], [], [], [], [])
-- only use this functionality in hand-written C-- code in the RTS.
realArgRegsCover :: DynFlags -> [GlobalReg]
realArgRegsCover dflags
- | passFloatArgsInXmm dflags
- = map ($VGcPtr) (realVanillaRegs dflags) ++
- realLongRegs dflags ++
- map (\x -> XmmReg x 2 W64 Integer) (realXmmRegNos dflags)
- | otherwise
- = map ($VGcPtr) (realVanillaRegs dflags) ++
- realFloatRegs dflags ++
- realDoubleRegs dflags ++
- realLongRegs dflags ++
- map (\x -> XmmReg x 2 W64 Integer) (realXmmRegNos dflags)
+ | passFloatArgsInXmm dflags = map ($VGcPtr) (realVanillaRegs dflags) ++
+ realLongRegs dflags ++
+ map XmmReg (realXmmRegNos dflags)
+ | otherwise = map ($VGcPtr) (realVanillaRegs dflags) ++
+ realFloatRegs dflags ++
+ realDoubleRegs dflags ++
+ realLongRegs dflags ++
+ map XmmReg (realXmmRegNos dflags)
diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs
index 79eaf8f89c..901df5d908 100644
--- a/compiler/cmm/CmmExpr.hs
+++ b/compiler/cmm/CmmExpr.hs
@@ -14,7 +14,6 @@ module CmmExpr
, currentTSOReg, currentNurseryReg, hpAllocReg, cccsReg
, node, baseReg
, VGcPtr(..)
- , GlobalVecRegTy(..)
, DefinerOfRegs, UserOfRegs
, foldRegsDefd, foldRegsUsed
@@ -42,7 +41,6 @@ import Outputable (panic)
import Unique
import Data.Set (Set)
-import Data.Monoid ((<>))
import qualified Data.Set as Set
import BasicTypes (Alignment, mkAlignment, alignmentOf)
@@ -394,7 +392,6 @@ data VGcPtr = VGcPtr | VNonGcPtr deriving( Eq, Show )
-----------------------------------------------------------------------------
{-
Note [Overlapping global registers]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The backend might not faithfully implement the abstraction of the STG
machine with independent registers for different values of type
@@ -416,26 +413,6 @@ on a particular platform. The instance Eq GlobalReg is syntactic
equality of STG registers and does not take overlap into
account. However it is still used in UserOfRegs/DefinerOfRegs and
there are likely still bugs there, beware!
-
-
-Note [SIMD registers]
-~~~~~~~~~~~~~~~~~~~~~
-
-GHC's treatment of SIMD registers is heavily modelled after the x86_64
-architecture. Namely we have 128- (XMM), 256- (YMM), and 512-bit (ZMM)
-registers. Furthermore, we treat each possible format in these registers as a
-distinct register which overlaps with the others. For instance, we XMM1 as a
-2xI64 register is distinct from but overlaps with (in the sense defined in Note
-[Overlapping global registers]) its use as a 4xI32 register.
-
-This model makes it easier to fit SIMD registers into the NCG, which generally
-expects that each global register has a single, known CmmType.
-
-In the future we could consider further refactoring this to eliminate the
-XMM, YMM, and ZMM register names (which are quite x86-specific) and instead just
-having a set of NxM-bit vector registers (e.g. Vec2x64A, Vec2x64B, ...,
-Vec4x32A, ..., Vec4x64A).
-
-}
data GlobalReg
@@ -455,15 +432,12 @@ data GlobalReg
| XmmReg -- 128-bit SIMD vector register
{-# UNPACK #-} !Int -- its number
- !Length !Width !GlobalVecRegTy
| YmmReg -- 256-bit SIMD vector register
{-# UNPACK #-} !Int -- its number
- !Length !Width !GlobalVecRegTy
| ZmmReg -- 512-bit SIMD vector register
{-# UNPACK #-} !Int -- its number
- !Length !Width !GlobalVecRegTy
-- STG registers
| Sp -- Stack ptr; points to last occupied stack location.
@@ -504,17 +478,17 @@ data GlobalReg
deriving( Show )
-data GlobalVecRegTy = Integer | Float
- deriving (Show, Eq, Ord)
-
instance Eq GlobalReg where
VanillaReg i _ == VanillaReg j _ = i==j -- Ignore type when seeking clashes
FloatReg i == FloatReg j = i==j
DoubleReg i == DoubleReg j = i==j
LongReg i == LongReg j = i==j
- XmmReg i l w grt == XmmReg j l' w' grt' = i==j && l == l' && w == w' && grt == grt'
- YmmReg i l w grt == YmmReg j l' w' grt' = i==j && l == l' && w == w' && grt == grt'
- ZmmReg i l w grt == ZmmReg j l' w' grt' = i==j && l == l' && w == w' && grt == grt'
+ -- NOTE: XMM, YMM, ZMM registers actually are the same registers
+ -- at least with respect to store at YMM i and then read from XMM i
+ -- and similarly for ZMM etc.
+ XmmReg i == XmmReg j = i==j
+ YmmReg i == YmmReg j = i==j
+ ZmmReg i == ZmmReg j = i==j
Sp == Sp = True
SpLim == SpLim = True
Hp == Hp = True
@@ -538,21 +512,9 @@ instance Ord GlobalReg where
compare (FloatReg i) (FloatReg j) = compare i j
compare (DoubleReg i) (DoubleReg j) = compare i j
compare (LongReg i) (LongReg j) = compare i j
- compare (XmmReg i l w grt)
- (XmmReg j l' w' grt') = compare i j
- <> compare l l'
- <> compare w w'
- <> compare grt grt'
- compare (YmmReg i l w grt)
- (YmmReg j l' w' grt') = compare i j
- <> compare l l'
- <> compare w w'
- <> compare grt grt'
- compare (ZmmReg i l w grt)
- (ZmmReg j l' w' grt') = compare i j
- <> compare l l'
- <> compare w w'
- <> compare grt grt'
+ compare (XmmReg i) (XmmReg j) = compare i j
+ compare (YmmReg i) (YmmReg j) = compare i j
+ compare (ZmmReg i) (ZmmReg j) = compare i j
compare Sp Sp = EQ
compare SpLim SpLim = EQ
compare Hp Hp = EQ
@@ -576,12 +538,12 @@ instance Ord GlobalReg where
compare _ (DoubleReg _) = GT
compare (LongReg _) _ = LT
compare _ (LongReg _) = GT
- compare (XmmReg _ _ _ _) _ = LT
- compare _ (XmmReg _ _ _ _) = GT
- compare (YmmReg _ _ _ _) _ = LT
- compare _ (YmmReg _ _ _ _) = GT
- compare (ZmmReg _ _ _ _) _ = LT
- compare _ (ZmmReg _ _ _ _) = GT
+ compare (XmmReg _) _ = LT
+ compare _ (XmmReg _) = GT
+ compare (YmmReg _) _ = LT
+ compare _ (YmmReg _) = GT
+ compare (ZmmReg _) _ = LT
+ compare _ (ZmmReg _) = GT
compare Sp _ = LT
compare _ Sp = GT
compare SpLim _ = LT
@@ -634,15 +596,12 @@ globalRegType dflags (VanillaReg _ VNonGcPtr) = bWord dflags
globalRegType _ (FloatReg _) = cmmFloat W32
globalRegType _ (DoubleReg _) = cmmFloat W64
globalRegType _ (LongReg _) = cmmBits W64
-globalRegType _ (XmmReg _ l w ty) = case ty of
- Integer -> cmmVec l (cmmBits w)
- Float -> cmmVec l (cmmFloat w)
-globalRegType _ (YmmReg _ l w ty) = case ty of
- Integer -> cmmVec l (cmmBits w)
- Float -> cmmVec l (cmmFloat w)
-globalRegType _ (ZmmReg _ l w ty) = case ty of
- Integer -> cmmVec l (cmmBits w)
- Float -> cmmVec l (cmmFloat w)
+-- TODO: improve the internal model of SIMD/vectorized registers
+-- the right design SHOULd improve handling of float and double code too.
+-- see remarks in "NOTE [SIMD Design for the future]"" in StgCmmPrim
+globalRegType _ (XmmReg _) = cmmVec 4 (cmmBits W32)
+globalRegType _ (YmmReg _) = cmmVec 8 (cmmBits W32)
+globalRegType _ (ZmmReg _) = cmmVec 16 (cmmBits W32)
globalRegType dflags Hp = gcWord dflags
-- The initialiser for all
diff --git a/compiler/cmm/CmmLint.hs b/compiler/cmm/CmmLint.hs
index 53dcd70b7b..d5c3f84443 100644
--- a/compiler/cmm/CmmLint.hs
+++ b/compiler/cmm/CmmLint.hs
@@ -148,13 +148,9 @@ lintCmmMiddle node = case node of
dflags <- getDynFlags
erep <- lintCmmExpr expr
let reg_ty = cmmRegType dflags reg
- case isVecCatType reg_ty of
- True -> if ((typeWidth reg_ty) == (typeWidth erep))
- then return ()
- else cmmLintAssignErr (CmmAssign reg expr) erep reg_ty
- _ -> if (erep `cmmEqType_ignoring_ptrhood` reg_ty)
- then return ()
- else cmmLintAssignErr (CmmAssign reg expr) erep reg_ty
+ if (erep `cmmEqType_ignoring_ptrhood` reg_ty)
+ then return ()
+ else cmmLintAssignErr (CmmAssign reg expr) erep reg_ty
CmmStore l r -> do
_ <- lintCmmExpr l
diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs
index 38d9edb480..9740d21bef 100644
--- a/compiler/cmm/CmmMachOp.hs
+++ b/compiler/cmm/CmmMachOp.hs
@@ -136,9 +136,8 @@ data MachOp
| MO_VU_Rem Length Width
-- Floting point vector element insertion and extraction operations
- | MO_VF_Broadcast Length Width -- Broadcast a scalar into a vector
- | MO_VF_Insert Length Width -- Insert scalar into vector
- | MO_VF_Extract Length Width -- Extract scalar from vector
+ | MO_VF_Insert Length Width -- Insert scalar into vector
+ | MO_VF_Extract Length Width -- Extract scalar from vector
-- Floating point vector operations
| MO_VF_Add Length Width
@@ -431,7 +430,6 @@ machOpResultType dflags mop tys =
MO_VU_Quot l w -> cmmVec l (cmmBits w)
MO_VU_Rem l w -> cmmVec l (cmmBits w)
- MO_VF_Broadcast l w -> cmmVec l (cmmFloat w)
MO_VF_Insert l w -> cmmVec l (cmmFloat w)
MO_VF_Extract _ w -> cmmFloat w
@@ -524,21 +522,16 @@ machOpArgReps dflags op =
MO_VU_Quot _ r -> [r,r]
MO_VU_Rem _ r -> [r,r]
- -- offset is always W32 as mentioned in StgCmmPrim.hs
- MO_VF_Broadcast l r -> [vecwidth l r, r]
- MO_VF_Insert l r -> [vecwidth l r, r, W32]
- MO_VF_Extract l r -> [vecwidth l r, W32]
+ MO_VF_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth dflags]
+ MO_VF_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth dflags]
- -- NOTE: The below is owing to the fact that floats use the SSE registers
- MO_VF_Add l w -> [vecwidth l w, vecwidth l w]
- MO_VF_Sub l w -> [vecwidth l w, vecwidth l w]
- MO_VF_Mul l w -> [vecwidth l w, vecwidth l w]
- MO_VF_Quot l w -> [vecwidth l w, vecwidth l w]
- MO_VF_Neg l w -> [vecwidth l w]
+ MO_VF_Add _ r -> [r,r]
+ MO_VF_Sub _ r -> [r,r]
+ MO_VF_Mul _ r -> [r,r]
+ MO_VF_Quot _ r -> [r,r]
+ MO_VF_Neg _ r -> [r]
MO_AlignmentCheck _ r -> [r]
- where
- vecwidth l w = widthFromBytes (l*widthInBytes w)
-----------------------------------------------------------------------------
-- CallishMachOp
diff --git a/compiler/cmm/CmmType.hs b/compiler/cmm/CmmType.hs
index 17b588720f..43d23c7ee7 100644
--- a/compiler/cmm/CmmType.hs
+++ b/compiler/cmm/CmmType.hs
@@ -6,7 +6,6 @@ module CmmType
, typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood
, isFloatType, isGcPtrType, isBitsType
, isWord32, isWord64, isFloat64, isFloat32
- , isVecCatType
, Width(..)
, widthInBits, widthInBytes, widthInLog, widthFromBytes
@@ -134,7 +133,7 @@ cInt :: DynFlags -> CmmType
cInt dflags = cmmBits (cIntWidth dflags)
------------ Predicates ----------------
-isFloatType, isGcPtrType, isBitsType, isVecCatType :: CmmType -> Bool
+isFloatType, isGcPtrType, isBitsType :: CmmType -> Bool
isFloatType (CmmType FloatCat _) = True
isFloatType _other = False
@@ -144,9 +143,6 @@ isGcPtrType _other = False
isBitsType (CmmType BitsCat _) = True
isBitsType _ = False
-isVecCatType (CmmType (VecCat _ _) _) = True
-isVecCatType _other = False
-
isWord32, isWord64, isFloat32, isFloat64 :: CmmType -> Bool
-- isWord64 is true of 64-bit non-floats (both gc-ptrs and otherwise)
-- isFloat32 and 64 are obvious
diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs
index a60a26229b..7227edd57e 100644
--- a/compiler/cmm/PprC.hs
+++ b/compiler/cmm/PprC.hs
@@ -713,10 +713,6 @@ pprMachOp_for_C mop = case mop of
(panic $ "PprC.pprMachOp_for_C: MO_VU_Rem"
++ " should have been handled earlier!")
- MO_VF_Broadcast {} -> pprTrace "offending mop:"
- (text "MO_VF_Broadcast")
- (panic $ "PprC.pprMachOp_for_C: MO_VF_Broadcast"
- ++ " should have been handled earlier!")
MO_VF_Insert {} -> pprTrace "offending mop:"
(text "MO_VF_Insert")
(panic $ "PprC.pprMachOp_for_C: MO_VF_Insert"
diff --git a/compiler/cmm/PprCmmExpr.hs b/compiler/cmm/PprCmmExpr.hs
index 2080c1f5d8..7bf73f1ca6 100644
--- a/compiler/cmm/PprCmmExpr.hs
+++ b/compiler/cmm/PprCmmExpr.hs
@@ -261,9 +261,9 @@ pprGlobalReg gr
FloatReg n -> char 'F' <> int n
DoubleReg n -> char 'D' <> int n
LongReg n -> char 'L' <> int n
- XmmReg n _ _ _ -> text "XMM" <> int n
- YmmReg n _ _ _ -> text "YMM" <> int n
- ZmmReg n _ _ _ -> text "ZMM" <> int n
+ XmmReg n -> text "XMM" <> int n
+ YmmReg n -> text "YMM" <> int n
+ ZmmReg n -> text "ZMM" <> int n
Sp -> text "Sp"
SpLim -> text "SpLim"
Hp -> text "Hp"