diff options
Diffstat (limited to 'compiler/cmm')
-rw-r--r-- | compiler/cmm/CmmCallConv.hs | 40 | ||||
-rw-r--r-- | compiler/cmm/CmmExpr.hs | 83 | ||||
-rw-r--r-- | compiler/cmm/CmmLint.hs | 10 | ||||
-rw-r--r-- | compiler/cmm/CmmMachOp.hs | 25 | ||||
-rw-r--r-- | compiler/cmm/CmmType.hs | 6 | ||||
-rw-r--r-- | compiler/cmm/PprC.hs | 4 | ||||
-rw-r--r-- | compiler/cmm/PprCmmExpr.hs | 6 |
7 files changed, 52 insertions, 122 deletions
diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs index 6df910edfa..4e6a9d293a 100644 --- a/compiler/cmm/CmmCallConv.hs +++ b/compiler/cmm/CmmCallConv.hs @@ -64,20 +64,13 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments) assign_regs assts (r:rs) regs | isVecType ty = vec | isFloatType ty = float | otherwise = int - where vec = case regs of - (vs, fs, ds, ls, s:ss) - | passVectorInReg w dflags - -> let elt_ty = vecElemType ty - reg_ty = if isFloatType elt_ty - then Float else Integer - reg_class = case w of - W128 -> XmmReg - W256 -> YmmReg - W512 -> ZmmReg - _ -> panic "CmmCallConv.assignArgumentsPos: Invalid vector width" - in k (RegisterParam - (reg_class s (vecLength ty) (typeWidth elt_ty) reg_ty), - (vs, fs, ds, ls, ss)) + where vec = case (w, regs) of + (W128, (vs, fs, ds, ls, s:ss)) + | passVectorInReg W128 dflags -> k (RegisterParam (XmmReg s), (vs, fs, ds, ls, ss)) + (W256, (vs, fs, ds, ls, s:ss)) + | passVectorInReg W256 dflags -> k (RegisterParam (YmmReg s), (vs, fs, ds, ls, ss)) + (W512, (vs, fs, ds, ls, s:ss)) + | passVectorInReg W512 dflags -> k (RegisterParam (ZmmReg s), (vs, fs, ds, ls, ss)) _ -> (assts, (r:rs)) float = case (w, regs) of (W32, (vs, fs, ds, ls, s:ss)) @@ -96,7 +89,6 @@ assignArgumentsPos dflags off conv arg_ty reps = (stk_off, assignments) (_, (vs, fs, ds, l:ls, ss)) | widthInBits w > widthInBits (wordWidth dflags) -> k (RegisterParam l, (vs, fs, ds, ls, ss)) _ -> (assts, (r:rs)) - k (asst, regs') = assign_regs ((r, asst) : assts) rs regs' ty = arg_ty r w = typeWidth ty @@ -210,13 +202,11 @@ nodeOnly = ([VanillaReg 1], [], [], [], []) -- only use this functionality in hand-written C-- code in the RTS. realArgRegsCover :: DynFlags -> [GlobalReg] realArgRegsCover dflags - | passFloatArgsInXmm dflags - = map ($VGcPtr) (realVanillaRegs dflags) ++ - realLongRegs dflags ++ - map (\x -> XmmReg x 2 W64 Integer) (realXmmRegNos dflags) - | otherwise - = map ($VGcPtr) (realVanillaRegs dflags) ++ - realFloatRegs dflags ++ - realDoubleRegs dflags ++ - realLongRegs dflags ++ - map (\x -> XmmReg x 2 W64 Integer) (realXmmRegNos dflags) + | passFloatArgsInXmm dflags = map ($VGcPtr) (realVanillaRegs dflags) ++ + realLongRegs dflags ++ + map XmmReg (realXmmRegNos dflags) + | otherwise = map ($VGcPtr) (realVanillaRegs dflags) ++ + realFloatRegs dflags ++ + realDoubleRegs dflags ++ + realLongRegs dflags ++ + map XmmReg (realXmmRegNos dflags) diff --git a/compiler/cmm/CmmExpr.hs b/compiler/cmm/CmmExpr.hs index 79eaf8f89c..901df5d908 100644 --- a/compiler/cmm/CmmExpr.hs +++ b/compiler/cmm/CmmExpr.hs @@ -14,7 +14,6 @@ module CmmExpr , currentTSOReg, currentNurseryReg, hpAllocReg, cccsReg , node, baseReg , VGcPtr(..) - , GlobalVecRegTy(..) , DefinerOfRegs, UserOfRegs , foldRegsDefd, foldRegsUsed @@ -42,7 +41,6 @@ import Outputable (panic) import Unique import Data.Set (Set) -import Data.Monoid ((<>)) import qualified Data.Set as Set import BasicTypes (Alignment, mkAlignment, alignmentOf) @@ -394,7 +392,6 @@ data VGcPtr = VGcPtr | VNonGcPtr deriving( Eq, Show ) ----------------------------------------------------------------------------- {- Note [Overlapping global registers] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The backend might not faithfully implement the abstraction of the STG machine with independent registers for different values of type @@ -416,26 +413,6 @@ on a particular platform. The instance Eq GlobalReg is syntactic equality of STG registers and does not take overlap into account. However it is still used in UserOfRegs/DefinerOfRegs and there are likely still bugs there, beware! - - -Note [SIMD registers] -~~~~~~~~~~~~~~~~~~~~~ - -GHC's treatment of SIMD registers is heavily modelled after the x86_64 -architecture. Namely we have 128- (XMM), 256- (YMM), and 512-bit (ZMM) -registers. Furthermore, we treat each possible format in these registers as a -distinct register which overlaps with the others. For instance, we XMM1 as a -2xI64 register is distinct from but overlaps with (in the sense defined in Note -[Overlapping global registers]) its use as a 4xI32 register. - -This model makes it easier to fit SIMD registers into the NCG, which generally -expects that each global register has a single, known CmmType. - -In the future we could consider further refactoring this to eliminate the -XMM, YMM, and ZMM register names (which are quite x86-specific) and instead just -having a set of NxM-bit vector registers (e.g. Vec2x64A, Vec2x64B, ..., -Vec4x32A, ..., Vec4x64A). - -} data GlobalReg @@ -455,15 +432,12 @@ data GlobalReg | XmmReg -- 128-bit SIMD vector register {-# UNPACK #-} !Int -- its number - !Length !Width !GlobalVecRegTy | YmmReg -- 256-bit SIMD vector register {-# UNPACK #-} !Int -- its number - !Length !Width !GlobalVecRegTy | ZmmReg -- 512-bit SIMD vector register {-# UNPACK #-} !Int -- its number - !Length !Width !GlobalVecRegTy -- STG registers | Sp -- Stack ptr; points to last occupied stack location. @@ -504,17 +478,17 @@ data GlobalReg deriving( Show ) -data GlobalVecRegTy = Integer | Float - deriving (Show, Eq, Ord) - instance Eq GlobalReg where VanillaReg i _ == VanillaReg j _ = i==j -- Ignore type when seeking clashes FloatReg i == FloatReg j = i==j DoubleReg i == DoubleReg j = i==j LongReg i == LongReg j = i==j - XmmReg i l w grt == XmmReg j l' w' grt' = i==j && l == l' && w == w' && grt == grt' - YmmReg i l w grt == YmmReg j l' w' grt' = i==j && l == l' && w == w' && grt == grt' - ZmmReg i l w grt == ZmmReg j l' w' grt' = i==j && l == l' && w == w' && grt == grt' + -- NOTE: XMM, YMM, ZMM registers actually are the same registers + -- at least with respect to store at YMM i and then read from XMM i + -- and similarly for ZMM etc. + XmmReg i == XmmReg j = i==j + YmmReg i == YmmReg j = i==j + ZmmReg i == ZmmReg j = i==j Sp == Sp = True SpLim == SpLim = True Hp == Hp = True @@ -538,21 +512,9 @@ instance Ord GlobalReg where compare (FloatReg i) (FloatReg j) = compare i j compare (DoubleReg i) (DoubleReg j) = compare i j compare (LongReg i) (LongReg j) = compare i j - compare (XmmReg i l w grt) - (XmmReg j l' w' grt') = compare i j - <> compare l l' - <> compare w w' - <> compare grt grt' - compare (YmmReg i l w grt) - (YmmReg j l' w' grt') = compare i j - <> compare l l' - <> compare w w' - <> compare grt grt' - compare (ZmmReg i l w grt) - (ZmmReg j l' w' grt') = compare i j - <> compare l l' - <> compare w w' - <> compare grt grt' + compare (XmmReg i) (XmmReg j) = compare i j + compare (YmmReg i) (YmmReg j) = compare i j + compare (ZmmReg i) (ZmmReg j) = compare i j compare Sp Sp = EQ compare SpLim SpLim = EQ compare Hp Hp = EQ @@ -576,12 +538,12 @@ instance Ord GlobalReg where compare _ (DoubleReg _) = GT compare (LongReg _) _ = LT compare _ (LongReg _) = GT - compare (XmmReg _ _ _ _) _ = LT - compare _ (XmmReg _ _ _ _) = GT - compare (YmmReg _ _ _ _) _ = LT - compare _ (YmmReg _ _ _ _) = GT - compare (ZmmReg _ _ _ _) _ = LT - compare _ (ZmmReg _ _ _ _) = GT + compare (XmmReg _) _ = LT + compare _ (XmmReg _) = GT + compare (YmmReg _) _ = LT + compare _ (YmmReg _) = GT + compare (ZmmReg _) _ = LT + compare _ (ZmmReg _) = GT compare Sp _ = LT compare _ Sp = GT compare SpLim _ = LT @@ -634,15 +596,12 @@ globalRegType dflags (VanillaReg _ VNonGcPtr) = bWord dflags globalRegType _ (FloatReg _) = cmmFloat W32 globalRegType _ (DoubleReg _) = cmmFloat W64 globalRegType _ (LongReg _) = cmmBits W64 -globalRegType _ (XmmReg _ l w ty) = case ty of - Integer -> cmmVec l (cmmBits w) - Float -> cmmVec l (cmmFloat w) -globalRegType _ (YmmReg _ l w ty) = case ty of - Integer -> cmmVec l (cmmBits w) - Float -> cmmVec l (cmmFloat w) -globalRegType _ (ZmmReg _ l w ty) = case ty of - Integer -> cmmVec l (cmmBits w) - Float -> cmmVec l (cmmFloat w) +-- TODO: improve the internal model of SIMD/vectorized registers +-- the right design SHOULd improve handling of float and double code too. +-- see remarks in "NOTE [SIMD Design for the future]"" in StgCmmPrim +globalRegType _ (XmmReg _) = cmmVec 4 (cmmBits W32) +globalRegType _ (YmmReg _) = cmmVec 8 (cmmBits W32) +globalRegType _ (ZmmReg _) = cmmVec 16 (cmmBits W32) globalRegType dflags Hp = gcWord dflags -- The initialiser for all diff --git a/compiler/cmm/CmmLint.hs b/compiler/cmm/CmmLint.hs index 53dcd70b7b..d5c3f84443 100644 --- a/compiler/cmm/CmmLint.hs +++ b/compiler/cmm/CmmLint.hs @@ -148,13 +148,9 @@ lintCmmMiddle node = case node of dflags <- getDynFlags erep <- lintCmmExpr expr let reg_ty = cmmRegType dflags reg - case isVecCatType reg_ty of - True -> if ((typeWidth reg_ty) == (typeWidth erep)) - then return () - else cmmLintAssignErr (CmmAssign reg expr) erep reg_ty - _ -> if (erep `cmmEqType_ignoring_ptrhood` reg_ty) - then return () - else cmmLintAssignErr (CmmAssign reg expr) erep reg_ty + if (erep `cmmEqType_ignoring_ptrhood` reg_ty) + then return () + else cmmLintAssignErr (CmmAssign reg expr) erep reg_ty CmmStore l r -> do _ <- lintCmmExpr l diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs index 38d9edb480..9740d21bef 100644 --- a/compiler/cmm/CmmMachOp.hs +++ b/compiler/cmm/CmmMachOp.hs @@ -136,9 +136,8 @@ data MachOp | MO_VU_Rem Length Width -- Floting point vector element insertion and extraction operations - | MO_VF_Broadcast Length Width -- Broadcast a scalar into a vector - | MO_VF_Insert Length Width -- Insert scalar into vector - | MO_VF_Extract Length Width -- Extract scalar from vector + | MO_VF_Insert Length Width -- Insert scalar into vector + | MO_VF_Extract Length Width -- Extract scalar from vector -- Floating point vector operations | MO_VF_Add Length Width @@ -431,7 +430,6 @@ machOpResultType dflags mop tys = MO_VU_Quot l w -> cmmVec l (cmmBits w) MO_VU_Rem l w -> cmmVec l (cmmBits w) - MO_VF_Broadcast l w -> cmmVec l (cmmFloat w) MO_VF_Insert l w -> cmmVec l (cmmFloat w) MO_VF_Extract _ w -> cmmFloat w @@ -524,21 +522,16 @@ machOpArgReps dflags op = MO_VU_Quot _ r -> [r,r] MO_VU_Rem _ r -> [r,r] - -- offset is always W32 as mentioned in StgCmmPrim.hs - MO_VF_Broadcast l r -> [vecwidth l r, r] - MO_VF_Insert l r -> [vecwidth l r, r, W32] - MO_VF_Extract l r -> [vecwidth l r, W32] + MO_VF_Insert l r -> [typeWidth (vec l (cmmFloat r)),r,wordWidth dflags] + MO_VF_Extract l r -> [typeWidth (vec l (cmmFloat r)),wordWidth dflags] - -- NOTE: The below is owing to the fact that floats use the SSE registers - MO_VF_Add l w -> [vecwidth l w, vecwidth l w] - MO_VF_Sub l w -> [vecwidth l w, vecwidth l w] - MO_VF_Mul l w -> [vecwidth l w, vecwidth l w] - MO_VF_Quot l w -> [vecwidth l w, vecwidth l w] - MO_VF_Neg l w -> [vecwidth l w] + MO_VF_Add _ r -> [r,r] + MO_VF_Sub _ r -> [r,r] + MO_VF_Mul _ r -> [r,r] + MO_VF_Quot _ r -> [r,r] + MO_VF_Neg _ r -> [r] MO_AlignmentCheck _ r -> [r] - where - vecwidth l w = widthFromBytes (l*widthInBytes w) ----------------------------------------------------------------------------- -- CallishMachOp diff --git a/compiler/cmm/CmmType.hs b/compiler/cmm/CmmType.hs index 17b588720f..43d23c7ee7 100644 --- a/compiler/cmm/CmmType.hs +++ b/compiler/cmm/CmmType.hs @@ -6,7 +6,6 @@ module CmmType , typeWidth, cmmEqType, cmmEqType_ignoring_ptrhood , isFloatType, isGcPtrType, isBitsType , isWord32, isWord64, isFloat64, isFloat32 - , isVecCatType , Width(..) , widthInBits, widthInBytes, widthInLog, widthFromBytes @@ -134,7 +133,7 @@ cInt :: DynFlags -> CmmType cInt dflags = cmmBits (cIntWidth dflags) ------------ Predicates ---------------- -isFloatType, isGcPtrType, isBitsType, isVecCatType :: CmmType -> Bool +isFloatType, isGcPtrType, isBitsType :: CmmType -> Bool isFloatType (CmmType FloatCat _) = True isFloatType _other = False @@ -144,9 +143,6 @@ isGcPtrType _other = False isBitsType (CmmType BitsCat _) = True isBitsType _ = False -isVecCatType (CmmType (VecCat _ _) _) = True -isVecCatType _other = False - isWord32, isWord64, isFloat32, isFloat64 :: CmmType -> Bool -- isWord64 is true of 64-bit non-floats (both gc-ptrs and otherwise) -- isFloat32 and 64 are obvious diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs index a60a26229b..7227edd57e 100644 --- a/compiler/cmm/PprC.hs +++ b/compiler/cmm/PprC.hs @@ -713,10 +713,6 @@ pprMachOp_for_C mop = case mop of (panic $ "PprC.pprMachOp_for_C: MO_VU_Rem" ++ " should have been handled earlier!") - MO_VF_Broadcast {} -> pprTrace "offending mop:" - (text "MO_VF_Broadcast") - (panic $ "PprC.pprMachOp_for_C: MO_VF_Broadcast" - ++ " should have been handled earlier!") MO_VF_Insert {} -> pprTrace "offending mop:" (text "MO_VF_Insert") (panic $ "PprC.pprMachOp_for_C: MO_VF_Insert" diff --git a/compiler/cmm/PprCmmExpr.hs b/compiler/cmm/PprCmmExpr.hs index 2080c1f5d8..7bf73f1ca6 100644 --- a/compiler/cmm/PprCmmExpr.hs +++ b/compiler/cmm/PprCmmExpr.hs @@ -261,9 +261,9 @@ pprGlobalReg gr FloatReg n -> char 'F' <> int n DoubleReg n -> char 'D' <> int n LongReg n -> char 'L' <> int n - XmmReg n _ _ _ -> text "XMM" <> int n - YmmReg n _ _ _ -> text "YMM" <> int n - ZmmReg n _ _ _ -> text "ZMM" <> int n + XmmReg n -> text "XMM" <> int n + YmmReg n -> text "YMM" <> int n + ZmmReg n -> text "ZMM" <> int n Sp -> text "Sp" SpLim -> text "SpLim" Hp -> text "Hp" |