From 99ea5f2cfa09f50bf3ea105821dc095942552e59 Mon Sep 17 00:00:00 2001
From: Ben Gamari
Date: Thu, 27 Jan 2022 14:47:33 -0500
Subject: Introduce alignment to CmmStore

---
 compiler/GHC/Cmm/CommonBlockElim.hs      |  4 +-
 compiler/GHC/Cmm/Graph.hs                |  3 +-
 compiler/GHC/Cmm/LayoutStack.hs          |  9 ++-
 compiler/GHC/Cmm/Lint.hs                 |  2 +-
 compiler/GHC/Cmm/Node.hs                 | 12 ++--
 compiler/GHC/Cmm/Ppr.hs                  |  5 +-
 compiler/GHC/Cmm/Sink.hs                 |  2 +-
 compiler/GHC/CmmToAsm.hs                 |  4 +-
 compiler/GHC/CmmToAsm/AArch64/CodeGen.hs |  2 +-
 compiler/GHC/CmmToAsm/PPC/CodeGen.hs     |  2 +-
 compiler/GHC/CmmToAsm/X86/CodeGen.hs     |  2 +-
 compiler/GHC/CmmToC.hs                   |  7 +--
 compiler/GHC/CmmToLlvm/CodeGen.hs        | 99 ++++++++++++++++++--------------
 compiler/GHC/Driver/GenerateCgIPEStub.hs |  2 +-
 compiler/GHC/Llvm/Ppr.hs                 | 17 +++---
 compiler/GHC/Llvm/Syntax.hs              |  2 +-
 compiler/GHC/StgToCmm/CgUtils.hs         |  2 +-
 compiler/GHC/StgToCmm/Monad.hs           |  3 +-
 18 files changed, 99 insertions(+), 80 deletions(-)

diff --git a/compiler/GHC/Cmm/CommonBlockElim.hs b/compiler/GHC/Cmm/CommonBlockElim.hs
index b18730ed79..7dd4119f83 100644
--- a/compiler/GHC/Cmm/CommonBlockElim.hs
+++ b/compiler/GHC/Cmm/CommonBlockElim.hs
@@ -144,7 +144,7 @@ hash_block block =
     hash_node :: CmmNode O x -> Word32
     hash_node n | dont_care n = 0 -- don't care
     hash_node (CmmAssign r e) = hash_reg r + hash_e e
-    hash_node (CmmStore e e') = hash_e e + hash_e e'
+    hash_node (CmmStore e e' _) = hash_e e + hash_e e'
     hash_node (CmmUnsafeForeignCall t _ as) = hash_tgt t + hash_list hash_e as
     hash_node (CmmBranch _) = 23 -- NB. ignore the label
     hash_node (CmmCondBranch p _ _ _) = hash_e p
@@ -210,7 +210,7 @@ eqMiddleWith :: (BlockId -> BlockId -> Bool)
              -> CmmNode O O -> CmmNode O O -> Bool
 eqMiddleWith eqBid (CmmAssign r1 e1) (CmmAssign r2 e2)
   = r1 == r2 && eqExprWith eqBid e1 e2
-eqMiddleWith eqBid (CmmStore l1 r1) (CmmStore l2 r2)
+eqMiddleWith eqBid (CmmStore l1 r1 _) (CmmStore l2 r2 _)
   = eqExprWith eqBid l1 l2 && eqExprWith eqBid r1 r2
 eqMiddleWith eqBid (CmmUnsafeForeignCall t1 r1 a1)
                    (CmmUnsafeForeignCall t2 r2 a2)
diff --git a/compiler/GHC/Cmm/Graph.hs b/compiler/GHC/Cmm/Graph.hs
index 6761821951..d59658e2af 100644
--- a/compiler/GHC/Cmm/Graph.hs
+++ b/compiler/GHC/Cmm/Graph.hs
@@ -193,8 +193,9 @@ mkAssign :: CmmReg -> CmmExpr -> CmmAGraph
 mkAssign l (CmmReg r) | l == r = mkNop
 mkAssign l r = mkMiddle $ CmmAssign l r
 
+-- | Assumes natural alignment
 mkStore :: CmmExpr -> CmmExpr -> CmmAGraph
-mkStore l r = mkMiddle $ CmmStore l r
+mkStore l r = mkMiddle $ CmmStore l r NaturallyAligned
 
 ---------- Control transfer
 mkJump :: Profile -> Convention -> CmmExpr
diff --git a/compiler/GHC/Cmm/LayoutStack.hs b/compiler/GHC/Cmm/LayoutStack.hs
index 0a01081634..0d759f5559 100644
--- a/compiler/GHC/Cmm/LayoutStack.hs
+++ b/compiler/GHC/Cmm/LayoutStack.hs
@@ -605,7 +605,8 @@ fixupStack old_stack new_stack = concatMap move new_locs
     move (r,n)
       | Just (_,m) <- lookupUFM old_map r, n == m = []
       | otherwise = [CmmStore (CmmStackSlot Old n)
-                             (CmmReg (CmmLocal r))]
+                             (CmmReg (CmmLocal r))
+                             NaturallyAligned]
@@ -703,7 +704,7 @@ setupStackFrame platform lbl liveness updfr_off ret_args stack0
 futureContinuation :: Block CmmNode O O -> Maybe BlockId
 futureContinuation middle = foldBlockNodesB f middle Nothing
    where f :: CmmNode a b -> Maybe BlockId -> Maybe BlockId
-         f (CmmStore (CmmStackSlot (Young l) _) (CmmLit (CmmBlock _))) _
+         f (CmmStore (CmmStackSlot (Young l) _) (CmmLit (CmmBlock _)) _) _
            = Just l
          f _ r = r
@@ -752,6 +753,7 @@ allocate platform ret_off live stackmap@StackMap{ sm_sp = sp0
              select_save to_save (slot:stack) ->
                let assig = CmmStore (CmmStackSlot Old n')
                                     (CmmReg (CmmLocal r))
+                                    NaturallyAligned
                    n' = plusW platform n 1
                in (to_save', stack', n', assig : assigs, (r,(r,n')):regs)
 
@@ -786,6 +788,7 @@ allocate platform ret_off live stackmap@StackMap{ sm_sp = sp0
             n' = n + localRegBytes platform r
             assig = CmmStore (CmmStackSlot Old n')
                              (CmmReg (CmmLocal r))
+                             NaturallyAligned
 
      trim_sp
         | not (null push_regs) = push_sp
@@ -995,7 +998,7 @@ elimStackStores stackmap stackmaps area_off nodes
   go _stackmap [] = []
   go stackmap (n:ns)
    = case n of
-       CmmStore (CmmStackSlot area m) (CmmReg (CmmLocal r))
+       CmmStore (CmmStackSlot area m) (CmmReg (CmmLocal r)) _
          | Just (_,off) <- lookupUFM (sm_regs stackmap) r
          , area_off area + m == off
          -> go stackmap ns
diff --git a/compiler/GHC/Cmm/Lint.hs b/compiler/GHC/Cmm/Lint.hs
index 2c3c605240..e76be551f9 100644
--- a/compiler/GHC/Cmm/Lint.hs
+++ b/compiler/GHC/Cmm/Lint.hs
@@ -174,7 +174,7 @@ lintCmmMiddle node = case node of
            then return ()
            else cmmLintAssignErr (CmmAssign reg expr) erep reg_ty
 
-  CmmStore l r -> do
+  CmmStore l r _alignment -> do
            _ <- lintCmmExpr l
            _ <- lintCmmExpr r
            return ()
diff --git a/compiler/GHC/Cmm/Node.hs b/compiler/GHC/Cmm/Node.hs
index 40b268931d..841c726b14 100644
--- a/compiler/GHC/Cmm/Node.hs
+++ b/compiler/GHC/Cmm/Node.hs
@@ -74,7 +74,7 @@ data CmmNode e x where
   CmmAssign :: !CmmReg -> !CmmExpr -> CmmNode O O
     -- Assign to register
 
-  CmmStore :: !CmmExpr -> !CmmExpr -> CmmNode O O
+  CmmStore :: !CmmExpr -> !CmmExpr -> !AlignmentSpec -> CmmNode O O
     -- Assign to memory location.  Size is
     -- given by cmmExprType of the rhs.
@@ -322,7 +322,7 @@ instance UserOfRegs LocalReg (CmmNode e x) where
   {-# INLINEABLE foldRegsUsed #-}
   foldRegsUsed platform f !z n = case n of
     CmmAssign _ expr -> fold f z expr
-    CmmStore addr rval -> fold f (fold f z addr) rval
+    CmmStore addr rval _ -> fold f (fold f z addr) rval
     CmmUnsafeForeignCall t _ args -> fold f (fold f z t) args
     CmmCondBranch expr _ _ _ -> fold f z expr
     CmmSwitch expr _ -> fold f z expr
@@ -337,7 +337,7 @@ instance UserOfRegs GlobalReg (CmmNode e x) where
   {-# INLINEABLE foldRegsUsed #-}
   foldRegsUsed platform f !z n = case n of
     CmmAssign _ expr -> fold f z expr
-    CmmStore addr rval -> fold f (fold f z addr) rval
+    CmmStore addr rval _ -> fold f (fold f z addr) rval
     CmmUnsafeForeignCall t _ args -> fold f (fold f z t) args
     CmmCondBranch expr _ _ _ -> fold f z expr
     CmmSwitch expr _ -> fold f z expr
@@ -474,7 +474,7 @@ mapExp _ m@(CmmComment _) = m
 mapExp _ m@(CmmTick _) = m
 mapExp f (CmmUnwind regs) = CmmUnwind (map (fmap (fmap f)) regs)
 mapExp f (CmmAssign r e) = CmmAssign r (f e)
-mapExp f (CmmStore addr e) = CmmStore (f addr) (f e)
+mapExp f (CmmStore addr e align) = CmmStore (f addr) (f e) align
 mapExp f (CmmUnsafeForeignCall tgt fs as) = CmmUnsafeForeignCall (mapForeignTarget f tgt) fs (map f as)
 mapExp _ l@(CmmBranch _) = l
 mapExp f (CmmCondBranch e ti fi l) = CmmCondBranch (f e) ti fi l
@@ -505,7 +505,7 @@ mapExpM _ (CmmComment _) = Nothing
 mapExpM _ (CmmTick _) = Nothing
 mapExpM f (CmmUnwind regs) = CmmUnwind `fmap` mapM (\(r,e) -> mapM f e >>= \e' -> pure (r,e')) regs
 mapExpM f (CmmAssign r e) = CmmAssign r `fmap` f e
-mapExpM f (CmmStore addr e) = (\[addr', e'] -> CmmStore addr' e') `fmap` mapListM f [addr, e]
+mapExpM f (CmmStore addr e align) = (\[addr', e'] -> CmmStore addr' e' align) `fmap` mapListM f [addr, e]
 mapExpM _ (CmmBranch _) = Nothing
 mapExpM f (CmmCondBranch e ti fi l) = (\x -> CmmCondBranch x ti fi l) `fmap` f e
 mapExpM f (CmmSwitch e tbl) = (\x -> CmmSwitch x tbl) `fmap` f e
@@ -558,7 +558,7 @@ foldExp _ (CmmComment {}) z = z
 foldExp _ (CmmTick {}) z = z
 foldExp f (CmmUnwind xs) z = foldr (maybe id f) z (map snd xs)
 foldExp f (CmmAssign _ e) z = f e z
-foldExp f (CmmStore addr e) z = f addr $ f e z
+foldExp f (CmmStore addr e _) z = f addr $ f e z
 foldExp f (CmmUnsafeForeignCall t _ as) z = foldr f (foldExpForeignTarget f t z) as
 foldExp _ (CmmBranch _) z = z
 foldExp f (CmmCondBranch e _ _ _) z = f e z
diff --git a/compiler/GHC/Cmm/Ppr.hs b/compiler/GHC/Cmm/Ppr.hs
index 455a7d639a..c7a1579962 100644
--- a/compiler/GHC/Cmm/Ppr.hs
+++ b/compiler/GHC/Cmm/Ppr.hs
@@ -215,8 +215,11 @@ pprNode platform node = pp_node <+> pp_debug
       CmmAssign reg expr -> ppr reg <+> equals <+> pdoc platform expr <> semi
 
       -- rep[lv] = expr;
-      CmmStore lv expr -> rep <> brackets (pdoc platform lv) <+> equals <+> pdoc platform expr <> semi
+      CmmStore lv expr align -> rep <> align_mark <> brackets (pdoc platform lv) <+> equals <+> pdoc platform expr <> semi
           where
+            align_mark = case align of
+                           Unaligned -> text "^"
+                           NaturallyAligned -> empty
             rep = ppr ( cmmExprType platform expr )
 
       -- call "ccall" foo(x, y)[r1, r2];
diff --git a/compiler/GHC/Cmm/Sink.hs b/compiler/GHC/Cmm/Sink.hs
index 4d4cdb6d4c..84f9317f21 100644
--- a/compiler/GHC/Cmm/Sink.hs
+++ b/compiler/GHC/Cmm/Sink.hs
@@ -663,7 +663,7 @@ conflicts platform (r, rhs, addr) node
   | foldRegsUsed platform (\b r' -> r == r' || b) False node = True
 
   -- (3) a store to an address conflicts with a read of the same memory
-  | CmmStore addr' e <- node
+  | CmmStore addr' e _ <- node
   , memConflicts addr (loadAddr platform addr' (cmmExprWidth platform e)) = True
 
   -- (4) an assignment to Hp/Sp conflicts with a heap/stack read respectively
diff --git a/compiler/GHC/CmmToAsm.hs b/compiler/GHC/CmmToAsm.hs
index ddfcce8460..29ec7559f6 100644
--- a/compiler/GHC/CmmToAsm.hs
+++ b/compiler/GHC/CmmToAsm.hs
@@ -1023,10 +1023,10 @@ cmmStmtConFold stmt
                  CmmReg reg' | reg == reg' -> CmmComment (fsLit "nop")
                  new_src -> CmmAssign reg new_src
 
-        CmmStore addr src
+        CmmStore addr src align
            -> do addr' <- cmmExprConFold DataReference addr
                  src'  <- cmmExprConFold DataReference src
-                 return $ CmmStore addr' src'
+                 return $ CmmStore addr' src' align
 
         CmmCall { cml_target = addr }
            -> do addr' <- cmmExprConFold JumpReference addr
diff --git a/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
index f645720de7..507d5243b9 100644
--- a/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
+++ b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs
@@ -290,7 +290,7 @@ stmtToInstrs bid stmt = do
       where ty = cmmRegType platform reg
             format = cmmTypeFormat ty
 
-    CmmStore addr src
+    CmmStore addr src _alignment
       | isFloatType ty -> assignMem_FltCode format addr src
       | otherwise      -> assignMem_IntCode format addr src
       where ty = cmmExprType platform src
diff --git a/compiler/GHC/CmmToAsm/PPC/CodeGen.hs b/compiler/GHC/CmmToAsm/PPC/CodeGen.hs
index 430189d442..02308c59e0 100644
--- a/compiler/GHC/CmmToAsm/PPC/CodeGen.hs
+++ b/compiler/GHC/CmmToAsm/PPC/CodeGen.hs
@@ -174,7 +174,7 @@ stmtToInstrs stmt = do
       where ty = cmmRegType platform reg
            format = cmmTypeFormat ty
 
-    CmmStore addr src
+    CmmStore addr src _alignment
      | isFloatType ty -> assignMem_FltCode format addr src
      | target32Bit platform && isWord64 ty -> assignMem_I64Code addr src
diff --git a/compiler/GHC/CmmToAsm/X86/CodeGen.hs b/compiler/GHC/CmmToAsm/X86/CodeGen.hs
index 392d07c62e..bf799590cd 100644
--- a/compiler/GHC/CmmToAsm/X86/CodeGen.hs
+++ b/compiler/GHC/CmmToAsm/X86/CodeGen.hs
@@ -338,7 +338,7 @@ stmtToInstrs bid stmt = do
       where ty = cmmRegType platform reg
            format = cmmTypeFormat ty
 
-    CmmStore addr src
+    CmmStore addr src _alignment
      | isFloatType ty -> assignMem_FltCode format addr src
      | is32Bit && isWord64 ty -> assignMem_I64Code addr src
      | otherwise -> assignMem_IntCode format addr src
diff --git a/compiler/GHC/CmmToC.hs b/compiler/GHC/CmmToC.hs
index 65077dcc0b..9539208883 100644
--- a/compiler/GHC/CmmToC.hs
+++ b/compiler/GHC/CmmToC.hs
@@ -214,15 +214,14 @@ pprStmt platform stmt =
     CmmAssign dest src -> pprAssign platform dest src
 
-    CmmStore dest src
+    CmmStore dest src align
        | typeWidth rep == W64 && wordWidth platform /= W64
        -> (if isFloatType rep then text "ASSIGN_DBL"
                               else text "ASSIGN_Word64") <>
           parens (mkP_ <> pprExpr1 platform dest <> comma <> pprExpr platform src) <> semi
 
        | otherwise
-       -> hsep [ pprExpr platform (CmmLoad dest rep NaturallyAligned), equals, pprExpr platform src <> semi ]
-          -- TODO: Is this right?
+       -> hsep [ pprExpr platform (CmmLoad dest rep align), equals, pprExpr platform src <> semi ]
        where
          rep = cmmExprType platform src
@@ -1271,7 +1270,7 @@ te_Lit _ = return ()
 
 te_Stmt :: CmmNode e x -> TE ()
 te_Stmt (CmmAssign r e)          = te_Reg r >> te_Expr e
-te_Stmt (CmmStore l r)           = te_Expr l >> te_Expr r
+te_Stmt (CmmStore l r _)         = te_Expr l >> te_Expr r
 te_Stmt (CmmUnsafeForeignCall target rs es)
   = do  te_Target target
         mapM_ te_temp rs
diff --git a/compiler/GHC/CmmToLlvm/CodeGen.hs b/compiler/GHC/CmmToLlvm/CodeGen.hs
index 49359939ea..08b1478331 100644
--- a/compiler/GHC/CmmToLlvm/CodeGen.hs
+++ b/compiler/GHC/CmmToLlvm/CodeGen.hs
@@ -126,7 +126,8 @@ stmtToInstrs stmt = case stmt of
     CmmUnwind {} -> return (nilOL, [])
 
     CmmAssign reg src -> genAssign reg src
-    CmmStore addr src -> genStore addr src
+    CmmStore addr src align
+                      -> genStore addr src align
 
     CmmBranch id      -> genBranch id
     CmmCondBranch arg true false likely
@@ -207,7 +208,7 @@ genCall (PrimTarget (MO_UF_Conv w)) [dst] [e] = runStmtsDecls $ do
     castV <- lift $ mkLocalVar ty
     ve <- exprToVarW e
     statement $ Assignment castV $ Cast LM_Uitofp ve width
-    statement $ Store castV dstV
+    statement $ Store castV dstV Nothing
 
 genCall (PrimTarget (MO_UF_Conv _)) [_] args =
     panic $ "genCall: Too many arguments to MO_UF_Conv. " ++
@@ -263,12 +264,12 @@ genCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] = runStmtsDecls $
             AMO_Or  -> LAO_Or
             AMO_Xor -> LAO_Xor
     retVar <- doExprW targetTy $ AtomicRMW op ptrVar nVar SyncSeqCst
-    statement $ Store retVar dstVar
+    statement $ Store retVar dstVar Nothing
 
 genCall (PrimTarget (MO_AtomicRead _)) [dst] [addr] = runStmtsDecls $ do
     dstV <- getCmmRegW (CmmLocal dst)
     v1 <- genLoadW True addr (localRegType dst) NaturallyAligned
-    statement $ Store v1 dstV
+    statement $ Store v1 dstV Nothing
 
 genCall (PrimTarget (MO_Cmpxchg _width))
         [dst] [addr, old, new] = runStmtsDecls $ do
@@ -282,7 +283,7 @@ genCall (PrimTarget (MO_Cmpxchg _width))
     retVar <- doExprW (LMStructU [targetTy,i1])
               $ CmpXChg ptrVar oldVar newVar SyncSeqCst SyncSeqCst
     retVar' <- doExprW targetTy $ ExtractV retVar 0
-    statement $ Store retVar' dstVar
+    statement $ Store retVar' dstVar Nothing
 
 genCall (PrimTarget (MO_Xchg _width)) [dst] [addr, val] = runStmtsDecls $ do
     dstV <- getCmmRegW (CmmLocal dst) :: WriterT LlvmAccum LlvmM LlvmVar
@@ -292,7 +293,7 @@ genCall (PrimTarget (MO_Xchg _width)) [dst] [addr, val] = runStmtsDecls $ do
         ptrExpr = Cast LM_Inttoptr addrVar ptrTy
     ptrVar <- doExprW ptrTy ptrExpr
    resVar <- doExprW (getVarType valVar) (AtomicRMW LAO_Xchg ptrVar valVar SyncSeqCst)
-    statement $ Store resVar dstV
+    statement $ Store resVar dstV Nothing
 
 genCall (PrimTarget (MO_AtomicWrite _width)) [] [addr, val] = runStmtsDecls $ do
     addrVar <- exprToVarW addr
@@ -352,8 +353,8 @@ genCall (PrimTarget (MO_U_Mul2 w)) [dstH, dstL] [lhs, rhs] = runStmtsDecls $ do
     retH <- doExprW width $ Cast LM_Trunc retShifted width
    dstRegL <- getCmmRegW (CmmLocal dstL)
    dstRegH <- getCmmRegW (CmmLocal dstH)
-    statement $ Store retL dstRegL
-    statement $ Store retH dstRegH
+    statement $ Store retL dstRegL Nothing
+    statement $ Store retH dstRegH Nothing
 
 genCall (PrimTarget (MO_S_Mul2 w)) [dstC, dstH, dstL] [lhs, rhs] = runStmtsDecls $ do
     let width = widthToLlvmInt w
@@ -384,9 +385,9 @@ genCall (PrimTarget (MO_S_Mul2 w)) [dstC, dstH, dstL] [lhs, rhs] = runStmtsDecls
    dstRegL <- getCmmRegW (CmmLocal dstL)
    dstRegH <- getCmmRegW (CmmLocal dstH)
    dstRegC <- getCmmRegW (CmmLocal dstC)
-    statement $ Store retL dstRegL
-    statement $ Store retH dstRegH
-    statement $ Store retC dstRegC
+    statement $ Store retL dstRegL Nothing
+    statement $ Store retH dstRegH Nothing
+    statement $ Store retC dstRegC Nothing
 
 -- MO_U_QuotRem2 is another case we handle by widening the registers to double
 -- the width and use normal LLVM instructions (similarly to the MO_U_Mul2). The
@@ -420,8 +421,8 @@ genCall (PrimTarget (MO_U_QuotRem2 w))
     retRem <- narrow retExtRem
    dstRegQ <- lift $ getCmmReg (CmmLocal dstQ)
    dstRegR <- lift $ getCmmReg (CmmLocal dstR)
-    statement $ Store retDiv dstRegQ
-    statement $ Store retRem dstRegR
+    statement $ Store retDiv dstRegQ Nothing
+    statement $ Store retRem dstRegR Nothing
 
 -- Handle the MO_{Add,Sub}IntC separately. LLVM versions return a record from
 -- which we need to extract the actual values.
@@ -528,7 +529,7 @@ genCall target res args = do
             vreg <- getCmmRegW (CmmLocal creg)
             if retTy == pLower (getVarType vreg)
                 then do
-                    statement $ Store v1 vreg
+                    statement $ Store v1 vreg Nothing
                     doReturn
                 else do
                     let ty = pLower $ getVarType vreg
@@ -540,7 +541,7 @@ genCall target res args = do
                             ++ " returned type!"
 
                     v2 <- doExprW ty $ Cast op v1 ty
-                    statement $ Store v2 vreg
+                    statement $ Store v2 vreg Nothing
                     doReturn
 
 -- | Generate a call to an LLVM intrinsic that performs arithmetic operation
@@ -569,8 +570,8 @@ genCallWithOverflow t@(PrimTarget op) w [dstV, dstO] [lhs, rhs] = do
     (overflow, zext) <- doExpr width $ Cast LM_Zext overflowBit width
     dstRegV <- getCmmReg (CmmLocal dstV)
     dstRegO <- getCmmReg (CmmLocal dstO)
-    let storeV = Store value dstRegV
-        storeO = Store overflow dstRegO
+    let storeV = Store value dstRegV Nothing
+        storeO = Store overflow dstRegO Nothing
     return (stmts `snocOL` zext `snocOL` storeV `snocOL` storeO, top)
 
 genCallWithOverflow _ _ _ _ =
    panic "genCallExtract: wrong ForeignTarget or number of arguments"
@@ -635,7 +636,7 @@ genCallSimpleCast w t@(PrimTarget op) [dst] args = do
     (retV, s1) <- doExpr width $ Call StdCall fptr argsV' []
     (retVs', stmts5) <- castVars (cmmPrimOpRetValSignage op) [(retV,dstTy)]
     let retV' = singletonPanic "genCallSimpleCast" retVs'
-    let s2 = Store retV' dstV
+    let s2 = Store retV' dstV Nothing
     let stmts = stmts2 `appOL` stmts4 `snocOL`
                 s1 `appOL` stmts5 `snocOL` s2
@@ -667,7 +668,7 @@ genCallSimpleCast2 w t@(PrimTarget op) [dst] args = do
     (retV, s1) <- doExpr width $ Call StdCall fptr argsV' []
     (retVs', stmts5) <- castVars (cmmPrimOpRetValSignage op) [(retV,dstTy)]
     let retV' = singletonPanic "genCallSimpleCast2" retVs'
-    let s2 = Store retV' dstV
+    let s2 = Store retV' dstV Nothing
     let stmts = stmts2 `appOL` stmts4 `snocOL`
                 s1 `appOL` stmts5 `snocOL` s2
@@ -1097,54 +1098,54 @@ genAssign reg val = do
       -- Some registers are pointer types, so need to cast value to pointer
       LMPointer _ | getVarType vval == llvmWord platform -> do
           (v, s1) <- doExpr ty $ Cast LM_Inttoptr vval ty
-          let s2 = Store v vreg
+          let s2 = Store v vreg Nothing
           return (stmts `snocOL` s1 `snocOL` s2, top2)
 
       LMVector _ _ -> do
          (v, s1) <- doExpr ty $ Cast LM_Bitcast vval ty
-          let s2 = Store v vreg
+          let s2 = mkStore v vreg NaturallyAligned
          return (stmts `snocOL` s1 `snocOL` s2, top2)
 
      _ -> do
-          let s1 = Store vval vreg
+          let s1 = Store vval vreg Nothing
          return (stmts `snocOL` s1, top2)
 
 -- | CmmStore operation
-genStore :: CmmExpr -> CmmExpr -> LlvmM StmtData
+genStore :: CmmExpr -> CmmExpr -> AlignmentSpec -> LlvmM StmtData
 
 -- First we try to detect a few common cases and produce better code for
 -- these then the default case. We are mostly trying to detect Cmm code
 -- like I32[Sp + n] and use 'getelementptr' operations instead of the
 -- generic case that uses casts and pointer arithmetic
-genStore addr@(CmmReg (CmmGlobal r)) val
-    = genStore_fast addr r 0 val
+genStore addr@(CmmReg (CmmGlobal r)) val alignment
+    = genStore_fast addr r 0 val alignment
 
-genStore addr@(CmmRegOff (CmmGlobal r) n) val
-    = genStore_fast addr r n val
+genStore addr@(CmmRegOff (CmmGlobal r) n) val alignment
+    = genStore_fast addr r n val alignment
 
 genStore addr@(CmmMachOp (MO_Add _) [
                 (CmmReg (CmmGlobal r)),
                 (CmmLit (CmmInt n _))])
-         val
-    = genStore_fast addr r (fromInteger n) val
+         val alignment
+    = genStore_fast addr r (fromInteger n) val alignment
 
 genStore addr@(CmmMachOp (MO_Sub _) [
                 (CmmReg (CmmGlobal r)),
                 (CmmLit (CmmInt n _))])
-         val
-    = genStore_fast addr r (negate $ fromInteger n) val
+         val alignment
+    = genStore_fast addr r (negate $ fromInteger n) val alignment
 
 -- generic case
-genStore addr val
-    = getTBAAMeta topN >>= genStore_slow addr val
+genStore addr val alignment
+    = getTBAAMeta topN >>= genStore_slow addr val alignment
 
 -- | CmmStore operation
 -- This is a special case for storing to a global register pointer
 -- offset such as I32[Sp+8].
-genStore_fast :: CmmExpr -> GlobalReg -> Int -> CmmExpr
+genStore_fast :: CmmExpr -> GlobalReg -> Int -> CmmExpr -> AlignmentSpec
               -> LlvmM StmtData
-genStore_fast addr r n val
+genStore_fast addr r n val alignment
   = do platform <- getPlatform
       (gv, grt, s1) <- getCmmRegVal (CmmGlobal r)
       meta <- getTBAARegMeta r
@@ -1157,7 +1158,7 @@ genStore_fast addr r n val
        case pLower grt == getVarType vval of
             -- were fine
             True -> do
-                let s3 = MetaStmt meta $ Store vval ptr
+                let s3 = MetaStmt meta $ mkStore vval ptr alignment
                 return (stmts `appOL` s1 `snocOL` s2
                             `snocOL` s3, top)
@@ -1165,19 +1166,19 @@ genStore_fast addr r n val
            False -> do
                let ty = (pLift . getVarType) vval
                (ptr', s3) <- doExpr ty $ Cast LM_Bitcast ptr ty
-                let s4 = MetaStmt meta $ Store vval ptr'
+                let s4 = MetaStmt meta $ mkStore vval ptr' alignment
                return (stmts `appOL` s1 `snocOL` s2
                            `snocOL` s3 `snocOL` s4, top)
 
       -- If its a bit type then we use the slow method since
      -- we can't avoid casting anyway.
-      False -> genStore_slow addr val meta
+      False -> genStore_slow addr val alignment meta
 
 -- | CmmStore operation
 -- Generic case. Uses casts and pointer arithmetic if needed.
-genStore_slow :: CmmExpr -> CmmExpr -> [MetaAnnot] -> LlvmM StmtData
-genStore_slow addr val meta = do
+genStore_slow :: CmmExpr -> CmmExpr -> AlignmentSpec -> [MetaAnnot] -> LlvmM StmtData
+genStore_slow addr val alignment meta = do
    (vaddr, stmts1, top1) <- exprToVar addr
    (vval,  stmts2, top2) <- exprToVar val
@@ -1188,17 +1189,17 @@ genStore_slow addr val meta = do
        -- sometimes we need to cast an int to a pointer before storing
        LMPointer ty@(LMPointer _) | getVarType vval == llvmWord platform -> do
            (v, s1) <- doExpr ty $ Cast LM_Inttoptr vval ty
-            let s2 = MetaStmt meta $ Store v vaddr
+            let s2 = MetaStmt meta $ mkStore v vaddr alignment
            return (stmts `snocOL` s1 `snocOL` s2, top1 ++ top2)
 
        LMPointer _ -> do
-            let s1 = MetaStmt meta $ Store vval vaddr
+            let s1 = MetaStmt meta $ mkStore vval vaddr alignment
            return (stmts `snocOL` s1, top1 ++ top2)
 
        i@(LMInt _) | i == llvmWord platform -> do
            let vty = pLift $ getVarType vval
            (vptr, s1) <- doExpr vty $ Cast LM_Inttoptr vaddr vty
-            let s2 = MetaStmt meta $ Store vval vptr
+            let s2 = MetaStmt meta $ mkStore vval vptr alignment
            return (stmts `snocOL` s1 `snocOL` s2, top1 ++ top2)
 
        other ->
@@ -1208,6 +1209,16 @@ genStore_slow addr val meta = do
                    ", Size of var: " ++ show (llvmWidthInBits platform other) ++
                    ", Var: " ++ renderWithContext (llvmCgContext cfg) (ppVar cfg vaddr)))
 
+mkStore :: LlvmVar -> LlvmVar -> AlignmentSpec -> LlvmStatement
+mkStore vval vptr alignment =
+    Store vval vptr align
+  where
+    is_vector = isVector (pLower (getVarType vptr))
+    align = case alignment of
+              -- See Note [Alignment of vector-typed values]
+              _ | is_vector    -> Just 1
+              Unaligned        -> Just 1
+              NaturallyAligned -> Nothing
 
 -- | Unconditional branch
 genBranch :: BlockId -> LlvmM StmtData
@@ -2064,7 +2075,7 @@ funPrologue live cmmBlocks = do
          rval = if isLive r then arg else trash
          alloc = Assignment reg $ Alloca (pLower $ getVarType reg) 1
      markStackReg r
-      return $ toOL [alloc, Store rval reg]
+      return $ toOL [alloc, Store rval reg Nothing]
 
  return (concatOL stmtss `snocOL` jumpToEntry, [])
  where
diff --git a/compiler/GHC/Driver/GenerateCgIPEStub.hs b/compiler/GHC/Driver/GenerateCgIPEStub.hs
index ca609e9fcf..e1a751e762 100644
--- a/compiler/GHC/Driver/GenerateCgIPEStub.hs
+++ b/compiler/GHC/Driver/GenerateCgIPEStub.hs
@@ -261,7 +261,7 @@ generateCgIPEStub hsc_env this_mod denv s = do
    where
      find :: CLabel -> [CmmNode O O] -> Maybe IpeSourceLocation -> Maybe IpeSourceLocation
      find label (b : blocks) lastTick = case b of
-        (CmmStore _ (CmmLit (CmmLabel l))) -> if label == l then lastTick else find label blocks lastTick
+        (CmmStore _ (CmmLit (CmmLabel l)) _) -> if label == l then lastTick else find label blocks lastTick
        (CmmTick (SourceNote span name)) -> find label blocks $ Just (span, name)
        _ -> find label blocks lastTick
      find _ [] _ = Nothing
diff --git a/compiler/GHC/Llvm/Ppr.hs b/compiler/GHC/Llvm/Ppr.hs
index 405296f79f..f02d2e1024 100644
--- a/compiler/GHC/Llvm/Ppr.hs
+++ b/compiler/GHC/Llvm/Ppr.hs
@@ -219,7 +219,8 @@ ppLlvmStatement opts stmt =
        BranchIf cond ifT ifF  -> ind $ ppBranchIf opts cond ifT ifF
        Comment  comments      -> ind $ ppLlvmComments comments
        MkLabel  label         -> ppLlvmBlockLabel label
-        Store    value ptr     -> ind $ ppStore opts value ptr
+        Store    value ptr align
+                               -> ind $ ppStore opts value ptr align
        Switch   scrut def tgs -> ind $ ppSwitch opts scrut def tgs
        Return   result        -> ind $ ppReturn opts result
        Expr     expr          -> ind $ ppLlvmExpression opts expr
@@ -386,14 +387,14 @@ ppALoad opts ord st var =
  in text "load atomic" <+> ppr derefType <> comma <+> ppVar opts var <> sThreaded
      <+> ppSyncOrdering ord <> align
 
-ppStore :: LlvmCgConfig -> LlvmVar -> LlvmVar -> SDoc
-ppStore opts val dst
-  | isVecPtrVar dst = text "store" <+> ppVar opts val <> comma <+> ppVar opts dst <>
-                      comma <+> text "align 1"
-  | otherwise       = text "store" <+> ppVar opts val <> comma <+> ppVar opts dst
+ppStore :: LlvmCgConfig -> LlvmVar -> LlvmVar -> LMAlign -> SDoc
+ppStore opts val dst alignment =
+    text "store" <+> ppVar opts val <> comma <+> ppVar opts dst <> align
  where
-    isVecPtrVar :: LlvmVar -> Bool
-    isVecPtrVar = isVector . pLower . getVarType
+    align =
+      case alignment of
+        Just n  -> text ", align" <+> ppr n
+        Nothing -> empty
 
 ppCast :: LlvmCgConfig -> LlvmCastOp -> LlvmVar -> LlvmType -> SDoc
diff --git a/compiler/GHC/Llvm/Syntax.hs b/compiler/GHC/Llvm/Syntax.hs
index befac77734..115f5b58c3 100644
--- a/compiler/GHC/Llvm/Syntax.hs
+++ b/compiler/GHC/Llvm/Syntax.hs
@@ -150,7 +150,7 @@ data LlvmStatement
        * value: Variable/Constant to store.
        * ptr:   Location to store the value in
    -}
-  | Store LlvmVar LlvmVar
+  | Store LlvmVar LlvmVar LMAlign
 
  {- |
      Multiway branch
diff --git a/compiler/GHC/StgToCmm/CgUtils.hs b/compiler/GHC/StgToCmm/CgUtils.hs
index bc42a11d1c..4718cbf74a 100644
--- a/compiler/GHC/StgToCmm/CgUtils.hs
+++ b/compiler/GHC/StgToCmm/CgUtils.hs
@@ -152,7 +152,7 @@ fixStgRegStmt platform stmt = fixAssign $ mapExpDeep fixExpr stmt
            let baseAddr = get_GlobalReg_addr platform reg
            in case reg `elem` activeStgRegs platform of
                True  -> CmmAssign (CmmGlobal reg) src
-                False -> CmmStore baseAddr src
+                False -> CmmStore baseAddr src NaturallyAligned
 
        other_stmt -> other_stmt
 
    fixExpr expr = case expr of
diff --git a/compiler/GHC/StgToCmm/Monad.hs b/compiler/GHC/StgToCmm/Monad.hs
index 5ab12a4634..04654d40d3 100644
--- a/compiler/GHC/StgToCmm/Monad.hs
+++ b/compiler/GHC/StgToCmm/Monad.hs
@@ -724,8 +724,9 @@ emitUnwind regs = do
 emitAssign :: CmmReg -> CmmExpr -> FCode ()
 emitAssign l r = emitCgStmt (CgStmt (CmmAssign l r))
 
+-- | Assumes natural alignment.
 emitStore :: CmmExpr -> CmmExpr -> FCode ()
-emitStore l r = emitCgStmt (CgStmt (CmmStore l r))
+emitStore l r = emitCgStmt (CgStmt (CmmStore l r NaturallyAligned))
 
 emit :: CmmAGraph -> FCode ()
 emit ag
-- 
cgit v1.2.1
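
For readers following the API change, a minimal usage sketch (not part of the commit): with the new AlignmentSpec field on CmmStore, a call site that knows a store may be misaligned passes Unaligned explicitly, rather than going through mkStore/emitStore, which per the comments added above assume natural alignment. The helper name mkUnalignedStore is hypothetical, introduced here only for illustration:

    -- Hypothetical companion to GHC.Cmm.Graph.mkStore: emit a mid-graph
    -- store node that is marked as potentially unaligned.
    mkUnalignedStore :: CmmExpr -> CmmExpr -> CmmAGraph
    mkUnalignedStore l r = mkMiddle $ CmmStore l r Unaligned

In the LLVM backend, the new mkStore helper in GHC.CmmToLlvm.CodeGen then maps Unaligned (and any vector-typed store) to Just 1, which ppStore renders as an explicit "align 1" annotation, while NaturallyAligned maps to Nothing and leaves the store at LLVM's default alignment for the stored type.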