Diffstat (limited to 'compiler/cmm/CmmLayoutStack.hs')
-rw-r--r--  compiler/cmm/CmmLayoutStack.hs | 1048
1 files changed, 1048 insertions, 0 deletions
diff --git a/compiler/cmm/CmmLayoutStack.hs b/compiler/cmm/CmmLayoutStack.hs
new file mode 100644
index 0000000000..f0dce4a6a1
--- /dev/null
+++ b/compiler/cmm/CmmLayoutStack.hs
@@ -0,0 +1,1048 @@
+{-# LANGUAGE RecordWildCards, GADTs #-}
+module CmmLayoutStack (
+ cmmLayoutStack, setInfoTableStackMap, cmmSink
+ ) where
+
+import StgCmmUtils ( callerSaveVolatileRegs ) -- XXX
+import StgCmmForeign ( saveThreadState, loadThreadState ) -- XXX
+
+import Cmm
+import BlockId
+import CLabel
+import CmmUtils
+import MkGraph
+import Module
+import ForeignCall
+import CmmLive
+import CmmProcPoint
+import SMRep
+import Hoopl hiding ((<*>), mkLast, mkMiddle)
+import Constants
+import UniqSupply
+import Maybes
+import UniqFM
+import Util
+
+import FastString
+import Outputable
+import Data.Map (Map)
+import qualified Data.Map as Map
+import qualified Data.Set as Set
+import Control.Monad.Fix
+import Data.Array as Array
+import Data.Bits
+import Data.List (nub, partition)
+import Control.Monad (liftM)
+
+#include "HsVersions.h"
+
+
+data StackSlot = Occupied | Empty
+ -- Occupied: a return address or part of an update frame; Empty: a free slot we can reuse for saving variables
+
+instance Outputable StackSlot where
+ ppr Occupied = ptext (sLit "XXX")
+ ppr Empty = ptext (sLit "---")
+
+-- All stack locations are expressed as positive byte offsets from the
+-- "base", which is defined to be the address above the return address
+-- on the stack on entry to this CmmProc.
+--
+-- Lower addresses have higher StackLocs.
+--
+type StackLoc = ByteOff
+
+{-
+ A StackMap describes the stack at any given point. At a continuation
+ it has a particular layout, like this:
+
+ | | <- base
+ |-------------|
+ | ret0 | <- base + 8
+ |-------------|
+ . upd frame . <- base + sm_ret_off
+ |-------------|
+ | |
+ . vars .
+ . (live/dead) .
+ | | <- base + sm_sp - sm_args
+ |-------------|
+ | ret1 |
+ . ret vals . <- base + sm_sp (<--- Sp points here)
+ |-------------|
+
+Why do we include the final return address (ret0) in our stack map? I
+have absolutely no idea, but it seems to be done that way consistently
+in the rest of the code generator, so I played along here. --SDM
+
+Note that we will be constructing an info table for the continuation
+(ret1), which needs to describe the stack down to, but not including,
+the update frame (or ret0, if there is no update frame).
+-}
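+
+{- A purely illustrative example (all numbers hypothetical): with
+   sm_ret_off = 16, sm_args = 8 and sm_sp = 40, the 16 bytes nearest
+   the base (ret0 and the update frame) are not described by the
+   continuation's info table, the saved variables occupy byte offsets
+   16..32 from the base, the young area for ret1 is the 8 bytes at
+   offsets 32..40, and Sp points at base + 40 on entry to ret1.
+-}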
+
+data StackMap = StackMap
+ { sm_sp :: StackLoc
+ -- ^ the offset of Sp relative to the base on entry
+ -- to this block.
+ , sm_args :: ByteOff
+ -- ^ the number of bytes of arguments in the area for this block
+ -- Defn: the offset of young(L) relative to the base is given by
+ -- (sm_sp - sm_args) of the StackMap for block L.
+ , sm_ret_off :: ByteOff
+ -- ^ Number of bytes of stack that we do not describe with an info
+ -- table, because it contains an update frame.
+ , sm_regs :: UniqFM (LocalReg,StackLoc)
+ -- ^ regs on the stack
+ }
+
+instance Outputable StackMap where
+ ppr StackMap{..} =
+ text "Sp = " <> int sm_sp $$
+ text "sm_args = " <> int sm_args $$
+ text "sm_ret_off = " <> int sm_ret_off $$
+ text "sm_regs = " <> ppr (eltsUFM sm_regs)
+
+
+cmmLayoutStack :: ProcPointSet -> ByteOff -> CmmGraph
+ -> UniqSM (CmmGraph, BlockEnv StackMap)
+cmmLayoutStack procpoints entry_args
+ graph0@(CmmGraph { g_entry = entry })
+ = do
+ pprTrace "cmmLayoutStack" (ppr entry_args) $ return ()
+ (graph, liveness) <- removeDeadAssignments graph0
+ pprTrace "liveness" (ppr liveness) $ return ()
+ let blocks = postorderDfs graph
+
+ (final_stackmaps, final_high_sp, new_blocks) <-
+ mfix $ \ ~(rec_stackmaps, rec_high_sp, _new_blocks) ->
+ layout procpoints liveness entry entry_args
+ rec_stackmaps rec_high_sp blocks
+
+ new_blocks' <- mapM lowerSafeForeignCall new_blocks
+
+ pprTrace ("Sp HWM") (ppr final_high_sp) $
+ return (ofBlockList entry new_blocks', final_stackmaps)
+
+
+
+layout :: BlockSet -- proc points
+ -> BlockEnv CmmLive -- liveness
+ -> BlockId -- entry
+ -> ByteOff -- stack args on entry
+
+ -> BlockEnv StackMap -- [final] stack maps
+ -> ByteOff -- [final] Sp high water mark
+
+ -> [CmmBlock] -- [in] blocks
+
+ -> UniqSM
+ ( BlockEnv StackMap -- [out] stack maps
+ , ByteOff -- [out] Sp high water mark
+ , [CmmBlock] -- [out] new blocks
+ )
+
+layout procpoints liveness entry entry_args final_stackmaps final_hwm blocks
+ = go blocks init_stackmap entry_args []
+ where
+ (updfr, cont_info) = collectContInfo blocks
+
+ init_stackmap = mapSingleton entry StackMap{ sm_sp = entry_args
+ , sm_args = entry_args
+ , sm_ret_off = updfr
+ , sm_regs = emptyUFM
+ }
+
+ go [] acc_stackmaps acc_hwm acc_blocks
+ = return (acc_stackmaps, acc_hwm, acc_blocks)
+
+ go (b0 : bs) acc_stackmaps acc_hwm acc_blocks
+ = do
+ let (entry0@(CmmEntry entry_lbl), middle0, last0) = blockSplit b0
+
+ let stack0@StackMap { sm_sp = sp0 }
+ = mapFindWithDefault
+ (pprPanic "no stack map for" (ppr entry_lbl))
+ entry_lbl acc_stackmaps
+
+ pprTrace "layout" (ppr entry_lbl <+> ppr stack0) $ return ()
+
+ -- (a) Update the stack map to include the effects of
+ -- assignments in this block
+ let stack1 = foldBlockNodesF (procMiddle acc_stackmaps) middle0 stack0
+
+ -- (b) Insert assignments to reload all the live variables if this
+ -- block is a proc point
+ let middle1 = if entry_lbl `setMember` procpoints
+ then foldr blockCons middle0 (insertReloads stack0)
+ else middle0
+
+ -- (c) Look at the last node and if we are making a call or
+ -- jumping to a proc point, we must save the live
+ -- variables, adjust Sp, and construct the StackMaps for
+ -- each of the successor blocks. See handleLastNode for
+ -- details.
+ (middle2, sp_off, last1, fixup_blocks, out)
+ <- handleLastNode procpoints liveness cont_info
+ acc_stackmaps stack1 middle0 last0
+
+ pprTrace "layout(out)" (ppr out) $ return ()
+
+ -- (d) Manifest Sp: run over the nodes in the block and replace
+ -- CmmStackSlot with CmmLoad from Sp with a concrete offset.
+ --
+ -- our block:
+ -- middle1 -- the original middle nodes
+ -- middle2 -- live variable saves from handleLastNode
+ -- Sp = Sp + sp_off -- Sp adjustment goes here
+ -- last1 -- the last node
+ --
+ let middle_pre = blockToList $ foldl blockSnoc middle1 middle2
+
+ sp_high = final_hwm - entry_args
+ -- The stack check value is adjusted by the Sp offset on
+ -- entry to the proc, which is entry_args. We are
+ -- assuming that we only do a stack check at the
+ -- beginning of a proc, and we don't modify Sp before the
+ -- check.
+
+ final_blocks = manifestSp final_stackmaps stack0 sp0 sp_high entry0
+ middle_pre sp_off last1 fixup_blocks
+
+ acc_stackmaps' = mapUnion acc_stackmaps out
+
+ hwm' = maximum (acc_hwm : (sp0 - sp_off) : map sm_sp (mapElems out))
+
+ go bs acc_stackmaps' hwm' (final_blocks ++ acc_blocks)
+
+
+-- -----------------------------------------------------------------------------
+
+-- This doesn't seem right somehow. We need to find out whether this
+-- proc will push some update frame material at some point, so that we
+-- can avoid using that area of the stack for spilling. The
+-- updfr_space field of the CmmProc *should* tell us, but it doesn't
+-- (I think maybe it gets filled in later when we do proc-point
+-- splitting).
+--
+-- So we'll just take the max of all the cml_ret_offs. This could be
+-- unnecessarily pessimistic, but probably not in the code we
+-- generate.
+
+collectContInfo :: [CmmBlock] -> (ByteOff, BlockEnv ByteOff)
+collectContInfo blocks
+ = (maximum ret_offs, mapFromList (catMaybes mb_argss))
+ where
+ (mb_argss, ret_offs) = mapAndUnzip get_cont blocks
+
+ get_cont b =
+ case lastNode b of
+ CmmCall { cml_cont = Just l, .. }
+ -> (Just (l, cml_ret_args), cml_ret_off)
+ CmmForeignCall { .. }
+ -> (Just (succ, 0), updfr) -- ??
+ _other -> (Nothing, 0)
+
+
+-- -----------------------------------------------------------------------------
+-- Updating the StackMap from middle nodes
+
+-- Look for loads from stack slots, and update the StackMap. This is
+-- purely for optimisation reasons, so that we can avoid saving a
+-- variable back to a different stack slot if it is already on the
+-- stack.
+--
+-- This happens a lot: for example when function arguments are passed
+-- on the stack and need to be immediately saved across a call, we
+-- want to just leave them where they are on the stack.
+--
+procMiddle :: BlockEnv StackMap -> CmmNode e x -> StackMap -> StackMap
+procMiddle stackmaps node sm
+ = case node of
+ CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot area off) _)
+ -> sm { sm_regs = addToUFM (sm_regs sm) r (r,loc) }
+ where loc = getStackLoc area off stackmaps
+ CmmAssign (CmmLocal r) _other
+ -> sm { sm_regs = delFromUFM (sm_regs sm) r }
+ _other
+ -> sm
+
+getStackLoc :: Area -> ByteOff -> BlockEnv StackMap -> StackLoc
+getStackLoc Old n _ = n
+getStackLoc (Young l) n stackmaps =
+ case mapLookup l stackmaps of
+ Nothing -> pprPanic "getStackLoc" (ppr l)
+ Just sm -> sm_sp sm - sm_args sm + n
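+
+-- Illustrative only (hypothetical numbers): if the StackMap for L has
+-- sm_sp = 40 and sm_args = 8, then [young(L) + n] maps to byte offset
+-- 40 - 8 + n = 32 + n from the base, so the young area for L (its
+-- return address and arguments) occupies offsets 32..40.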
+
+
+-- -----------------------------------------------------------------------------
+-- Handling stack allocation for a last node
+
+-- We take a single last node and turn it into:
+--
+-- C1 (some statements)
+-- Sp = Sp + N
+-- C2 (some more statements)
+-- call f() -- the actual last node
+--
+-- plus possibly some more blocks (we may have to add some fixup code
+-- between the last node and the continuation).
+--
+-- C1: the code for saving the variables across this last node onto
+-- the stack, if the continuation is a call or jumps to a proc point.
+--
+-- C2: if the last node is a safe foreign call, we have to inject some
+-- extra code that goes *after* the Sp adjustment.
+
+handleLastNode
+ :: ProcPointSet -> BlockEnv CmmLive -> BlockEnv ByteOff
+ -> BlockEnv StackMap -> StackMap
+ -> Block CmmNode O O
+ -> CmmNode O C
+ -> UniqSM
+ ( [CmmNode O O] -- nodes to go *before* the Sp adjustment
+ , ByteOff -- amount to adjust Sp
+ , CmmNode O C -- new last node
+ , [CmmBlock] -- new blocks
+ , BlockEnv StackMap -- stackmaps for the continuations
+ )
+
+handleLastNode procpoints liveness cont_info stackmaps
+ stack0@StackMap { sm_sp = sp0 } middle last
+ = case last of
+ -- At each return / tail call,
+ -- adjust Sp to point to the last argument pushed, which
+ -- is cml_args, after popping any other junk from the stack.
+ CmmCall{ cml_cont = Nothing, .. } -> do
+ let sp_off = sp0 - cml_args
+ return ([], sp_off, last, [], mapEmpty)
+
+ -- At each CmmCall with a continuation:
+ CmmCall{ cml_cont = Just cont_lbl, .. } ->
+ return $ lastCall cont_lbl cml_args cml_ret_args cml_ret_off
+
+ CmmForeignCall{ succ = cont_lbl, .. } -> do
+ return $ lastCall cont_lbl wORD_SIZE wORD_SIZE (sm_ret_off stack0)
+ -- one word each for args and results: the return address
+
+ CmmBranch{..} -> handleProcPoints
+ CmmCondBranch{..} -> handleProcPoints
+ CmmSwitch{..} -> handleProcPoints
+
+ where
+ -- Calls and ForeignCalls are handled the same way:
+ lastCall :: BlockId -> ByteOff -> ByteOff -> ByteOff
+ -> ( [CmmNode O O]
+ , ByteOff
+ , CmmNode O C
+ , [CmmBlock]
+ , BlockEnv StackMap
+ )
+ lastCall lbl cml_args cml_ret_args cml_ret_off
+ = ( assignments
+ , spOffsetForCall sp0 cont_stack cml_args
+ , last
+ , [] -- no new blocks
+ , mapSingleton lbl cont_stack )
+ where
+ (assignments, cont_stack) = prepareStack lbl cml_ret_args cml_ret_off
+
+
+ prepareStack lbl cml_ret_args cml_ret_off
+ | Just cont_stack <- mapLookup lbl stackmaps
+ -- If we have already seen this continuation before, then
+ -- we just have to make the stack look the same:
+ = (fixupStack stack0 cont_stack, cont_stack)
+ -- Otherwise, we have to allocate the stack frame
+ | otherwise
+ = (save_assignments, new_cont_stack)
+ where
+ (new_cont_stack, save_assignments)
+ = setupStackFrame lbl liveness cml_ret_off cml_ret_args stack0
+
+
+ -- For other last nodes (branches), if any of the targets is a
+ -- proc point, we have to set up the stack to match what the proc
+ -- point is expecting.
+ --
+ handleProcPoints :: UniqSM ( [CmmNode O O]
+ , ByteOff
+ , CmmNode O C
+ , [CmmBlock]
+ , BlockEnv StackMap )
+
+ handleProcPoints
+ -- Note [diamond proc point]
+ | Just l <- futureContinuation middle
+ , (nub $ filter (`setMember` procpoints) $ successors last) == [l]
+ = do
+ let cont_args = mapFindWithDefault 0 l cont_info
+ (assigs, cont_stack) = prepareStack l cont_args (sm_ret_off stack0)
+ out = mapFromList [ (l', cont_stack)
+ | l' <- successors last ]
+ return ( assigs
+ , spOffsetForCall sp0 cont_stack wORD_SIZE
+ , last
+ , []
+ , out)
+
+ | otherwise = do
+ pps <- mapM handleProcPoint (successors last)
+ let lbl_map :: LabelMap Label
+ lbl_map = mapFromList [ (l,tmp) | (l,tmp,_,_) <- pps ]
+ fix_lbl l = mapLookup l lbl_map `orElse` l
+ return ( []
+ , 0
+ , mapSuccessors fix_lbl last
+ , concat [ blk | (_,_,_,blk) <- pps ]
+ , mapFromList [ (l, sm) | (l,_,sm,_) <- pps ] )
+
+ -- For each proc point that is a successor of this block
+ -- (a) if the proc point already has a stackmap, we need to
+ -- shuffle the current stack to make it look the same.
+ -- We have to insert a new block to make this happen.
+ -- (b) otherwise, call "allocate live stack0" to make the
+ -- stack map for the proc point
+ handleProcPoint :: BlockId
+ -> UniqSM (BlockId, BlockId, StackMap, [CmmBlock])
+ handleProcPoint l
+ | not (l `setMember` procpoints) = return (l, l, stack0, [])
+ | otherwise = do
+ tmp_lbl <- liftM mkBlockId $ getUniqueM
+ let
+ (stack2, assigs) =
+ case mapLookup l stackmaps of
+ Just pp_sm -> (pp_sm, fixupStack stack0 pp_sm)
+ Nothing ->
+ pprTrace "first visit to proc point"
+ (ppr l <+> ppr stack1) $
+ (stack1, assigs)
+ where
+ cont_args = mapFindWithDefault 0 l cont_info
+ (stack1, assigs) =
+ setupStackFrame l liveness (sm_ret_off stack0)
+ cont_args stack0
+
+ sp_off = sp0 - sm_sp stack2
+
+ block = blockJoin (CmmEntry tmp_lbl)
+ (maybeAddSpAdj sp_off (blockFromList assigs))
+ (CmmBranch l)
+ --
+ return (l, tmp_lbl, stack2, [block])
+
+
+
+-- Sp is currently pointing to current_sp,
+-- we want it to point to
+-- (sm_sp cont_stack - sm_args cont_stack + args)
+-- so the difference is
+-- current_sp - (sm_sp cont_stack - sm_args cont_stack + args)
+spOffsetForCall :: ByteOff -> StackMap -> ByteOff -> ByteOff
+spOffsetForCall current_sp cont_stack args
+ = current_sp - (sm_sp cont_stack - sm_args cont_stack + args)
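+
+-- Illustrative only (hypothetical numbers): with current_sp = 40 and a
+-- continuation whose StackMap has sm_sp = 24 and sm_args = 8, a call
+-- that pushes args = 8 bytes of arguments yields an offset of
+-- 40 - (24 - 8 + 8) = 16, i.e. "Sp = Sp + 16" moves Sp 16 bytes back
+-- towards the base before the call.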
+
+
+-- | create a sequence of assignments to establish the new StackMap,
+-- given the old StackMap.
+fixupStack :: StackMap -> StackMap -> [CmmNode O O]
+fixupStack old_stack new_stack = concatMap move new_locs
+ where
+ old_map :: Map LocalReg ByteOff
+ old_map = Map.fromList (stackSlotRegs old_stack)
+ new_locs = stackSlotRegs new_stack
+
+ move (r,n)
+ | Just m <- Map.lookup r old_map, n == m = []
+ | otherwise = [CmmStore (CmmStackSlot Old n)
+ (CmmReg (CmmLocal r))]
+
+
+
+setupStackFrame
+ :: BlockId -- label of continuation
+ -> BlockEnv CmmLive -- liveness
+ -> ByteOff -- updfr
+ -> ByteOff -- bytes of return values on stack
+ -> StackMap -- current StackMap
+ -> (StackMap, [CmmNode O O])
+
+setupStackFrame lbl liveness updfr_off ret_args stack0
+ = (cont_stack, assignments)
+ where
+ -- get the set of LocalRegs live in the continuation
+ live = mapFindWithDefault Set.empty lbl liveness
+
+ -- the stack from the base to updfr_off is off-limits.
+ -- our new stack frame contains:
+ -- * saved live variables
+ -- * the return address [young(C) + 8]
+ -- * the args for the call,
+ -- which are replaced by the return values at the return
+ -- point.
+
+ -- everything up to updfr_off is off-limits
+ -- stack1 contains updfr_off, plus everything we need to save
+ (stack1, assignments) = allocate updfr_off live stack0
+
+ -- And the Sp at the continuation is:
+ -- sm_sp stack1 + ret_args
+ cont_stack = stack1{ sm_sp = sm_sp stack1 + ret_args
+ , sm_args = ret_args
+ , sm_ret_off = updfr_off
+ }
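+
+-- Illustrative only (hypothetical numbers): if allocate returns stack1
+-- with sm_sp = 16 and the call returns ret_args = 8 bytes of results,
+-- then the continuation enters with sm_sp = 24, sm_args = 8 and
+-- sm_ret_off = updfr_off; its 8-byte young area sits at offsets
+-- 16..24, where Sp points on entry.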
+
+
+-- -----------------------------------------------------------------------------
+-- Note [diamond proc point]
+--
+-- This special case looks for the pattern we get from a typical
+-- tagged case expression:
+--
+-- Sp[young(L1)] = L1
+-- if (R1 & 7) != 0 goto L1 else goto L2
+-- L2:
+-- call [R1] returns to L1
+-- L1: live: {y}
+-- x = R1
+--
+-- If we let the generic case handle this, we get
+--
+-- Sp[-16] = L1
+-- if (R1 & 7) != 0 goto L1a else goto L2
+-- L2:
+-- Sp[-8] = y
+-- Sp = Sp - 16
+-- call [R1] returns to L1
+-- L1a:
+-- Sp[-8] = y
+-- Sp = Sp - 16
+-- goto L1
+-- L1:
+-- x = R1
+--
+-- The code for saving the live vars is duplicated in each branch, and
+-- furthermore there is an extra jump in the fast path (assuming L1 is
+-- a proc point, which it probably is if there is a heap check).
+--
+-- So to fix this we want to set up the stack frame before the
+-- conditional jump. How do we know when to do this, and when it is
+-- safe? The basic idea is, when we see the assignment
+--
+-- Sp[young(L)] = L
+--
+-- we know that
+-- * we are definitely heading for L
+-- * there can be no more reads from another stack area, because young(L)
+-- overlaps with it.
+--
+-- We don't necessarily know that everything live at L is live now
+-- (some might be assigned between here and the jump to L). So we
+-- simplify and only do the optimisation when we see
+--
+-- (1) a block containing an assignment of a return address L
+-- (2) ending in a branch where one (and only one) continuation goes to L,
+-- and no other continuations go to proc points.
+--
+-- then we allocate the stack frame for L at the end of the block,
+-- before the branch.
+--
+-- We could generalise (2), but that would make it a bit more
+-- complicated to handle, and this currently catches the common case.
+
+futureContinuation :: Block CmmNode O O -> Maybe BlockId
+futureContinuation middle = foldBlockNodesB f middle Nothing
+ where f :: CmmNode a b -> Maybe BlockId -> Maybe BlockId
+ f (CmmStore (CmmStackSlot (Young l) _) (CmmLit (CmmBlock _))) _
+ = Just l
+ f _ r = r
+
+-- -----------------------------------------------------------------------------
+-- Saving live registers
+
+-- | Given a set of live registers and a StackMap, save all the registers
+-- on the stack and return the new StackMap and the assignments to do
+-- the saving.
+--
+allocate :: ByteOff -> RegSet -> StackMap -> (StackMap, [CmmNode O O])
+allocate ret_off live stackmap@StackMap{ sm_sp = sp0
+ , sm_regs = regs0 }
+ =
+ pprTrace "allocate" (ppr live $$ ppr stackmap) $
+
+ -- we only have to save regs that are not already in a slot
+ let to_save = filter (not . (`elemUFM` regs0)) (Set.elems live)
+ regs1 = filterUFM (\(r,_) -> elemRegSet r live) regs0
+ in
+
+ -- make a map of the stack
+ let stack = reverse $ Array.elems $
+ accumArray (\_ x -> x) Empty (1, toWords (max sp0 ret_off)) $
+ ret_words ++ live_words
+ where ret_words =
+ [ (x, Occupied)
+ | x <- [ 1 .. toWords ret_off] ]
+ live_words =
+ [ (toWords x, Occupied)
+ | (r,off) <- eltsUFM regs1,
+ let w = localRegBytes r,
+ x <- [ off, off-wORD_SIZE .. off - w + 1] ]
+ in
+
+ -- Pass over the stack: find slots to save all the new live variables,
+ -- choosing the oldest slots first (hence a foldr).
+ let
+ save slot ([], stack, n, assigs, regs) -- no more regs to save
+ = ([], slot:stack, n `plusW` 1, assigs, regs)
+ save slot (to_save, stack, n, assigs, regs)
+ = case slot of
+ Occupied -> (to_save, Occupied:stack, n `plusW` 1, assigs, regs)
+ Empty
+ | Just (stack', r, to_save') <-
+ select_save to_save (slot:stack)
+ -> let assig = CmmStore (CmmStackSlot Old n')
+ (CmmReg (CmmLocal r))
+ n' = n `plusW` 1
+ in
+ (to_save', stack', n', assig : assigs, (r,(r,n')):regs)
+
+ | otherwise
+ -> (to_save, slot:stack, n `plusW` 1, assigs, regs)
+
+ -- we should do better here: right now we'll fit the smallest first,
+ -- but it would make more sense to fit the biggest first.
+ select_save :: [LocalReg] -> [StackSlot]
+ -> Maybe ([StackSlot], LocalReg, [LocalReg])
+ select_save regs stack = go regs []
+ where go [] _no_fit = Nothing
+ go (r:rs) no_fit
+ | Just rest <- dropEmpty words stack
+ = Just (replicate words Occupied ++ rest, r, rs++no_fit)
+ | otherwise
+ = go rs (r:no_fit)
+ where words = localRegWords r
+
+ -- fill in empty slots as much as possible
+ (still_to_save, save_stack, n, save_assigs, save_regs)
+ = foldr save (to_save, [], 0, [], []) stack
+
+ -- push any remaining live vars on the stack
+ (push_sp, push_assigs, push_regs)
+ = foldr push (n, [], []) still_to_save
+ where
+ push r (n, assigs, regs)
+ = (n', assig : assigs, (r,(r,n')) : regs)
+ where
+ n' = n + localRegBytes r
+ assig = CmmStore (CmmStackSlot Old n')
+ (CmmReg (CmmLocal r))
+
+ trim_sp
+ | not (null push_regs) = push_sp
+ | otherwise
+ = n `plusW` (- length (takeWhile isEmpty save_stack))
+
+ final_regs = regs1 `addListToUFM` push_regs
+ `addListToUFM` save_regs
+
+ in
+ -- XXX should be an assert
+ if ( n /= max sp0 ret_off ) then pprPanic "allocate" (ppr n <+> ppr sp0 <+> ppr ret_off) else
+
+ if (trim_sp .&. (wORD_SIZE - 1)) /= 0 then pprPanic "allocate2" (ppr trim_sp <+> ppr final_regs <+> ppr push_sp) else
+
+ ( stackmap { sm_regs = final_regs , sm_sp = trim_sp }
+ , push_assigs ++ save_assigs )
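+
+-- Illustrative only (hypothetical numbers, 8-byte words): with
+-- sp0 = 24, ret_off = 8, no regs already on the stack and
+-- live = {x :: I64}, the word at offset 8 is Occupied (the return
+-- address) and the words at offsets 16 and 24 are Empty.  The foldr
+-- fills the oldest free slot first, so x is stored to
+-- CmmStackSlot Old 16 and recorded at StackLoc 16; the still-empty
+-- word at offset 24 is trimmed off, giving sm_sp = 16.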
+
+
+-- -----------------------------------------------------------------------------
+-- Manifesting Sp
+
+-- | Manifest Sp: turn all the CmmStackSlots into CmmLoads from Sp. The
+-- block looks like this:
+--
+-- middle_pre -- the middle nodes
+-- Sp = Sp + sp_off -- Sp adjustment goes here
+-- last -- the last node
+--
+-- And we have some extra blocks too (that don't contain Sp adjustments)
+--
+-- References in middle_pre are adjusted differently from those in the
+-- last node, because the Sp adjustment intervenes between them (see
+-- adj_pre_sp and adj_post_sp below).
+--
+manifestSp
+ :: BlockEnv StackMap -- StackMaps for other blocks
+ -> StackMap -- StackMap for this block
+ -> ByteOff -- Sp on entry to the block
+ -> ByteOff -- SpHigh
+ -> CmmNode C O -- first node
+ -> [CmmNode O O] -- middle
+ -> ByteOff -- sp_off
+ -> CmmNode O C -- last node
+ -> [CmmBlock] -- new blocks
+ -> [CmmBlock] -- final blocks with Sp manifest
+
+manifestSp stackmaps stack0 sp0 sp_high
+ first middle_pre sp_off last fixup_blocks
+ = final_block : fixup_blocks'
+ where
+ area_off = getAreaOff stackmaps
+
+ adj_pre_sp, adj_post_sp :: CmmNode e x -> CmmNode e x
+ adj_pre_sp = mapExpDeep (areaToSp sp0 sp_high area_off)
+ adj_post_sp = mapExpDeep (areaToSp (sp0 - sp_off) sp_high area_off)
+
+ final_middle = maybeAddSpAdj sp_off $
+ blockFromList $
+ map adj_pre_sp $
+ elimStackStores stack0 stackmaps area_off $
+ middle_pre
+
+ final_last = optStackCheck (adj_post_sp last)
+
+ final_block = blockJoin first final_middle final_last
+
+ fixup_blocks' = map (mapBlock3' (id, adj_post_sp, id)) fixup_blocks
+
+
+getAreaOff :: BlockEnv StackMap -> (Area -> StackLoc)
+getAreaOff _ Old = 0
+getAreaOff stackmaps (Young l) =
+ case mapLookup l stackmaps of
+ Just sm -> sm_sp sm - sm_args sm
+ Nothing -> pprPanic "getAreaOff" (ppr l)
+
+
+maybeAddSpAdj :: ByteOff -> Block CmmNode O O -> Block CmmNode O O
+maybeAddSpAdj 0 block = block
+maybeAddSpAdj sp_off block
+ = block `blockSnoc` CmmAssign spReg (cmmOffset (CmmReg spReg) sp_off)
+
+
+{-
+Sp(L) is the Sp offset on entry to block L relative to the base of the
+OLD area.
+
+SpArgs(L) is the size of the young area for L, i.e. the number of
+arguments.
+
+ - in block L, each reference to [old + N] turns into
+ [Sp + Sp(L) - N]
+
+ - in block L, each reference to [young(L') + N] turns into
+ [Sp + Sp(L) - Sp(L') + SpArgs(L') - N]
+
+ - be careful with the last node of each block: Sp has already been adjusted
+ to be Sp + Sp(L) - Sp(L')
+-}
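+
+{- Illustrative only (hypothetical numbers): with Sp(L) = 40, a
+   reference to [old + 24] becomes [Sp + 40 - 24] = [Sp + 16]; with
+   Sp(L') = 24 and SpArgs(L') = 8, a reference to [young(L') + 8]
+   becomes [Sp + 40 - 24 + 8 - 8] = [Sp + 16].
+-}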
+
+areaToSp :: ByteOff -> ByteOff -> (Area -> StackLoc) -> CmmExpr -> CmmExpr
+areaToSp sp_old _sp_hwm area_off (CmmStackSlot area n) =
+ cmmOffset (CmmReg spReg) (sp_old - area_off area - n)
+areaToSp _ sp_hwm _ (CmmLit CmmHighStackMark) = CmmLit (mkIntCLit sp_hwm)
+areaToSp _ _ _ (CmmMachOp (MO_U_Lt _) -- Note [null stack check]
+ [CmmMachOp (MO_Sub _)
+ [ CmmReg (CmmGlobal Sp)
+ , CmmLit (CmmInt 0 _)],
+ CmmReg (CmmGlobal SpLim)]) = CmmLit (CmmInt 0 wordWidth)
+areaToSp _ _ _ other = other
+
+-- -----------------------------------------------------------------------------
+-- Note [null stack check]
+--
+-- If the high-water Sp is zero, then we end up with
+--
+-- if (Sp - 0 < SpLim) then .. else ..
+--
+-- and possibly some dead code for the failure case. Optimising this
+-- away depends on knowing that SpLim <= Sp, so it is really the job
+-- of the stack layout algorithm, hence we do it now. This is also
+-- convenient because control-flow optimisation later will drop the
+-- dead code.
+
+optStackCheck :: CmmNode O C -> CmmNode O C
+optStackCheck n = -- Note [null stack check]
+ case n of
+ CmmCondBranch (CmmLit (CmmInt 0 _)) _true false -> CmmBranch false
+ other -> other
+
+
+-- -----------------------------------------------------------------------------
+
+-- | Eliminate stores of the form
+--
+-- Sp[area+n] = r
+--
+-- when we know that r is already in the same slot as Sp[area+n]. We
+-- could do this in a later optimisation pass, but that would involve
+-- a separate analysis and we already have the information to hand
+-- here. It helps clean up some extra stack stores in common cases.
+--
+-- Note that we may have to modify the StackMap as we walk through the
+-- code using procMiddle, since an assignment to a variable in the
+-- StackMap will invalidate its mapping there.
+--
+elimStackStores :: StackMap
+ -> BlockEnv StackMap
+ -> (Area -> ByteOff)
+ -> [CmmNode O O]
+ -> [CmmNode O O]
+elimStackStores stackmap stackmaps area_off nodes
+ = go stackmap nodes
+ where
+ go _stackmap [] = []
+ go stackmap (n:ns)
+ = case n of
+ CmmStore (CmmStackSlot area m) (CmmReg (CmmLocal r))
+ | Just (_,off) <- lookupUFM (sm_regs stackmap) r
+ , area_off area + m == off
+ -> pprTrace "eliminated a node!" (ppr r) $ go stackmap ns
+ _otherwise
+ -> n : go (procMiddle stackmaps n stackmap) ns
+
+
+-- -----------------------------------------------------------------------------
+-- Update info tables to include stack liveness
+
+
+setInfoTableStackMap :: BlockEnv StackMap -> CmmDecl -> CmmDecl
+setInfoTableStackMap stackmaps
+ (CmmProc top_info@TopInfo{..} l g@CmmGraph{g_entry = eid})
+ = CmmProc top_info{ info_tbl = fix_info info_tbl } l g
+ where
+ fix_info info_tbl@CmmInfoTable{ cit_rep = StackRep _ } =
+ info_tbl { cit_rep = StackRep (get_liveness eid) }
+ fix_info other = other
+
+ get_liveness :: BlockId -> Liveness
+ get_liveness lbl
+ = case mapLookup lbl stackmaps of
+ Nothing -> pprPanic "setInfoTableStackMap" (ppr lbl)
+ Just sm -> stackMapToLiveness sm
+
+setInfoTableStackMap _ d = d
+
+
+stackMapToLiveness :: StackMap -> Liveness
+stackMapToLiveness StackMap{..} =
+ reverse $ Array.elems $
+ accumArray (\_ x -> x) True (toWords sm_ret_off + 1,
+ toWords (sm_sp - sm_args)) live_words
+ where
+ live_words = [ (toWords off, False)
+ | (r,off) <- eltsUFM sm_regs, isGcPtrType (localRegType r) ]
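+
+-- Illustrative only (hypothetical numbers, 8-byte words): with
+-- sm_ret_off = 8, sm_sp = 40 and sm_args = 8, the bitmap covers words
+-- 2..4 (byte offsets 16..32); a word holding a saved GC pointer is
+-- marked False (live pointer), every other word is True (non-pointer
+-- or dead).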
+
+
+-- -----------------------------------------------------------------------------
+-- Lowering safe foreign calls
+
+{-
+Note [lower safe foreign calls]
+
+We start with
+
+ Sp[young(L1)] = L1
+ ,-----------------------
+ | r1 = foo(x,y,z) returns to L1
+ '-----------------------
+ L1:
+ R1 = r1 -- copyIn, inserted by mkSafeCall
+ ...
+
+the stack layout algorithm will arrange to save and reload everything
+live across the call. Our job now is to expand the call so we get
+
+ Sp[young(L1)] = L1
+ ,-----------------------
+ | SAVE_THREAD_STATE()
+ | token = suspendThread(BaseReg, interruptible)
+ | r = foo(x,y,z)
+ | BaseReg = resumeThread(token)
+ | LOAD_THREAD_STATE()
+ | R1 = r -- copyOut
+ | jump L1
+ '-----------------------
+ L1:
+ r = R1 -- copyIn, inserted by mkSafeCall
+ ...
+
+Note the copyOut, which saves the results in the places that L1 is
+expecting them (see Note [safe foreign call convention]).
+-}
+
+lowerSafeForeignCall :: CmmBlock -> UniqSM CmmBlock
+lowerSafeForeignCall block
+ | (entry, middle, CmmForeignCall { .. }) <- blockSplit block
+ = do
+ -- Both 'id' and 'new_base' are KindNonPtr because they're
+ -- RTS-only objects and are not subject to garbage collection
+ id <- newTemp bWord
+ new_base <- newTemp (cmmRegType (CmmGlobal BaseReg))
+ let (caller_save, caller_load) = callerSaveVolatileRegs
+ load_tso <- newTemp gcWord
+ load_stack <- newTemp gcWord
+ let suspend = saveThreadState <*>
+ caller_save <*>
+ mkMiddle (callSuspendThread id intrbl)
+ midCall = mkUnsafeCall tgt res args
+ resume = mkMiddle (callResumeThread new_base id) <*>
+ -- Assign the result to BaseReg: we
+ -- might now have a different Capability!
+ mkAssign (CmmGlobal BaseReg) (CmmReg (CmmLocal new_base)) <*>
+ caller_load <*>
+ loadThreadState load_tso load_stack
+ -- Note: The successor must be a procpoint, and we have already split,
+ -- so we use a jump, not a branch.
+ succLbl = CmmLit (CmmLabel (infoTblLbl succ))
+
+ (ret_args, copyout) = copyOutOflow NativeReturn Jump (Young succ)
+ (map (CmmReg . CmmLocal) res)
+ updfr (0, [])
+
+ jump = CmmCall { cml_target = succLbl
+ , cml_cont = Just succ
+ , cml_args = widthInBytes wordWidth
+ , cml_ret_args = ret_args
+ , cml_ret_off = updfr }
+
+ graph' <- lgraphOfAGraph $ suspend <*>
+ midCall <*>
+ resume <*>
+ copyout <*>
+ mkLast jump
+
+ case toBlockList graph' of
+ [one] -> let (_, middle', last) = blockSplit one
+ in return (blockJoin entry (middle `blockAppend` middle') last)
+ _ -> panic "lowerSafeForeignCall0"
+
+ -- Block doesn't end in a safe foreign call:
+ | otherwise = return block
+
+
+foreignLbl :: FastString -> CmmExpr
+foreignLbl name = CmmLit (CmmLabel (mkCmmCodeLabel rtsPackageId name))
+
+newTemp :: CmmType -> UniqSM LocalReg
+newTemp rep = getUniqueM >>= \u -> return (LocalReg u rep)
+
+callSuspendThread :: LocalReg -> Bool -> CmmNode O O
+callSuspendThread id intrbl =
+ CmmUnsafeForeignCall
+ (ForeignTarget (foreignLbl (fsLit "suspendThread"))
+ (ForeignConvention CCallConv [AddrHint, NoHint] [AddrHint]))
+ [id] [CmmReg (CmmGlobal BaseReg), CmmLit (mkIntCLit (fromEnum intrbl))]
+
+callResumeThread :: LocalReg -> LocalReg -> CmmNode O O
+callResumeThread new_base id =
+ CmmUnsafeForeignCall
+ (ForeignTarget (foreignLbl (fsLit "resumeThread"))
+ (ForeignConvention CCallConv [AddrHint] [AddrHint]))
+ [new_base] [CmmReg (CmmLocal id)]
+
+-- -----------------------------------------------------------------------------
+
+plusW :: ByteOff -> WordOff -> ByteOff
+plusW b w = b + w * wORD_SIZE
+
+dropEmpty :: WordOff -> [StackSlot] -> Maybe [StackSlot]
+dropEmpty 0 ss = Just ss
+dropEmpty n (Empty : ss) = dropEmpty (n-1) ss
+dropEmpty _ _ = Nothing
+
+isEmpty :: StackSlot -> Bool
+isEmpty Empty = True
+isEmpty _ = False
+
+localRegBytes :: LocalReg -> ByteOff
+localRegBytes r = roundUpToWords (widthInBytes (typeWidth (localRegType r)))
+
+localRegWords :: LocalReg -> WordOff
+localRegWords = toWords . localRegBytes
+
+toWords :: ByteOff -> WordOff
+toWords x = x `quot` wORD_SIZE
+
+
+insertReloads :: StackMap -> [CmmNode O O]
+insertReloads stackmap =
+ [ CmmAssign (CmmLocal r) (CmmLoad (CmmStackSlot Old sp)
+ (localRegType r))
+ | (r,sp) <- stackSlotRegs stackmap
+ ]
+
+
+stackSlotRegs :: StackMap -> [(LocalReg, StackLoc)]
+stackSlotRegs sm = eltsUFM (sm_regs sm)
+
+-- -----------------------------------------------------------------------------
+
+-- If we do this *before* stack layout, we might be able to avoid
+-- saving some things across calls/procpoints.
+--
+-- *but*, that will invalidate the liveness analysis, and we'll have
+-- to re-do it.
+
+cmmSink :: CmmGraph -> UniqSM CmmGraph
+cmmSink graph = do
+ let liveness = cmmLiveness graph
+ return $ cmmSink' liveness graph
+
+cmmSink' :: BlockEnv CmmLive -> CmmGraph -> CmmGraph
+cmmSink' liveness graph
+ = ofBlockList (g_entry graph) $ sink mapEmpty $ postorderDfs graph
+ where
+
+ sink :: BlockEnv [(LocalReg, CmmExpr)] -> [CmmBlock] -> [CmmBlock]
+ sink _ [] = []
+ sink sunk (b:bs) =
+ pprTrace "sink" (ppr l) $
+ blockJoin first final_middle last : sink sunk' bs
+ where
+ l = entryLabel b
+ (first, middle, last) = blockSplit b
+ (middle', assigs) = walk (blockToList middle) emptyBlock
+ (mapFindWithDefault [] l sunk)
+
+ (dropped_last, assigs') = partition (`conflictsWithLast` last) assigs
+
+ final_middle = foldl blockSnoc middle' (toNodes dropped_last)
+
+ sunk' = mapUnion sunk $
+ mapFromList [ (l, filt assigs' (getLive l))
+ | l <- successors last ]
+ where
+ getLive l = mapFindWithDefault Set.empty l liveness
+ filt as live = [ (r,e) | (r,e) <- as, r `Set.member` live ]
+
+
+walk :: [CmmNode O O] -> Block CmmNode O O -> [(LocalReg, CmmExpr)]
+ -> (Block CmmNode O O, [(LocalReg, CmmExpr)])
+
+walk [] acc as = (acc, as)
+walk (n:ns) acc as
+ | Just a <- collect_it = walk ns acc (a:as)
+ | otherwise = walk ns (foldr (flip blockSnoc) acc (n:drop_nodes)) as'
+ where
+ collect_it = case n of
+ CmmAssign (CmmLocal r) e@(CmmReg (CmmGlobal _)) -> Just (r,e)
+-- CmmAssign (CmmLocal r) e@(CmmLoad addr _) |
+-- foldRegsUsed (\b r -> False) True addr -> Just (r,e)
+ _ -> Nothing
+
+ drop_nodes = toNodes dropped
+ (dropped, as') = partition should_drop as
+ where should_drop a = a `conflicts` n
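+
+-- Illustrative only (hypothetical block): walk floats an assignment
+-- like "_c1 = R1" past nodes that neither mention _c1 nor assign to
+-- R1, and drops it back in just before the first conflicting node:
+--
+--      _c1 = R1                        I64[y] = 0
+--      I64[y] = 0           ==>        _c1 = R1
+--      x = _c1 + 1                     x = _c1 + 1
+--
+-- Assignments still pending at the end of the block are either
+-- re-inserted before the last node (if they conflict with it) or
+-- passed on to the successors where their register is live (see sink
+-- above).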
+
+toNodes :: [(LocalReg,CmmExpr)] -> [CmmNode O O]
+toNodes as = [ CmmAssign (CmmLocal r) rhs | (r,rhs) <- as ]
+
+-- We only sink "r = G" assignments right now, so conflicts is very simple:
+conflicts :: (LocalReg,CmmExpr) -> CmmNode O O -> Bool
+(_, rhs) `conflicts` CmmAssign reg _ | reg `regUsedIn` rhs = True
+--(r, CmmLoad _ _) `conflicts` CmmStore _ _ = True
+(r, _) `conflicts` node
+ = foldRegsUsed (\b r' -> r == r' || b) False node
+
+conflictsWithLast :: (LocalReg,CmmExpr) -> CmmNode O C -> Bool
+(r, _) `conflictsWithLast` node
+ = foldRegsUsed (\b r' -> r == r' || b) False node