diff options
Diffstat (limited to 'compiler/nativeGen/BlockLayout.hs')
-rw-r--r-- | compiler/nativeGen/BlockLayout.hs | 895 |
1 files changed, 0 insertions, 895 deletions
diff --git a/compiler/nativeGen/BlockLayout.hs b/compiler/nativeGen/BlockLayout.hs deleted file mode 100644 index e488f0908f..0000000000 --- a/compiler/nativeGen/BlockLayout.hs +++ /dev/null @@ -1,895 +0,0 @@ --- --- Copyright (c) 2018 Andreas Klebinger --- - -{-# LANGUAGE CPP #-} -{-# LANGUAGE DataKinds #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE FlexibleContexts #-} - -module BlockLayout - ( sequenceTop ) -where - -#include "HsVersions.h" -import GhcPrelude - -import Instruction -import NCGMonad -import CFG - -import GHC.Cmm.BlockId -import GHC.Cmm -import GHC.Cmm.Dataflow.Collections -import GHC.Cmm.Dataflow.Label - -import GHC.Driver.Session (gopt, GeneralFlag(..), DynFlags, backendMaintainsCfg) -import UniqFM -import Util -import Unique - -import Digraph -import Outputable -import Maybes - --- DEBUGGING ONLY ---import GHC.Cmm.DebugBlock ---import Debug.Trace -import ListSetOps (removeDups) - -import OrdList -import Data.List -import Data.Foldable (toList) - -import qualified Data.Set as Set -import Data.STRef -import Control.Monad.ST.Strict -import Control.Monad (foldM) - -{- - Note [CFG based code layout] - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - The major steps in placing blocks are as follow: - * Compute a CFG based on the Cmm AST, see getCfgProc. - This CFG will have edge weights representing a guess - on how important they are. - * After we convert Cmm to Asm we run `optimizeCFG` which - adds a few more "educated guesses" to the equation. - * Then we run loop analysis on the CFG (`loopInfo`) which tells us - about loop headers, loop nesting levels and the sort. - * Based on the CFG and loop information refine the edge weights - in the CFG and normalize them relative to the most often visited - node. (See `mkGlobalWeights`) - * Feed this CFG into the block layout code (`sequenceTop`) in this - module. Which will then produce a code layout based on the input weights. - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ~~~ Note [Chain based CFG serialization] - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - For additional information also look at - https://gitlab.haskell.org/ghc/ghc/wikis/commentary/compiler/code-layout - - We have a CFG with edge weights based on which we try to place blocks next to - each other. - - Edge weights not only represent likelihood of control transfer between blocks - but also how much a block would benefit from being placed sequentially after - it's predecessor. - For example blocks which are preceded by an info table are more likely to end - up in a different cache line than their predecessor and we can't eliminate the jump - so there is less benefit to placing them sequentially. - - For example consider this example: - - A: ... - jmp cond D (weak successor) - jmp B - B: ... - jmp C - C: ... - jmp X - D: ... - jmp B (weak successor) - - We determine a block layout by building up chunks (calling them chains) of - possible control flows for which blocks will be placed sequentially. - - Eg for our example we might end up with two chains like: - [A->B->C->X],[D]. Blocks inside chains will always be placed sequentially. - However there is no particular order in which chains are placed since - (hopefully) the blocks for which sequentiality is important have already - been placed in the same chain. - - ----------------------------------------------------------------------------- - 1) First try to create a list of good chains. - ----------------------------------------------------------------------------- - - Good chains are these which allow us to eliminate jump instructions. - Which further eliminate often executed jumps first. - - We do so by: - - *) Ignore edges which represent instructions which can not be replaced - by fall through control flow. Primarily calls and edges to blocks which - are prefixed by a info table we have to jump across. - - *) Then process remaining edges in order of frequency taken and: - - +) If source and target have not been placed build a new chain from them. - - +) If source and target have been placed, and are ends of differing chains - try to merge the two chains. - - +) If one side of the edge is a end/front of a chain, add the other block of - to edge to the same chain - - Eg if we look at edge (B -> C) and already have the chain (A -> B) - then we extend the chain to (A -> B -> C). - - +) If the edge was used to modify or build a new chain remove the edge from - our working list. - - *) If there any blocks not being placed into a chain after these steps we place - them into a chain consisting of only this block. - - Ranking edges by their taken frequency, if - two edges compete for fall through on the same target block, the one taken - more often will automatically win out. Resulting in fewer instructions being - executed. - - Creating singleton chains is required for situations where we have code of the - form: - - A: goto B: - <infoTable> - B: goto C: - <infoTable> - C: ... - - As the code in block B is only connected to the rest of the program via edges - which will be ignored in this step we make sure that B still ends up in a chain - this way. - - ----------------------------------------------------------------------------- - 2) We also try to fuse chains. - ----------------------------------------------------------------------------- - - As a result from the above step we still end up with multiple chains which - represent sequential control flow chunks. But they are not yet suitable for - code layout as we need to place *all* blocks into a single sequence. - - In this step we combine chains result from the above step via these steps: - - *) Look at the ranked list of *all* edges, including calls/jumps across info tables - and the like. - - *) Look at each edge and - - +) Given an edge (A -> B) try to find two chains for which - * Block A is at the end of one chain - * Block B is at the front of the other chain. - +) If we find such a chain we "fuse" them into a single chain, remove the - edge from working set and continue. - +) If we can't find such chains we skip the edge and continue. - - ----------------------------------------------------------------------------- - 3) Place indirect successors (neighbours) after each other - ----------------------------------------------------------------------------- - - We might have chains [A,B,C,X],[E] in a CFG of the sort: - - A ---> B ---> C --------> X(exit) - \- ->E- -/ - - While E does not follow X it's still beneficial to place them near each other. - This can be advantageous if eg C,X,E will end up in the same cache line. - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ~~~ Note [Triangle Control Flow] - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - Checking if an argument is already evaluated leads to a somewhat - special case which looks like this: - - A: - if (R1 & 7 != 0) goto Leval; else goto Lwork; - Leval: // global - call (I64[R1])(R1) returns to Lwork, args: 8, res: 8, upd: 8; - Lwork: // global - ... - - A - |\ - | Leval - |/ - (This edge can be missing because of optimizations) - Lwork - - Once we hit the metal the call instruction is just 2-3 bytes large - depending on the register used. So we lay out the assembly like this: - - movq %rbx,%rax - andl $7,%eax - cmpq $1,%rax - jne Lwork - Leval: - jmp *(%rbx) # encoded in 2-3 bytes. - <info table> - Lwork: - ... - - We could explicitly check for this control flow pattern. - - This is advantageous because: - * It's optimal if the argument isn't evaluated. - * If it's evaluated we only have the extra cost of jumping over - the 2-3 bytes for the call. - * Guarantees the smaller encoding for the conditional jump. - - However given that Lwork usually has an info table we - penalize this edge. So Leval should get placed first - either way and things work out for the best. - - Optimizing for the evaluated case instead would penalize - the other code path. It adds an jump as we can't fall through - to Lwork because of the info table. - Assuming that Lwork is large the chance that the "call" ends up - in the same cache line is also fairly small. - --} - - --- | Look at X number of blocks in two chains to determine --- if they are "neighbours". -neighbourOverlapp :: Int -neighbourOverlapp = 2 - --- | Maps blocks near the end of a chain to it's chain AND --- the other blocks near the end. --- [A,B,C,D,E] Gives entries like (B -> ([A,B], [A,B,C,D,E])) --- where [A,B] are blocks in the end region of a chain. --- This is cheaper then recomputing the ends multiple times. -type FrontierMap = LabelMap ([BlockId],BlockChain) - --- | A non empty ordered sequence of basic blocks. --- It is suitable for serialization in this order. --- --- We use OrdList instead of [] to allow fast append on both sides --- when combining chains. -newtype BlockChain - = BlockChain { chainBlocks :: (OrdList BlockId) } - --- All chains are constructed the same way so comparison --- including structure is faster. -instance Eq BlockChain where - BlockChain b1 == BlockChain b2 = strictlyEqOL b1 b2 - --- Useful for things like sets and debugging purposes, sorts by blocks --- in the chain. -instance Ord (BlockChain) where - (BlockChain lbls1) `compare` (BlockChain lbls2) - = ASSERT(toList lbls1 /= toList lbls2 || lbls1 `strictlyEqOL` lbls2) - strictlyOrdOL lbls1 lbls2 - -instance Outputable (BlockChain) where - ppr (BlockChain blks) = - parens (text "Chain:" <+> ppr (fromOL $ blks) ) - -chainFoldl :: (b -> BlockId -> b) -> b -> BlockChain -> b -chainFoldl f z (BlockChain blocks) = foldl' f z blocks - -noDups :: [BlockChain] -> Bool -noDups chains = - let chainBlocks = concatMap chainToBlocks chains :: [BlockId] - (_blocks, dups) = removeDups compare chainBlocks - in if null dups then True - else pprTrace "Duplicates:" (ppr (map toList dups) $$ text "chains" <+> ppr chains ) False - -inFront :: BlockId -> BlockChain -> Bool -inFront bid (BlockChain seq) - = headOL seq == bid - -chainSingleton :: BlockId -> BlockChain -chainSingleton lbl - = BlockChain (unitOL lbl) - -chainFromList :: [BlockId] -> BlockChain -chainFromList = BlockChain . toOL - -chainSnoc :: BlockChain -> BlockId -> BlockChain -chainSnoc (BlockChain blks) lbl - = BlockChain (blks `snocOL` lbl) - -chainCons :: BlockId -> BlockChain -> BlockChain -chainCons lbl (BlockChain blks) - = BlockChain (lbl `consOL` blks) - -chainConcat :: BlockChain -> BlockChain -> BlockChain -chainConcat (BlockChain blks1) (BlockChain blks2) - = BlockChain (blks1 `appOL` blks2) - -chainToBlocks :: BlockChain -> [BlockId] -chainToBlocks (BlockChain blks) = fromOL blks - --- | Given the Chain A -> B -> C -> D and we break at C --- we get the two Chains (A -> B, C -> D) as result. -breakChainAt :: BlockId -> BlockChain - -> (BlockChain,BlockChain) -breakChainAt bid (BlockChain blks) - | not (bid == head rblks) - = panic "Block not in chain" - | otherwise - = (BlockChain (toOL lblks), - BlockChain (toOL rblks)) - where - (lblks, rblks) = break (\lbl -> lbl == bid) (fromOL blks) - -takeR :: Int -> BlockChain -> [BlockId] -takeR n (BlockChain blks) = - take n . fromOLReverse $ blks - -takeL :: Int -> BlockChain -> [BlockId] -takeL n (BlockChain blks) = - take n . fromOL $ blks - --- Note [Combining neighborhood chains] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - --- See also Note [Chain based CFG serialization] --- We have the chains (A-B-C-D) and (E-F) and an Edge C->E. --- --- While placing the latter after the former doesn't result in sequential --- control flow it is still beneficial. As block C and E might end --- up in the same cache line. --- --- So we place these chains next to each other even if we can't fuse them. --- --- A -> B -> C -> D --- v --- - -> E -> F ... --- --- A simple heuristic to chose which chains we want to combine: --- * Process edges in descending priority. --- * Check if there is a edge near the end of one chain which goes --- to a block near the start of another edge. --- --- While we could take into account the space between the two blocks which --- share an edge this blows up compile times quite a bit. It requires --- us to find all edges between two chains, check the distance for all edges, --- rank them based on the distance and only then we can select two chains --- to combine. Which would add a lot of complexity for little gain. --- --- So instead we just rank by the strength of the edge and use the first pair we --- find. - --- | For a given list of chains and edges try to combine chains with strong --- edges between them. -combineNeighbourhood :: [CfgEdge] -- ^ Edges to consider - -> [BlockChain] -- ^ Current chains of blocks - -> ([BlockChain], Set.Set (BlockId,BlockId)) - -- ^ Resulting list of block chains, and a set of edges which - -- were used to fuse chains and as such no longer need to be - -- considered. -combineNeighbourhood edges chains - = -- pprTraceIt "Neighbours" $ - -- pprTrace "combineNeighbours" (ppr edges) $ - applyEdges edges endFrontier startFrontier (Set.empty) - where - --Build maps from chain ends to chains - endFrontier, startFrontier :: FrontierMap - endFrontier = - mapFromList $ concatMap (\chain -> - let ends = getEnds chain :: [BlockId] - entry = (ends,chain) - in map (\x -> (x,entry)) ends ) chains - startFrontier = - mapFromList $ concatMap (\chain -> - let front = getFronts chain - entry = (front,chain) - in map (\x -> (x,entry)) front) chains - applyEdges :: [CfgEdge] -> FrontierMap -> FrontierMap -> Set.Set (BlockId, BlockId) - -> ([BlockChain], Set.Set (BlockId,BlockId)) - applyEdges [] chainEnds _chainFronts combined = - (ordNub $ map snd $ mapElems chainEnds, combined) - applyEdges ((CfgEdge from to _w):edges) chainEnds chainFronts combined - | Just (c1_e,c1) <- mapLookup from chainEnds - , Just (c2_f,c2) <- mapLookup to chainFronts - , c1 /= c2 -- Avoid trying to concat a chain with itself. - = let newChain = chainConcat c1 c2 - newChainFrontier = getFronts newChain - newChainEnds = getEnds newChain - newFronts :: FrontierMap - newFronts = - let withoutOld = - foldl' (\m b -> mapDelete b m :: FrontierMap) chainFronts (c2_f ++ getFronts c1) - entry = - (newChainFrontier,newChain) --let bound to ensure sharing - in foldl' (\m x -> mapInsert x entry m) - withoutOld newChainFrontier - - newEnds = - let withoutOld = foldl' (\m b -> mapDelete b m) chainEnds (c1_e ++ getEnds c2) - entry = (newChainEnds,newChain) --let bound to ensure sharing - in foldl' (\m x -> mapInsert x entry m) - withoutOld newChainEnds - in - -- pprTrace "ApplyEdges" - -- (text "before" $$ - -- text "fronts" <+> ppr chainFronts $$ - -- text "ends" <+> ppr chainEnds $$ - - -- text "various" $$ - -- text "newChain" <+> ppr newChain $$ - -- text "newChainFrontier" <+> ppr newChainFrontier $$ - -- text "newChainEnds" <+> ppr newChainEnds $$ - -- text "drop" <+> ppr ((c2_f ++ getFronts c1) ++ (c1_e ++ getEnds c2)) $$ - - -- text "after" $$ - -- text "fronts" <+> ppr newFronts $$ - -- text "ends" <+> ppr newEnds - -- ) - applyEdges edges newEnds newFronts (Set.insert (from,to) combined) - | otherwise - = applyEdges edges chainEnds chainFronts combined - where - - getFronts chain = takeL neighbourOverlapp chain - getEnds chain = takeR neighbourOverlapp chain - --- In the last stop we combine all chains into a single one. --- Trying to place chains with strong edges next to each other. -mergeChains :: [CfgEdge] -> [BlockChain] - -> (BlockChain) -mergeChains edges chains - = -- pprTrace "combine" (ppr edges) $ - runST $ do - let addChain m0 chain = do - ref <- newSTRef chain - return $ chainFoldl (\m' b -> mapInsert b ref m') m0 chain - chainMap' <- foldM (\m0 c -> addChain m0 c) mapEmpty chains - merge edges chainMap' - where - -- We keep a map from ALL blocks to their respective chain (sigh) - -- This is required since when looking at an edge we need to find - -- the associated chains quickly. - -- We use a map of STRefs, maintaining a invariant of one STRef per chain. - -- When merging chains we can update the - -- STRef of one chain once (instead of writing to the map for each block). - -- We then overwrite the STRefs for the other chain so there is again only - -- a single STRef for the combined chain. - -- The difference in terms of allocations saved is ~0.2% with -O so actually - -- significant compared to using a regular map. - - merge :: forall s. [CfgEdge] -> LabelMap (STRef s BlockChain) -> ST s BlockChain - merge [] chains = do - chains' <- ordNub <$> (mapM readSTRef $ mapElems chains) :: ST s [BlockChain] - return $ foldl' chainConcat (head chains') (tail chains') - merge ((CfgEdge from to _):edges) chains - -- | pprTrace "merge" (ppr (from,to) <> ppr chains) False - -- = undefined - | cFrom == cTo - = merge edges chains - | otherwise - = do - chains' <- mergeComb cFrom cTo - merge edges chains' - where - mergeComb :: STRef s BlockChain -> STRef s BlockChain -> ST s (LabelMap (STRef s BlockChain)) - mergeComb refFrom refTo = do - cRight <- readSTRef refTo - chain <- pure chainConcat <*> readSTRef refFrom <*> pure cRight - writeSTRef refFrom chain - return $ chainFoldl (\m b -> mapInsert b refFrom m) chains cRight - - cFrom = expectJust "mergeChains:chainMap:from" $ mapLookup from chains - cTo = expectJust "mergeChains:chainMap:to" $ mapLookup to chains - - --- See Note [Chain based CFG serialization] for the general idea. --- This creates and fuses chains at the same time for performance reasons. - --- Try to build chains from a list of edges. --- Edges must be sorted **descending** by their priority. --- Returns the constructed chains, along with all edges which --- are irrelevant past this point, this information doesn't need --- to be complete - it's only used to speed up the process. --- An Edge is irrelevant if the ends are part of the same chain. --- We say these edges are already linked -buildChains :: [CfgEdge] -> [BlockId] - -> ( LabelMap BlockChain -- Resulting chains, indexd by end if chain. - , Set.Set (BlockId, BlockId)) --List of fused edges. -buildChains edges blocks - = runST $ buildNext setEmpty mapEmpty mapEmpty edges Set.empty - where - -- buildNext builds up chains from edges one at a time. - - -- We keep a map from the ends of chains to the chains. - -- This we we can easily check if an block should be appended to an - -- existing chain! - -- We store them using STRefs so we don't have to rebuild the spine of both - -- maps every time we update a chain. - buildNext :: forall s. LabelSet - -> LabelMap (STRef s BlockChain) -- Map from end of chain to chain. - -> LabelMap (STRef s BlockChain) -- Map from start of chain to chain. - -> [CfgEdge] -- Edges to check - ordered by decreasing weight - -> Set.Set (BlockId, BlockId) -- Used edges - -> ST s ( LabelMap BlockChain -- Chains by end - , Set.Set (BlockId, BlockId) --List of fused edges - ) - buildNext placed _chainStarts chainEnds [] linked = do - ends' <- sequence $ mapMap readSTRef chainEnds :: ST s (LabelMap BlockChain) - -- Any remaining blocks have to be made to singleton chains. - -- They might be combined with other chains later on outside this function. - let unplaced = filter (\x -> not (setMember x placed)) blocks - singletons = map (\x -> (x,chainSingleton x)) unplaced :: [(BlockId,BlockChain)] - return (foldl' (\m (k,v) -> mapInsert k v m) ends' singletons , linked) - buildNext placed chainStarts chainEnds (edge:todo) linked - | from == to - -- We skip self edges - = buildNext placed chainStarts chainEnds todo (Set.insert (from,to) linked) - | not (alreadyPlaced from) && - not (alreadyPlaced to) - = do - --pprTraceM "Edge-Chain:" (ppr edge) - chain' <- newSTRef $ chainFromList [from,to] - buildNext - (setInsert to (setInsert from placed)) - (mapInsert from chain' chainStarts) - (mapInsert to chain' chainEnds) - todo - (Set.insert (from,to) linked) - - | (alreadyPlaced from) && - (alreadyPlaced to) - , Just predChain <- mapLookup from chainEnds - , Just succChain <- mapLookup to chainStarts - , predChain /= succChain -- Otherwise we try to create a cycle. - = do - -- pprTraceM "Fusing edge" (ppr edge) - fuseChain predChain succChain - - | (alreadyPlaced from) && - (alreadyPlaced to) - = --pprTraceM "Skipping:" (ppr edge) >> - buildNext placed chainStarts chainEnds todo linked - - | otherwise - = do -- pprTraceM "Finding chain for:" (ppr edge $$ - -- text "placed" <+> ppr placed) - findChain - where - from = edgeFrom edge - to = edgeTo edge - alreadyPlaced blkId = (setMember blkId placed) - - -- Combine two chains into a single one. - fuseChain :: STRef s BlockChain -> STRef s BlockChain - -> ST s ( LabelMap BlockChain -- Chains by end - , Set.Set (BlockId, BlockId) --List of fused edges - ) - fuseChain fromRef toRef = do - fromChain <- readSTRef fromRef - toChain <- readSTRef toRef - let newChain = chainConcat fromChain toChain - ref <- newSTRef newChain - let start = head $ takeL 1 newChain - let end = head $ takeR 1 newChain - -- chains <- sequence $ mapMap readSTRef chainStarts - -- pprTraceM "pre-fuse chains:" $ ppr chains - buildNext - placed - (mapInsert start ref $ mapDelete to $ chainStarts) - (mapInsert end ref $ mapDelete from $ chainEnds) - todo - (Set.insert (from,to) linked) - - - --Add the block to a existing chain or creates a new chain - findChain :: ST s ( LabelMap BlockChain -- Chains by end - , Set.Set (BlockId, BlockId) --List of fused edges - ) - findChain - -- We can attach the block to the end of a chain - | alreadyPlaced from - , Just predChain <- mapLookup from chainEnds - = do - chain <- readSTRef predChain - let newChain = chainSnoc chain to - writeSTRef predChain newChain - let chainEnds' = mapInsert to predChain $ mapDelete from chainEnds - -- chains <- sequence $ mapMap readSTRef chainStarts - -- pprTraceM "from chains:" $ ppr chains - buildNext (setInsert to placed) chainStarts chainEnds' todo (Set.insert (from,to) linked) - -- We can attack it to the front of a chain - | alreadyPlaced to - , Just succChain <- mapLookup to chainStarts - = do - chain <- readSTRef succChain - let newChain = from `chainCons` chain - writeSTRef succChain newChain - let chainStarts' = mapInsert from succChain $ mapDelete to chainStarts - -- chains <- sequence $ mapMap readSTRef chainStarts' - -- pprTraceM "to chains:" $ ppr chains - buildNext (setInsert from placed) chainStarts' chainEnds todo (Set.insert (from,to) linked) - -- The placed end of the edge is part of a chain already and not an end. - | otherwise - = do - let block = if alreadyPlaced to then from else to - --pprTraceM "Singleton" $ ppr block - let newChain = chainSingleton block - ref <- newSTRef newChain - buildNext (setInsert block placed) (mapInsert block ref chainStarts) - (mapInsert block ref chainEnds) todo (linked) - where - alreadyPlaced blkId = (setMember blkId placed) - --- | Place basic blocks based on the given CFG. --- See Note [Chain based CFG serialization] -sequenceChain :: forall a i. (Instruction i, Outputable i) - => LabelMap a -- ^ Keys indicate an info table on the block. - -> CFG -- ^ Control flow graph and some meta data. - -> [GenBasicBlock i] -- ^ List of basic blocks to be placed. - -> [GenBasicBlock i] -- ^ Blocks placed in sequence. -sequenceChain _info _weights [] = [] -sequenceChain _info _weights [x] = [x] -sequenceChain info weights' blocks@((BasicBlock entry _):_) = - let weights :: CFG - weights = --pprTrace "cfg'" (pprEdgeWeights cfg') - cfg' - where - (_, globalEdgeWeights) = {-# SCC mkGlobalWeights #-} mkGlobalWeights entry weights' - cfg' = {-# SCC rewriteEdges #-} - mapFoldlWithKey - (\cfg from m -> - mapFoldlWithKey - (\cfg to w -> setEdgeWeight cfg (EdgeWeight w) from to ) - cfg m ) - weights' - globalEdgeWeights - - directEdges :: [CfgEdge] - directEdges = sortBy (flip compare) $ catMaybes . map relevantWeight $ (infoEdgeList weights) - where - relevantWeight :: CfgEdge -> Maybe CfgEdge - relevantWeight edge@(CfgEdge from to edgeInfo) - | (EdgeInfo CmmSource { trans_cmmNode = CmmCall {} } _) <- edgeInfo - -- Ignore edges across calls - = Nothing - | mapMember to info - , w <- edgeWeight edgeInfo - -- The payoff is small if we jump over an info table - = Just (CfgEdge from to edgeInfo { edgeWeight = w/8 }) - | otherwise - = Just edge - - blockMap :: LabelMap (GenBasicBlock i) - blockMap - = foldl' (\m blk@(BasicBlock lbl _ins) -> - mapInsert lbl blk m) - mapEmpty blocks - - (builtChains, builtEdges) - = {-# SCC "buildChains" #-} - --pprTraceIt "generatedChains" $ - --pprTrace "blocks" (ppr (mapKeys blockMap)) $ - buildChains directEdges (mapKeys blockMap) - - rankedEdges :: [CfgEdge] - -- Sort descending by weight, remove fused edges - rankedEdges = - filter (\edge -> not (Set.member (edgeFrom edge,edgeTo edge) builtEdges)) $ - directEdges - - (neighbourChains, combined) - = ASSERT(noDups $ mapElems builtChains) - {-# SCC "groupNeighbourChains" #-} - -- pprTraceIt "NeighbourChains" $ - combineNeighbourhood rankedEdges (mapElems builtChains) - - - allEdges :: [CfgEdge] - allEdges = {-# SCC allEdges #-} - sortOn (relevantWeight) $ filter (not . deadEdge) $ (infoEdgeList weights) - where - deadEdge :: CfgEdge -> Bool - deadEdge (CfgEdge from to _) = let e = (from,to) in Set.member e combined || Set.member e builtEdges - relevantWeight :: CfgEdge -> EdgeWeight - relevantWeight (CfgEdge _ _ edgeInfo) - | EdgeInfo (CmmSource { trans_cmmNode = CmmCall {}}) _ <- edgeInfo - -- Penalize edges across calls - = weight/(64.0) - | otherwise - = weight - where - -- negate to sort descending - weight = negate (edgeWeight edgeInfo) - - masterChain = - {-# SCC "mergeChains" #-} - -- pprTraceIt "MergedChains" $ - mergeChains allEdges neighbourChains - - --Make sure the first block stays first - prepedChains - | inFront entry masterChain - = [masterChain] - | (rest,entry) <- breakChainAt entry masterChain - = [entry,rest] -#if __GLASGOW_HASKELL__ <= 810 - | otherwise = pprPanic "Entry point eliminated" $ - ppr masterChain -#endif - - blockList - = ASSERT(noDups [masterChain]) - (concatMap fromOL $ map chainBlocks prepedChains) - - --chainPlaced = setFromList $ map blockId blockList :: LabelSet - chainPlaced = setFromList $ blockList :: LabelSet - unplaced = - let blocks = mapKeys blockMap - isPlaced b = setMember (b) chainPlaced - in filter (\block -> not (isPlaced block)) blocks - - placedBlocks = - -- We want debug builds to catch this as it's a good indicator for - -- issues with CFG invariants. But we don't want to blow up production - -- builds if something slips through. - ASSERT(null unplaced) - --pprTraceIt "placedBlocks" $ - -- ++ [] is stil kinda expensive - if null unplaced then blockList else blockList ++ unplaced - getBlock bid = expectJust "Block placement" $ mapLookup bid blockMap - in - --Assert we placed all blocks given as input - ASSERT(all (\bid -> mapMember bid blockMap) placedBlocks) - dropJumps info $ map getBlock placedBlocks - -{-# SCC dropJumps #-} --- | Remove redundant jumps between blocks when we can rely on --- fall through. -dropJumps :: forall a i. Instruction i => LabelMap a -> [GenBasicBlock i] - -> [GenBasicBlock i] -dropJumps _ [] = [] -dropJumps info ((BasicBlock lbl ins):todo) - | not . null $ ins --This can happen because of shortcutting - , [dest] <- jumpDestsOfInstr (last ins) - , ((BasicBlock nextLbl _) : _) <- todo - , not (mapMember dest info) - , nextLbl == dest - = BasicBlock lbl (init ins) : dropJumps info todo - | otherwise - = BasicBlock lbl ins : dropJumps info todo - - --- ----------------------------------------------------------------------------- --- Sequencing the basic blocks - --- Cmm BasicBlocks are self-contained entities: they always end in a --- jump, either non-local or to another basic block in the same proc. --- In this phase, we attempt to place the basic blocks in a sequence --- such that as many of the local jumps as possible turn into --- fallthroughs. - -sequenceTop - :: (Instruction instr, Outputable instr) - => DynFlags -- Determine which layout algo to use - -> NcgImpl statics instr jumpDest - -> Maybe CFG -- ^ CFG if we have one. - -> NatCmmDecl statics instr -- ^ Function to serialize - -> NatCmmDecl statics instr - -sequenceTop _ _ _ top@(CmmData _ _) = top -sequenceTop dflags ncgImpl edgeWeights - (CmmProc info lbl live (ListGraph blocks)) - | (gopt Opt_CfgBlocklayout dflags) && backendMaintainsCfg dflags - --Use chain based algorithm - , Just cfg <- edgeWeights - = CmmProc info lbl live ( ListGraph $ ncgMakeFarBranches ncgImpl info $ - {-# SCC layoutBlocks #-} - sequenceChain info cfg blocks ) - | otherwise - --Use old algorithm - = let cfg = if dontUseCfg then Nothing else edgeWeights - in CmmProc info lbl live ( ListGraph $ ncgMakeFarBranches ncgImpl info $ - {-# SCC layoutBlocks #-} - sequenceBlocks cfg info blocks) - where - dontUseCfg = gopt Opt_WeightlessBlocklayout dflags || - (not $ backendMaintainsCfg dflags) - --- The old algorithm: --- It is very simple (and stupid): We make a graph out of --- the blocks where there is an edge from one block to another iff the --- first block ends by jumping to the second. Then we topologically --- sort this graph. Then traverse the list: for each block, we first --- output the block, then if it has an out edge, we move the --- destination of the out edge to the front of the list, and continue. - --- FYI, the classic layout for basic blocks uses postorder DFS; this --- algorithm is implemented in Hoopl. - -sequenceBlocks :: Instruction inst => Maybe CFG -> LabelMap a - -> [GenBasicBlock inst] -> [GenBasicBlock inst] -sequenceBlocks _edgeWeight _ [] = [] -sequenceBlocks edgeWeights infos (entry:blocks) = - let entryNode = mkNode edgeWeights entry - bodyNodes = reverse - (flattenSCCs (sccBlocks edgeWeights blocks)) - in dropJumps infos . seqBlocks infos $ ( entryNode : bodyNodes) - -- the first block is the entry point ==> it must remain at the start. - -sccBlocks - :: Instruction instr - => Maybe CFG -> [NatBasicBlock instr] - -> [SCC (Node BlockId (NatBasicBlock instr))] -sccBlocks edgeWeights blocks = - stronglyConnCompFromEdgedVerticesUniqR - (map (mkNode edgeWeights) blocks) - -mkNode :: (Instruction t) - => Maybe CFG -> GenBasicBlock t - -> Node BlockId (GenBasicBlock t) -mkNode edgeWeights block@(BasicBlock id instrs) = - DigraphNode block id outEdges - where - outEdges :: [BlockId] - outEdges - --Select the heaviest successor, ignore weights <= zero - = successor - where - successor - | Just successors <- fmap (`getSuccEdgesSorted` id) - edgeWeights -- :: Maybe [(Label, EdgeInfo)] - = case successors of - [] -> [] - ((target,info):_) - | length successors > 2 || edgeWeight info <= 0 -> [] - | otherwise -> [target] - | otherwise - = case jumpDestsOfInstr (last instrs) of - [one] -> [one] - _many -> [] - - -seqBlocks :: LabelMap i -> [Node BlockId (GenBasicBlock t1)] - -> [GenBasicBlock t1] -seqBlocks infos blocks = placeNext pullable0 todo0 - where - -- pullable: Blocks that are not yet placed - -- todo: Original order of blocks, to be followed if we have no good - -- reason not to; - -- may include blocks that have already been placed, but then - -- these are not in pullable - pullable0 = listToUFM [ (i,(b,n)) | DigraphNode b i n <- blocks ] - todo0 = map node_key blocks - - placeNext _ [] = [] - placeNext pullable (i:rest) - | Just (block, pullable') <- lookupDeleteUFM pullable i - = place pullable' rest block - | otherwise - -- We already placed this block, so ignore - = placeNext pullable rest - - place pullable todo (block,[]) - = block : placeNext pullable todo - place pullable todo (block@(BasicBlock id instrs),[next]) - | mapMember next infos - = block : placeNext pullable todo - | Just (nextBlock, pullable') <- lookupDeleteUFM pullable next - = BasicBlock id instrs : place pullable' todo nextBlock - | otherwise - = block : placeNext pullable todo - place _ _ (_,tooManyNextNodes) - = pprPanic "seqBlocks" (ppr tooManyNextNodes) - - -lookupDeleteUFM :: Uniquable key => UniqFM elt -> key - -> Maybe (elt, UniqFM elt) -lookupDeleteUFM m k = do -- Maybe monad - v <- lookupUFM m k - return (v, delFromUFM m k) |