Diffstat (limited to 'compiler/GHC/CmmToAsm/CFG.hs')
-rw-r--r--  compiler/GHC/CmmToAsm/CFG.hs | 1320
1 file changed, 1320 insertions, 0 deletions
diff --git a/compiler/GHC/CmmToAsm/CFG.hs b/compiler/GHC/CmmToAsm/CFG.hs
new file mode 100644
index 0000000000..f52ff514b1
--- /dev/null
+++ b/compiler/GHC/CmmToAsm/CFG.hs
@@ -0,0 +1,1320 @@
+--
+-- Copyright (c) 2018 Andreas Klebinger
+--
+
+{-# LANGUAGE TypeFamilies, ScopedTypeVariables #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE Rank2Types #-}
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE DataKinds #-}
+
+module GHC.CmmToAsm.CFG
+    ( CFG, CfgEdge(..), EdgeInfo(..), EdgeWeight(..)
+    , TransitionSource(..)
+
+    --Modify the CFG
+    , addWeightEdge, addEdge
+    , delEdge, delNode
+    , addNodesBetween, shortcutWeightMap
+    , reverseEdges, filterEdges
+    , addImmediateSuccessor
+    , mkWeightInfo, adjustEdgeWeight, setEdgeWeight
+
+    --Query the CFG
+    , infoEdgeList, edgeList
+    , getSuccessorEdges, getSuccessors
+    , getSuccEdgesSorted
+    , getEdgeInfo
+    , getCfgNodes, hasNode
+
+    -- Loop Information
+    , loopMembers, loopLevels, loopInfo
+
+    --Construction/Misc
+    , getCfg, getCfgProc, pprEdgeWeights, sanityCheckCfg
+
+    --Find backedges and update their weight
+    , optimizeCFG
+    , mkGlobalWeights
+
+    )
+where
+
+#include "HsVersions.h"
+
+import GhcPrelude
+
+import GHC.Cmm.BlockId
+import GHC.Cmm as Cmm
+
+import GHC.Cmm.Utils
+import GHC.Cmm.Switch
+import GHC.Cmm.Dataflow.Collections
+import GHC.Cmm.Dataflow.Label
+import GHC.Cmm.Dataflow.Block
+import qualified GHC.Cmm.Dataflow.Graph as G
+
+import Util
+import Digraph
+import Maybes
+
+import Unique
+import qualified GHC.CmmToAsm.CFG.Dominators as Dom
+import Data.IntMap.Strict (IntMap)
+import Data.IntSet (IntSet)
+
+import qualified Data.IntMap.Strict as IM
+import qualified Data.Map as M
+import qualified Data.IntSet as IS
+import qualified Data.Set as S
+import Data.Tree
+import Data.Bifunctor
+
+import Outputable
+-- DEBUGGING ONLY
+--import GHC.Cmm.DebugBlock
+--import OrdList
+--import GHC.Cmm.DebugBlock.Trace
+import GHC.Cmm.Ppr () -- For Outputable instances
+import qualified GHC.Driver.Session as D
+
+import Data.List (sort, nub, partition)
+import Data.STRef.Strict
+import Control.Monad.ST
+
+import Data.Array.MArray
+import Data.Array.ST
+import Data.Array.IArray
+import Data.Array.Unsafe (unsafeFreeze)
+import Data.Array.Base (unsafeRead, unsafeWrite)
+
+import Control.Monad
+
+type Prob = Double
+
+type Edge = (BlockId, BlockId)
+type Edges = [Edge]
+
+newtype EdgeWeight
+  = EdgeWeight { weightToDouble :: Double }
+  deriving (Eq,Ord,Enum,Num,Real,Fractional)
+
+instance Outputable EdgeWeight where
+  ppr (EdgeWeight w) = doublePrec 5 w
+
+type EdgeInfoMap edgeInfo = LabelMap (LabelMap edgeInfo)
+
+-- | A control flow graph where edges have been annotated with a weight.
+-- Implemented as IntMap (IntMap <edgeData>)
+-- We must uphold the invariant that for each edge A -> B we must have:
+-- An entry for B in the outer map.
+-- An entry for B in the map we get when looking up A.
+-- Maintaining this invariant is useful as any failed lookup now indicates
+-- an actual error in code which might go unnoticed for a while
+-- otherwise.
+type CFG = EdgeInfoMap EdgeInfo
+
+data CfgEdge
+  = CfgEdge
+  { edgeFrom :: !BlockId
+  , edgeTo   :: !BlockId
+  , edgeInfo :: !EdgeInfo
+  }
+
+-- | Careful! Since we assume there is at most one edge from A to B
+--   the Eq instance does not consider weight.
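+--   For example (with hypothetical blocks a and b):
+--     CfgEdge a b (mkWeightInfo 1) == CfgEdge a b (mkWeightInfo 100)
+--   evaluates to True, as only source and destination are compared.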
+instance Eq CfgEdge where
+  (==) (CfgEdge from1 to1 _) (CfgEdge from2 to2 _)
+    = from1 == from2 && to1 == to2
+
+-- | Edges are sorted ascending pointwise by weight, source and destination
+instance Ord CfgEdge where
+  compare (CfgEdge from1 to1 (EdgeInfo {edgeWeight = weight1}))
+          (CfgEdge from2 to2 (EdgeInfo {edgeWeight = weight2}))
+    | weight1 < weight2 || weight1 == weight2 && from1 < from2 ||
+      weight1 == weight2 && from1 == from2 && to1 < to2
+    = LT
+    | from1 == from2 && to1 == to2 && weight1 == weight2
+    = EQ
+    | otherwise
+    = GT
+
+instance Outputable CfgEdge where
+  ppr (CfgEdge from1 to1 edgeInfo)
+    = parens (ppr from1 <+> text "-(" <> ppr edgeInfo <> text ")->" <+> ppr to1)
+
+-- | Can we trace back an edge to a specific Cmm node,
+-- or has it been introduced during assembly codegen? We use this to maintain
+-- some information which would otherwise be lost during the
+-- Cmm <-> asm transition.
+-- See also Note [Inverting Conditional Branches]
+data TransitionSource
+  = CmmSource { trans_cmmNode :: (CmmNode O C)
+              , trans_info :: BranchInfo }
+  | AsmCodeGen
+  deriving (Eq)
+
+data BranchInfo = NoInfo         -- ^ Unknown, but not heap or stack check.
+                | HeapStackCheck -- ^ Heap or stack check
+    deriving Eq
+
+instance Outputable BranchInfo where
+    ppr NoInfo = text "regular"
+    ppr HeapStackCheck = text "heap/stack"
+
+isHeapOrStackCheck :: TransitionSource -> Bool
+isHeapOrStackCheck (CmmSource { trans_info = HeapStackCheck}) = True
+isHeapOrStackCheck _ = False
+
+-- | Information about edges
+data EdgeInfo
+  = EdgeInfo
+  { transitionSource :: !TransitionSource
+  , edgeWeight :: !EdgeWeight
+  } deriving (Eq)
+
+instance Outputable EdgeInfo where
+  ppr edgeInfo = text "weight:" <+> ppr (edgeWeight edgeInfo)
+
+-- | Convenience function, generate edge info based
+--   on a weight not originating from cmm.
+mkWeightInfo :: EdgeWeight -> EdgeInfo
+mkWeightInfo = EdgeInfo AsmCodeGen
+
+-- | Adjust the weight between the blocks using the given function.
+--   If there is no such edge, the original map is returned.
+adjustEdgeWeight :: CFG -> (EdgeWeight -> EdgeWeight)
+                 -> BlockId -> BlockId -> CFG
+adjustEdgeWeight cfg f from to
+  | Just info <- getEdgeInfo from to cfg
+  , !weight <- edgeWeight info
+  , !newWeight <- f weight
+  = addEdge from to (info { edgeWeight = newWeight}) cfg
+  | otherwise = cfg
+
+-- | Set the weight between the blocks to the given weight.
+--   If there is no such edge, the original map is returned.
+setEdgeWeight :: CFG -> EdgeWeight
+              -> BlockId -> BlockId -> CFG
+setEdgeWeight cfg !weight from to
+  | Just info <- getEdgeInfo from to cfg
+  = addEdge from to (info { edgeWeight = weight}) cfg
+  | otherwise = cfg
+
+
+getCfgNodes :: CFG -> [BlockId]
+getCfgNodes m =
+    mapKeys m
+
+-- | Is this block part of this graph?
+hasNode :: CFG -> BlockId -> Bool
+hasNode m node =
+  -- Check the invariant that each node must exist in the first map or not at all.
+  ASSERT( found || not (any (mapMember node) m))
+  found
+    where
+      found = mapMember node m
+
+
+
+-- | Check if the nodes in the cfg and the set of blocks are the same.
+--   In case of a mismatch we panic and show the difference.
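+--   A plausible call-site sketch (names hypothetical): given the final
+--   list of labels `lbls` for a proc, one would check
+--     ASSERT( sanityCheckCfg cfg (setFromList lbls) (text "post layout") )
+--   so that release builds pay no cost for the consistency check.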
+sanityCheckCfg :: CFG -> LabelSet -> SDoc -> Bool
+sanityCheckCfg m blockSet msg
+    | blockSet == cfgNodes
+    = True
+    | otherwise =
+        pprPanic "Block list and cfg nodes don't match" (
+            text "difference:" <+> ppr diff $$
+            text "blocks:" <+> ppr blockSet $$
+            text "cfg:" <+> pprEdgeWeights m $$
+            msg )
+            False
+    where
+      cfgNodes = setFromList $ getCfgNodes m :: LabelSet
+      diff = (setUnion cfgNodes blockSet) `setDifference` (setIntersection cfgNodes blockSet) :: LabelSet
+
+-- | Filter the CFG with a custom function f.
+--   Parameters are `f from to edgeInfo`.
+filterEdges :: (BlockId -> BlockId -> EdgeInfo -> Bool) -> CFG -> CFG
+filterEdges f cfg =
+    mapMapWithKey filterSources cfg
+    where
+      filterSources from m =
+        mapFilterWithKey (\to w -> f from to w) m
+
+
+{- Note [Updating the CFG during shortcutting]
+
+See Note [What is shortcutting] in the control flow optimization
+code (GHC.Cmm.ContFlowOpt) for a slightly more in-depth explanation of shortcutting.
+
+In the native backend we shortcut jumps at the assembly level. (AsmCodeGen.hs)
+This means we remove blocks containing only one jump from the code
+and instead redirect all jumps targeting such a block to the deleted
+block's jump target.
+
+However we want to have an accurate representation of control
+flow in the CFG. So we add/remove edges accordingly to account
+for the eliminated blocks and new edges.
+
+If we shortcut A -> B -> C to A -> C:
+* We delete edges A -> B and B -> C
+* Replacing them with the edge A -> C
+
+We also try to preserve jump weights while doing so.
+
+Note that:
+* The edge B -> C can't have interesting weights since
+  the block B consists of a single unconditional jump without branching.
+* We delete the edge A -> B and add the edge A -> C.
+* The edge A -> B can be one of many edges originating from A, so it likely
+  has edge weights we want to preserve.
+
+For this reason we simply store the edge info from the original A -> B
+edge and apply this information to the new edge A -> C.
+
+Sometimes we have a scenario where jump target C is not represented by a
+BlockId but an immediate value. I'm only aware of this happening without
+tables next to code currently.
+
+Then we go from A ---> B - -> IMM to A - -> IMM, where the dashed arrows
+are not stored in the CFG.
+
+In that case we simply delete the edge A -> B.
+
+In terms of implementation the native backend first builds a mapping
+from blocks suitable for shortcutting to their jump targets.
+Then it redirects all jump instructions to these blocks using the
+built-up mapping.
+This function (shortcutWeightMap) takes the same mapping and
+applies the mapping to the CFG in the way laid out above.
+
+-}
+shortcutWeightMap :: LabelMap (Maybe BlockId) -> CFG -> CFG
+shortcutWeightMap cuts cfg =
+  foldl' applyMapping cfg $ mapToList cuts
+    where
+      -- takes the tuple (B,C) from the notation in [Updating the CFG during shortcutting]
+      applyMapping :: CFG -> (BlockId,Maybe BlockId) -> CFG
+      --Shortcut immediate
+      applyMapping m (from, Nothing) =
+        mapDelete from .
+        fmap (mapDelete from) $ m
+      --Regular shortcut
+      applyMapping m (from, Just to) =
+        let updatedMap :: CFG
+            updatedMap
+              = fmap (shortcutEdge (from,to)) $
+                (mapDelete from m :: CFG )
+        --Sometimes we can shortcut multiple blocks like so:
+        -- A -> B -> C -> D -> E   =>   A -> E
+        -- so we check for such chains.
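+        -- For instance, with the (hypothetical) mapping
+        --   cuts = { B -> Just C, C -> Just D, D -> Just E }
+        -- applyMapping follows B -> C -> D -> E and so rewrites the
+        -- surviving A -> B edge to A -> E, reusing the edge info that
+        -- was stored for A -> B.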
+        in case mapLookup to cuts of
+            Nothing -> updatedMap
+            Just dest -> applyMapping updatedMap (to, dest)
+      --Redirect edge from B to C
+      shortcutEdge :: (BlockId, BlockId) -> LabelMap EdgeInfo -> LabelMap EdgeInfo
+      shortcutEdge (from, to) m =
+        case mapLookup from m of
+          Just info -> mapInsert to info $ mapDelete from m
+          Nothing   -> m
+
+-- | Sometimes we insert a block which should unconditionally be executed
+--   after a given block. This function updates the CFG for these cases.
+--   So we get   A -> B    =>   A -> A' -> B
+--                \                \
+--                 -> C    =>       -> C
+--
+addImmediateSuccessor :: BlockId -> BlockId -> CFG -> CFG
+addImmediateSuccessor node follower cfg
+    = updateEdges . addWeightEdge node follower uncondWeight $ cfg
+    where
+        uncondWeight = fromIntegral . D.uncondWeight .
+                       D.cfgWeightInfo $ D.unsafeGlobalDynFlags
+        targets = getSuccessorEdges cfg node
+        successors = map fst targets :: [BlockId]
+        updateEdges = addNewSuccs . remOldSuccs
+        remOldSuccs m = foldl' (flip (delEdge node)) m successors
+        addNewSuccs m =
+          foldl' (\m' (t,info) -> addEdge follower t info m') m targets
+
+-- | Adds a new edge, overwrites existing edges if present
+addEdge :: BlockId -> BlockId -> EdgeInfo -> CFG -> CFG
+addEdge from to info cfg =
+    mapAlter addFromToEdge from $
+    mapAlter addDestNode to cfg
+    where
+        -- Simply insert the edge into the edge list.
+        addFromToEdge Nothing = Just $ mapSingleton to info
+        addFromToEdge (Just wm) = Just $ mapInsert to info wm
+        -- We must add the destination node explicitly
+        addDestNode Nothing = Just $ mapEmpty
+        addDestNode n@(Just _) = n
+
+
+-- | Adds an edge with the given weight to the cfg.
+--   If the edge already exists it is overwritten.
+--   `addWeightEdge from to weight cfg`
+addWeightEdge :: BlockId -> BlockId -> EdgeWeight -> CFG -> CFG
+addWeightEdge from to weight cfg =
+    addEdge from to (mkWeightInfo weight) cfg
+
+delEdge :: BlockId -> BlockId -> CFG -> CFG
+delEdge from to m =
+    mapAlter remDest from m
+    where
+        remDest Nothing = Nothing
+        remDest (Just wm) = Just $ mapDelete to wm
+
+delNode :: BlockId -> CFG -> CFG
+delNode node cfg =
+  fmap (mapDelete node)   -- < Edges to the node
+    (mapDelete node cfg)  -- < Edges from the node
+
+-- | Destinations from bid ordered by weight (descending)
+getSuccEdgesSorted :: CFG -> BlockId -> [(BlockId,EdgeInfo)]
+getSuccEdgesSorted m bid =
+    let destMap = mapFindWithDefault mapEmpty bid m
+        cfgEdges = mapToList destMap
+        sortedEdges = sortWith (negate . edgeWeight . snd) cfgEdges
+    in  --pprTrace "getSuccEdgesSorted" (ppr bid <+> text "map:" <+> ppr m)
+        sortedEdges
+
+-- | Get successors of a given node with edge weights.
+getSuccessorEdges :: HasDebugCallStack => CFG -> BlockId -> [(BlockId,EdgeInfo)]
+getSuccessorEdges m bid = maybe lookupError mapToList (mapLookup bid m)
+  where
+    lookupError = pprPanic "getSuccessorEdges: Block does not exist" $
+                    ppr bid <+> pprEdgeWeights m
+
+getEdgeInfo :: BlockId -> BlockId -> CFG -> Maybe EdgeInfo
+getEdgeInfo from to m
+    | Just wm <- mapLookup from m
+    , Just info <- mapLookup to wm
+    = Just $!
+      info
+    | otherwise
+    = Nothing
+
+getEdgeWeight :: CFG -> BlockId -> BlockId -> EdgeWeight
+getEdgeWeight cfg from to =
+    edgeWeight $ expectJust "Edgeweight for nonexistent block" $
+                 getEdgeInfo from to cfg
+
+getTransitionSource :: BlockId -> BlockId -> CFG -> TransitionSource
+getTransitionSource from to cfg = transitionSource $ expectJust "Source info for nonexistent block" $
+                        getEdgeInfo from to cfg
+
+reverseEdges :: CFG -> CFG
+reverseEdges cfg = mapFoldlWithKey (\cfg from toMap -> go (addNode cfg from) from toMap) mapEmpty cfg
+  where
+    -- We must preserve nodes without outgoing edges!
+    addNode :: CFG -> BlockId -> CFG
+    addNode cfg b = mapInsertWith mapUnion b mapEmpty cfg
+    go :: CFG -> BlockId -> (LabelMap EdgeInfo) -> CFG
+    go cfg from toMap = mapFoldlWithKey (\cfg to info -> addEdge to from info cfg) cfg toMap :: CFG
+
+
+-- | Returns an unordered list of all edges with info
+infoEdgeList :: CFG -> [CfgEdge]
+infoEdgeList m =
+    go (mapToList m) []
+  where
+    -- We avoid foldMap to avoid thunk buildup
+    go :: [(BlockId,LabelMap EdgeInfo)] -> [CfgEdge] -> [CfgEdge]
+    go [] acc = acc
+    go ((from,toMap):xs) acc
+      = go' xs from (mapToList toMap) acc
+    go' :: [(BlockId,LabelMap EdgeInfo)] -> BlockId -> [(BlockId,EdgeInfo)] -> [CfgEdge] -> [CfgEdge]
+    go' froms _    []              acc = go froms acc
+    go' froms from ((to,info):tos) acc
+      = go' froms from tos (CfgEdge from to info : acc)
+
+-- | Returns an unordered list of all edges without weights
+edgeList :: CFG -> [Edge]
+edgeList m =
+    go (mapToList m) []
+  where
+    -- We avoid foldMap to avoid thunk buildup
+    go :: [(BlockId,LabelMap EdgeInfo)] -> [Edge] -> [Edge]
+    go [] acc = acc
+    go ((from,toMap):xs) acc
+      = go' xs from (mapKeys toMap) acc
+    go' :: [(BlockId,LabelMap EdgeInfo)] -> BlockId -> [BlockId] -> [Edge] -> [Edge]
+    go' froms _    []       acc = go froms acc
+    go' froms from (to:tos) acc
+      = go' froms from tos ((from,to) : acc)
+
+-- | Get successors of a given node without edge weights.
+getSuccessors :: HasDebugCallStack => CFG -> BlockId -> [BlockId]
+getSuccessors m bid
+    | Just wm <- mapLookup bid m
+    = mapKeys wm
+    | otherwise = lookupError
+    where
+      lookupError = pprPanic "getSuccessors: Block does not exist" $
+                      ppr bid <+> pprEdgeWeights m
+
+pprEdgeWeights :: CFG -> SDoc
+pprEdgeWeights m =
+    let edges = sort $ infoEdgeList m :: [CfgEdge]
+        printEdge (CfgEdge from to (EdgeInfo { edgeWeight = weight }))
+            = text "\t" <> ppr from <+> text "->" <+> ppr to <>
+              text "[label=\"" <> ppr weight <> text "\",weight=\"" <>
+              ppr weight <> text "\"];\n"
+        --for the case that there are no edges from/to this node.
+        --This should rarely happen but it can save a lot of time
+        --to immediately see it when it does.
+        printNode node
+            = text "\t" <> ppr node <> text ";\n"
+        getEdgeNodes (CfgEdge from to _) = [from,to]
+        edgeNodes = setFromList $ concatMap getEdgeNodes edges :: LabelSet
+        nodes = filter (\n -> (not . setMember n) edgeNodes) . mapKeys $ mapFilter null m
+    in
+    text "digraph {\n" <>
+        (foldl' (<>) empty (map printEdge edges)) <>
+        (foldl' (<>) empty (map printNode nodes)) <>
+    text "}\n"
+
+{-# INLINE updateEdgeWeight #-} --Allows eliminating the tuple when possible
+-- | Invariant: The edge **must** already exist in the graph.
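+--   A sketch of safe usage (hypothetical blocks a and b):
+--     updateEdgeWeight (+1) (a, b) cfg
+--   is fine once the edge is known to exist; calling it on a missing edge
+--   panics, unlike adjustEdgeWeight which silently returns the graph unchanged.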
+updateEdgeWeight :: (EdgeWeight -> EdgeWeight) -> Edge -> CFG -> CFG
+updateEdgeWeight f (from, to) cfg
+    | Just oldInfo <- getEdgeInfo from to cfg
+    = let !oldWeight = edgeWeight oldInfo
+          !newWeight = f oldWeight
+      in addEdge from to (oldInfo {edgeWeight = newWeight}) cfg
+    | otherwise
+    = panic "Trying to update invalid edge"
+
+-- from to oldWeight => newWeight
+mapWeights :: (BlockId -> BlockId -> EdgeWeight -> EdgeWeight) -> CFG -> CFG
+mapWeights f cfg =
+  foldl' (\cfg (CfgEdge from to info) ->
+            let oldWeight = edgeWeight info
+                newWeight = f from to oldWeight
+            in addEdge from to (info {edgeWeight = newWeight}) cfg)
+          cfg (infoEdgeList cfg)
+
+
+-- | Insert a block in the control flow between two other blocks.
+-- We pass a list of tuples (A,B,C) where
+-- * A -> C: Old edge
+-- * A -> B -> C : New Arc, where B is the new block.
+-- It's possible that a block has two jumps to the same block
+-- in the assembly code. However we still only store a single edge for
+-- these cases.
+-- We assign the old edge info to the edge A -> B and assign B -> C the
+-- weight of an unconditional jump.
+addNodesBetween :: CFG -> [(BlockId,BlockId,BlockId)] -> CFG
+addNodesBetween m updates =
+  foldl'  updateWeight m .
+          weightUpdates $ updates
+    where
+      weight = fromIntegral . D.uncondWeight .
+               D.cfgWeightInfo $ D.unsafeGlobalDynFlags
+      -- We might add two blocks for different jumps along a single
+      -- edge. So we end up with edges:   A -> B -> C   ,   A -> D -> C
+      -- In this case, after applying the first update, the weight for A -> C
+      -- is no longer available. So we calculate future weights before updates.
+      weightUpdates = map getWeight
+      getWeight :: (BlockId,BlockId,BlockId) -> (BlockId,BlockId,BlockId,EdgeInfo)
+      getWeight (from,between,old)
+        | Just edgeInfo <- getEdgeInfo from old m
+        = (from,between,old,edgeInfo)
+        | otherwise
+        = pprPanic "Can't find weight for edge that should have one" (
+            text "triple" <+> ppr (from,between,old) $$
+            text "updates" <+> ppr updates $$
+            text "cfg:" <+> pprEdgeWeights m )
+      updateWeight :: CFG -> (BlockId,BlockId,BlockId,EdgeInfo) -> CFG
+      updateWeight m (from,between,old,edgeInfo)
+        = addEdge from between edgeInfo .
+          addWeightEdge between old weight .
+          delEdge from old $ m
+
+{-
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+  ~~~       Note [CFG Edge Weights]    ~~~
+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+  The edge weights assigned here do not currently represent a specific
+  cost model; rather they are just a ranking of which blocks should
+  be placed next to each other given their connection type in
+  the CFG.
+  This is especially relevant whenever two blocks
+  jump to the same target.
+
+                     A   B
+                      \ /
+                       C
+
+  Should A or B be placed in front of C? The block layout algorithm
+  decides this based on which edge (A,C)/(B,C) is heavier. So we
+  make an educated guess as to which branch should be preferred.
+
+  We rank edges in this order:
+  * Unconditional Control Transfer - They will always
+    transfer control to their target. Unless there is an info table
+    we can turn the jump into a fallthrough as well.
+    We use 20k as the default, so it's easy to spot if values have been
+    modified but unlikely that we run into issues with overflow.
+  * If branches (likely) - We assume branches marked as likely
+    are taken more than 80% of the time.
+    By ranking them below unconditional jumps we make sure we
+    prefer the unconditional if there is a conditional and
+    unconditional edge towards a block.
+  * If branches (regular) - The false branch can potentially be turned
+    into a fallthrough, so we prefer it slightly over the true branch.
+  * Unlikely branches - These can be assumed to be taken less than 20%
+    of the time, so we give them one of the lowest priorities.
+  * Switches - Switches at this level are implemented as jump tables,
+    so they have a larger number of successors. Without more information
+    we can only say that each individual successor is unlikely to be
+    jumped to, and we rank them accordingly.
+  * Calls - We currently ignore calls completely:
+    * By the time we return from a call there is a good chance
+      that the address we return to has already been evicted from
+      cache, eliminating a main advantage sequential placement brings.
+    * Calls always require an info table in front of their return
+      address. This further reduces the chance that we return to the
+      same cache line.
+
+-}
+-- | Generate weights for a Cmm proc based on some simple heuristics.
+getCfgProc :: D.CfgWeights -> RawCmmDecl -> CFG
+getCfgProc _ (CmmData {}) = mapEmpty
+getCfgProc weights (CmmProc _info _lab _live graph) = getCfg weights graph
+
+getCfg :: D.CfgWeights -> CmmGraph -> CFG
+getCfg weights graph =
+  foldl' insertEdge edgelessCfg $ concatMap getBlockEdges blocks
+  where
+    D.CFGWeights
+            { D.uncondWeight = uncondWeight
+            , D.condBranchWeight = condBranchWeight
+            , D.switchWeight = switchWeight
+            , D.callWeight = callWeight
+            , D.likelyCondWeight = likelyCondWeight
+            , D.unlikelyCondWeight = unlikelyCondWeight
+            --  Last two are used in other places
+            --, D.infoTablePenalty = infoTablePenalty
+            --, D.backEdgeBonus = backEdgeBonus
+            } = weights
+    -- Explicitly add all nodes to the cfg to ensure they are part of the
+    -- CFG.
+    edgelessCfg = mapFromList $ zip (map G.entryLabel blocks) (repeat mapEmpty)
+    insertEdge :: CFG -> ((BlockId,BlockId),EdgeInfo) -> CFG
+    insertEdge m ((from,to),weight) =
+      mapAlter f from m
+        where
+          f :: Maybe (LabelMap EdgeInfo) -> Maybe (LabelMap EdgeInfo)
+          f Nothing = Just $ mapSingleton to weight
+          f (Just destMap) = Just $ mapInsert to weight destMap
+    getBlockEdges :: CmmBlock -> [((BlockId,BlockId),EdgeInfo)]
+    getBlockEdges block =
+      case branch of
+        CmmBranch dest -> [mkEdge dest uncondWeight]
+        CmmCondBranch cond t f l
+          | l == Nothing ->
+              [mkEdge f condBranchWeight,   mkEdge t condBranchWeight]
+          | l == Just True ->
+              [mkEdge f unlikelyCondWeight, mkEdge t likelyCondWeight]
+          | l == Just False ->
+              [mkEdge f likelyCondWeight,   mkEdge t unlikelyCondWeight]
+          where
+            mkEdgeInfo = -- pprTrace "Info" (ppr branchInfo <+> ppr cond)
+                         EdgeInfo (CmmSource branch branchInfo) . fromIntegral
+            mkEdge target weight = ((bid,target), mkEdgeInfo weight)
+            branchInfo =
+              foldRegsUsed
+                (panic "foldRegsDynFlags")
+                (\info r -> if r == SpLim || r == HpLim || r == BaseReg
+                    then HeapStackCheck else info)
+                NoInfo cond
+
+        (CmmSwitch _e ids) ->
+          let switchTargets = switchTargetsToList ids
+              --Compiler performance hack - for very wide switches don't
+              --consider targets for layout.
+              adjustedWeight =
+                if (length switchTargets > 10) then -1 else switchWeight
+          in map (\x -> mkEdge x adjustedWeight) switchTargets
+        (CmmCall { cml_cont = Just cont})  -> [mkEdge cont callWeight]
+        (CmmForeignCall {Cmm.succ = cont}) -> [mkEdge cont callWeight]
+        (CmmCall { cml_cont = Nothing })   -> []
+        other ->
+            panic "Foo" $
+            ASSERT2(False, ppr "Unknown successor cause:" <>
+              (ppr branch <+> text "=>" <> ppr (G.successors other)))
+            map (\x -> ((bid,x),mkEdgeInfo 0)) $ G.successors other
+      where
+        bid = G.entryLabel block
+        mkEdgeInfo = EdgeInfo (CmmSource branch NoInfo) . fromIntegral
+        mkEdge target weight = ((bid,target), mkEdgeInfo weight)
+        branch = lastNode block :: CmmNode O C
+
+    blocks = revPostorder graph :: [CmmBlock]
+
+--Find back edges by BFS
+findBackEdges :: HasDebugCallStack => BlockId -> CFG -> Edges
+findBackEdges root cfg =
+    --pprTraceIt "Backedges:" $
+    map fst .
+    filter (\x -> snd x == Backward) $ typedEdges
+  where
+    edges = edgeList cfg :: [(BlockId,BlockId)]
+    getSuccs = getSuccessors cfg :: BlockId -> [BlockId]
+    typedEdges =
+      classifyEdges root getSuccs edges :: [((BlockId,BlockId),EdgeType)]
+
+
+optimizeCFG :: D.CfgWeights -> RawCmmDecl -> CFG -> CFG
+optimizeCFG _ (CmmData {}) cfg = cfg
+optimizeCFG weights (CmmProc info _lab _live graph) cfg =
+    {-# SCC optimizeCFG #-}
+    -- pprTrace "Initial:" (pprEdgeWeights cfg) $
+    -- pprTrace "Initial:" (ppr $ mkGlobalWeights (g_entry graph) cfg) $
+
+    -- pprTrace "LoopInfo:" (ppr $ loopInfo cfg (g_entry graph)) $
+    favourFewerPreds  .
+    penalizeInfoTables info .
+    increaseBackEdgeWeight (g_entry graph) $ cfg
+  where
+
+    -- | Increase the weight of all backedges in the CFG.
+    --   This helps to make loop jumpbacks the heaviest edges.
+    increaseBackEdgeWeight :: BlockId -> CFG -> CFG
+    increaseBackEdgeWeight root cfg =
+        let backedges = findBackEdges root cfg
+            update weight
+              --Keep irrelevant edges irrelevant
+              | weight <= 0 = 0
+              | otherwise
+              = weight + fromIntegral (D.backEdgeBonus weights)
+        in  foldl'  (\cfg edge -> updateEdgeWeight update edge cfg)
+                    cfg backedges
+
+    -- | Since we can't fall through info tables we penalize these.
+    penalizeInfoTables :: LabelMap a -> CFG -> CFG
+    penalizeInfoTables info cfg =
+        mapWeights fupdate cfg
+      where
+        fupdate :: BlockId -> BlockId -> EdgeWeight -> EdgeWeight
+        fupdate _ to weight
+          | mapMember to info
+          = weight - (fromIntegral $ D.infoTablePenalty weights)
+          | otherwise = weight
+
+    -- | If a block has two successors, favour the one with fewer
+    --   predecessors and/or the one allowing fall through.
+    favourFewerPreds :: CFG -> CFG
+    favourFewerPreds cfg =
+        let
+            revCfg =
+              reverseEdges $ filterEdges
+                              (\_from -> fallthroughTarget)  cfg
+
+            predCount n = length $ getSuccessorEdges revCfg n
+            nodes = getCfgNodes cfg
+
+            modifiers :: Int -> Int -> (EdgeWeight, EdgeWeight)
+            modifiers preds1 preds2
+              | preds1 <  preds2 = ( 1,-1)
+              | preds1 == preds2 = ( 0, 0)
+              | otherwise        = (-1, 1)
+
+            update :: CFG -> BlockId -> CFG
+            update cfg node
+              | [(s1,e1),(s2,e2)] <- getSuccessorEdges cfg node
+              , !w1 <- edgeWeight e1
+              , !w2 <- edgeWeight e2
+              --Only change the weights if there isn't already an ordering.
+              , w1 == w2
+              , (mod1,mod2) <- modifiers (predCount s1) (predCount s2)
+              = (\cfg' ->
+                  (adjustEdgeWeight cfg' (+mod2) node s2))
+                  (adjustEdgeWeight cfg  (+mod1) node s1)
+              | otherwise
+              = cfg
+        in foldl' update cfg nodes
+      where
+        fallthroughTarget :: BlockId -> EdgeInfo -> Bool
+        fallthroughTarget to (EdgeInfo source _weight)
+          | mapMember to info = False
+          | AsmCodeGen <- source = True
+          | CmmSource { trans_cmmNode = CmmBranch {} } <- source = True
+          | CmmSource { trans_cmmNode = CmmCondBranch {} } <- source = True
+          | otherwise = False
+
+-- | Determine loop membership of blocks based on SCC analysis.
+--   This is faster but only gives yes/no answers.
+loopMembers :: HasDebugCallStack => CFG -> LabelMap Bool
+loopMembers cfg =
+    foldl' (flip setLevel) mapEmpty sccs
+  where
+    mkNode :: BlockId -> Node BlockId BlockId
+    mkNode bid = DigraphNode bid bid (getSuccessors cfg bid)
+    nodes = map mkNode (getCfgNodes cfg)
+
+    sccs = stronglyConnCompFromEdgedVerticesOrd nodes
+
+    setLevel :: SCC BlockId -> LabelMap Bool -> LabelMap Bool
+    setLevel (AcyclicSCC bid) m = mapInsert bid False m
+    setLevel (CyclicSCC bids) m = foldl' (\m k -> mapInsert k True m) m bids
+
+loopLevels :: CFG -> BlockId -> LabelMap Int
+loopLevels cfg root = liLevels loopInfos
+    where
+      loopInfos = loopInfo cfg root
+
+data LoopInfo = LoopInfo
+  { liBackEdges :: [(Edge)]       -- ^ List of back edges
+  , liLevels :: LabelMap Int      -- ^ BlockId -> LoopLevel mapping
+  , liLoops :: [(Edge, LabelSet)] -- ^ (backEdge, loopBody), body includes header
+  }
+
+instance Outputable LoopInfo where
+    ppr (LoopInfo _ _lvls loops) =
+        text "Loops:(backEdge, bodyNodes)" $$
+            (vcat $ map ppr loops)
+
+{- Note [Determining the loop body]
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+   Starting with the knowledge that:
+   * head dominates the loop
+   * `tail` -> `head` is a backedge
+
+   We can determine all nodes by:
+   * Deleting the loop head from the graph.
+   * Collecting all blocks which are reachable from the `tail`.
+
+   We do so by performing a BFS from the tail node towards the head.
+ -}
+
+-- | Determine loop membership of blocks based on Dominator analysis.
+--   This is slower but gives loop levels instead of just loop membership.
+--   However it only detects natural loops. Irreducible control flow is not
+--   recognized even if it loops. But that is rare enough that we don't have
+--   to care about that special case.
+loopInfo :: HasDebugCallStack => CFG -> BlockId -> LoopInfo
+loopInfo cfg root = LoopInfo  { liBackEdges = backEdges
+                              , liLevels = mapFromList loopCounts
+                              , liLoops = loopBodies }
+  where
+    revCfg = reverseEdges cfg
+
+    graph = -- pprTrace "CFG - loopInfo" (pprEdgeWeights cfg) $
+            fmap (setFromList . mapKeys ) cfg :: LabelMap LabelSet
+
+
+    --TODO - This should be a no op: Export constructors? Use unsafeCoerce? ...
+    rooted = ( fromBlockId root
+             , toIntMap $ fmap toIntSet graph) :: (Int, IntMap IntSet)
+    tree = fmap toBlockId $ Dom.domTree rooted :: Tree BlockId
+
+    -- Map from Nodes to their dominators
+    domMap :: LabelMap LabelSet
+    domMap = mkDomMap tree
+
+    edges = edgeList cfg :: [(BlockId, BlockId)]
+    -- We can't recompute nodes from edges, there might be blocks not connected via edges.
+    nodes = getCfgNodes cfg :: [BlockId]
+
+    -- identify back edges
+    isBackEdge (from,to)
+      | Just doms <- mapLookup from domMap
+      , setMember to doms
+      = True
+      | otherwise = False
+
+    -- See Note [Determining the loop body]
+    -- Get the loop body associated with a back edge.
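+    -- For the (hypothetical) graph A -> B -> C -> B with back edge
+    -- (tail = C, head = B): deleting the head B and searching backwards
+    -- from C reaches only {C}, so the body is setInsert B {C} = {B, C};
+    -- A stays out since it can only reach the loop through the head.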
+    findBody edge@(tail, head)
+      = ( edge, setInsert head $ go (setSingleton tail) (setSingleton tail) )
+      where
+        -- See Note [Determining the loop body]
+        cfg' = delNode head revCfg
+
+        go :: LabelSet -> LabelSet -> LabelSet
+        go found current
+          | setNull current = found
+          | otherwise = go  (setUnion newSuccessors found)
+                            newSuccessors
+          where
+            -- Really predecessors, since we use the reversed cfg.
+            newSuccessors = setFilter (\n -> not $ setMember n found) successors :: LabelSet
+            successors = setFromList $ concatMap
+                                          (getSuccessors cfg')
+                                          -- we filter head as it's no longer part of the cfg.
+                                          (filter (/= head) $ setElems current) :: LabelSet
+
+    backEdges = filter isBackEdge edges
+    loopBodies = map findBody backEdges :: [(Edge, LabelSet)]
+
+    -- Block b is part of n loop bodies => loop nest level of n
+    loopCounts =
+      let bodies = map (first snd) loopBodies -- [(Header, Body)]
+          loopCount n = length $ nub . map fst . filter (setMember n . snd) $ bodies
+      in  map (\n -> (n, loopCount n)) $ nodes :: [(BlockId, Int)]
+
+    toIntSet :: LabelSet -> IntSet
+    toIntSet s = IS.fromList . map fromBlockId . setElems $ s
+    toIntMap :: LabelMap a -> IntMap a
+    toIntMap m = IM.fromList $ map (\(x,y) -> (fromBlockId x,y)) $ mapToList m
+
+    mkDomMap :: Tree BlockId -> LabelMap LabelSet
+    mkDomMap root = mapFromList $ go setEmpty root
+      where
+        go :: LabelSet -> Tree BlockId -> [(Label,LabelSet)]
+        go parents (Node lbl [])
+          =  [(lbl, parents)]
+        go parents (Node _ leaves)
+          = let nodes = map rootLabel leaves
+                entries = map (\x -> (x,parents)) nodes
+            in  entries ++ concatMap
+                            (\n -> go (setInsert (rootLabel n) parents) n)
+                            leaves
+
+    fromBlockId :: BlockId -> Int
+    fromBlockId = getKey . getUnique
+
+    toBlockId :: Int -> BlockId
+    toBlockId = mkBlockId . mkUniqueGrimily
+
+-- We make the CFG a Hoopl Graph, so we can reuse revPostOrder.
+newtype BlockNode (e :: Extensibility) (x :: Extensibility) = BN (BlockId,[BlockId])
+
+instance G.NonLocal (BlockNode) where
+  entryLabel (BN (lbl,_))   = lbl
+  successors (BN (_,succs)) = succs
+
+revPostorderFrom :: HasDebugCallStack => CFG -> BlockId -> [BlockId]
+revPostorderFrom cfg root =
+    map fromNode $ G.revPostorderFrom hooplGraph root
+  where
+    nodes = getCfgNodes cfg
+    hooplGraph = foldl' (\m n -> mapInsert n (toNode n) m) mapEmpty nodes
+
+    fromNode :: BlockNode C C -> BlockId
+    fromNode (BN x) = fst x
+
+    toNode :: BlockId -> BlockNode C C
+    toNode bid =
+        BN (bid,getSuccessors cfg $ bid)
+
+
+-- | We take in a CFG which has on its edges weights which are
+--   relative only to other edges originating from the same node.
+--
+--   We return a CFG for which each edge represents a GLOBAL weight.
+--   This means edge weights are comparable across the whole graph.
+--
+--   For irreducible control flow results might be imprecise, otherwise they
+--   are reliable.
+--
+--   The algorithm is based on the paper
+--   "Static Branch Prediction and Program Profile Analysis" by Y Wu, JR Larus.
+--   The only big change is that we go over the nodes in the body of loops in
+--   reverse post order, which is required for diamond control flow to work properly.
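+--
+--   As an illustration (frequencies hypothetical): if A branches to B and C
+--   with local probabilities 0.5 each, and both B and C continue to D, then
+--   with a block frequency of 1 for A the global edge frequencies are 0.5
+--   for each of A->B, A->C, B->D and C->D, and D's block frequency is 1 again.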
+--
+--   We also apply a few prediction heuristics (based on the same paper).
+
+{-# NOINLINE mkGlobalWeights #-}
+{-# SCC mkGlobalWeights #-}
+mkGlobalWeights :: HasDebugCallStack => BlockId -> CFG -> (LabelMap Double, LabelMap (LabelMap Double))
+mkGlobalWeights root localCfg
+  | null localCfg = panic "Error - Empty CFG"
+  | otherwise
+  = (blockFreqs', edgeFreqs')
+  where
+    -- Calculate fixpoints
+    (blockFreqs, edgeFreqs) = calcFreqs nodeProbs backEdges' bodies' revOrder'
+    blockFreqs' = mapFromList $ map (first fromVertex) (assocs blockFreqs) :: LabelMap Double
+    edgeFreqs' = fmap fromVertexMap $ fromVertexMap edgeFreqs
+
+    fromVertexMap :: IM.IntMap x -> LabelMap x
+    fromVertexMap m = mapFromList . map (first fromVertex) $ IM.toList m
+
+    revOrder = revPostorderFrom localCfg root :: [BlockId]
+    loopResults@(LoopInfo backedges _levels bodies) = loopInfo localCfg root
+
+    revOrder' = map toVertex revOrder
+    backEdges' = map (bimap toVertex toVertex) backedges
+    bodies' = map calcBody bodies
+
+    estimatedCfg = staticBranchPrediction root loopResults localCfg
+    -- Normalize the weights to probabilities and apply heuristics
+    nodeProbs = cfgEdgeProbabilities estimatedCfg toVertex
+
+    -- By mapping vertices to numbers in reverse post order we can bring any subset into reverse post
+    -- order simply by sorting.
+    -- TODO: The sort is redundant if we can guarantee that setElems returns elements ascending
+    calcBody (backedge, blocks) =
+        (toVertex $ snd backedge, sort . map toVertex $ (setElems blocks))
+
+    vertexMapping = mapFromList $ zip revOrder [0..] :: LabelMap Int
+    blockMapping = listArray (0,mapSize vertexMapping - 1) revOrder :: Array Int BlockId
+    -- Map from blockId to indices starting at zero
+    toVertex :: BlockId -> Int
+    toVertex blockId = expectJust "mkGlobalWeights" $ mapLookup blockId vertexMapping
+    -- Map from indices starting at zero to blockIds
+    fromVertex :: Int -> BlockId
+    fromVertex vertex = blockMapping ! vertex
+
+{- Note [Static Branch Prediction]
+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The work here has been based on the paper
+"Static Branch Prediction and Program Profile Analysis" by Y Wu, JR Larus.
+
+The primary difference is that if we branch on the result of a heap
+check we do not apply any of the heuristics.
+The reason is simple: They look like loops in the control flow graph
+but are usually never entered, and if they are, at most once.
+
+Currently implemented is a heuristic to predict that we do not exit
+loops (lehPredicts) and one to predict that backedges are more likely
+than any other edge.
+
+The back edge case is special as it supersedes any other heuristic if it
+applies.
+
+Do NOT rely solely on nofib results for benchmarking this. I recommend at least
+comparing megaparsec and container benchmarks. Nofib does not seem to have
+many instances of "loopy" Cmm where these make a difference.
+
+TODO:
+* The paper contains more benchmarks which should be implemented.
+* If we turn the likelihood on if/else branches into a probability
+  instead of true/false we could implement this as a Cmm pass.
+  + The complete Cmm code still exists and can be accessed by the heuristics
+  + There is no chance of register allocation/codegen inserting branches/blocks
+  + making the TransitionSource info wrong.
+  + potential to use this information in CmmPasses.
+  - Requires refactoring of all the code relying on the binary nature of likelihood.
+  - Requires refactoring `loopInfo` to work on both Cmm Graphs and the backend CFG.
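+
+A worked example of how a heuristic updates already-normalized weights
+(numbers hypothetical): suppose s1/s2 carry 0.5 each after normalization
+and a heuristic predicts s1 with probability 0.75. Then, following
+applyHeuristic below,
+    d        = 0.5 * 0.75 + 0.5 * 0.25 = 0.5
+    s1_prob' = 0.5 * 0.75 / 0.5        = 0.75
+    s2_prob' = 0.5 * 0.25 / 0.5        = 0.25
+so the pair stays normalized, and several heuristics can be folded in
+one after another.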
+-}
+
+-- | Combination of target node id and information about the branch
+--   we are looking at.
+type TargetNodeInfo = (BlockId, EdgeInfo)
+
+
+-- | Update branch weights based on certain heuristics.
+-- See Note [Static Branch Prediction]
+-- TODO: This should be combined with optimizeCFG
+{-# SCC staticBranchPrediction #-}
+staticBranchPrediction :: BlockId -> LoopInfo -> CFG -> CFG
+staticBranchPrediction _root (LoopInfo l_backEdges loopLevels l_loops) cfg =
+    -- pprTrace "staticEstimatesOn" (ppr (cfg)) $
+    foldl' update cfg nodes
+  where
+    nodes = getCfgNodes cfg
+    backedges = S.fromList $ l_backEdges
+    -- Loops keyed by their back edge
+    loops = M.fromList $ l_loops :: M.Map Edge LabelSet
+    loopHeads = S.fromList $ map snd $ M.keys loops
+
+    update :: CFG -> BlockId -> CFG
+    update cfg node
+        -- No successors, nothing to do.
+        | null successors = cfg
+
+        -- Mix of backedges and others:
+        -- Always predict the backedges.
+        | not (null m) && length m < length successors
+        -- Heap/Stack checks "loop", but only once.
+        -- So we simply exclude any case involving them.
+        , not $ any (isHeapOrStackCheck . transitionSource . snd) successors
+        = let   loopChance = repeat $! pred_LBH / (fromIntegral $ length m)
+                exitChance = repeat $! (1 - pred_LBH) / fromIntegral (length not_m)
+                updates = zip (map fst m) loopChance ++ zip (map fst not_m) exitChance
+          in  -- pprTrace "mix" (ppr (node,successors)) $
+              foldl'  (\cfg (to,weight) -> setEdgeWeight cfg weight node to) cfg updates
+
+        -- For (regular) non-binary branches we keep the weights from the STG -> Cmm translation.
+        | length successors /= 2
+        = cfg
+
+        -- Only backedges - no need to adjust
+        | length m > 0
+        = cfg
+
+        -- A regular binary branch, we can plug additional predictors in here.
+        | [(s1,s1_info),(s2,s2_info)] <- successors
+        , not $ any (isHeapOrStackCheck . transitionSource . snd) successors
+        = -- Normalize weights to total of 1
+          let !w1 = max (edgeWeight s1_info) (0)
+              !w2 = max (edgeWeight s2_info) (0)
+              -- If both weights are <= 0 we set both to 0.5
+              normalizeWeight w = if w1 + w2 == 0 then 0.5 else w/(w1+w2)
+              !cfg'  = setEdgeWeight cfg  (normalizeWeight w1) node s1
+              !cfg'' = setEdgeWeight cfg' (normalizeWeight w2) node s2
+
+              -- Figure out which heuristics apply to these successors
+              heuristics = map ($ ((s1,s1_info),(s2,s2_info)))
+                [lehPredicts, phPredicts, ohPredicts, ghPredicts, lhhPredicts, chPredicts
+                , shPredicts, rhPredicts]
+              -- Apply result of a heuristic. Argument is the likelihood
+              -- predicted for s1.
+              applyHeuristic :: CFG -> Maybe Prob -> CFG
+              applyHeuristic cfg Nothing = cfg
+              applyHeuristic cfg (Just (s1_pred :: Double))
+                | s1_old == 0 || s2_old == 0 ||
+                  isHeapOrStackCheck (transitionSource s1_info) ||
+                  isHeapOrStackCheck (transitionSource s2_info)
+                = cfg
+                | otherwise =
+                  let -- Predictions from heuristic
+                      s1_prob = EdgeWeight s1_pred :: EdgeWeight
+                      s2_prob = 1.0 - s1_prob
+                      -- Update
+                      d = (s1_old * s1_prob) + (s2_old * s2_prob) :: EdgeWeight
+                      s1_prob' = s1_old * s1_prob / d
+                      !s2_prob' = s2_old * s2_prob / d
+                      !cfg_s1 = setEdgeWeight cfg s1_prob' node s1
+                  in  -- pprTrace "Applying heuristic!" (ppr (node,s1,s2) $$ ppr (s1_prob', s2_prob')) $
+                      setEdgeWeight cfg_s1 s2_prob' node s2
+                where
+                  -- Old weights
+                  s1_old = getEdgeWeight cfg node s1
+                  s2_old = getEdgeWeight cfg node s2
+
+          in
+          -- pprTraceIt "RegularCfgResult" $
+          foldl' applyHeuristic cfg'' heuristics
+
+        -- Branch on heap/stack check
+        | otherwise = cfg
+
+      where
+        -- Chance that loops are taken.
+        pred_LBH = 0.875
+        -- successors
+        successors = getSuccessorEdges cfg node
+        -- backedges
+        (m,not_m) = partition (\succ -> S.member (node, fst succ) backedges) successors
+
+        -- Heuristics return Nothing if they don't say anything about this branch,
+        -- or Just prob_s1 where prob_s1 is the likelihood for s1 to be the
+        -- taken branch. s1 is the branch in the true case.
+
+        -- Loop exit heuristic.
+        -- We are unlikely to leave a loop unless it's to enter another one.
+        pred_LEH = 0.75
+        -- If and only if no successor is a loop header,
+        -- then we will likely not exit the current loop body.
+        lehPredicts :: (TargetNodeInfo,TargetNodeInfo) -> Maybe Prob
+        lehPredicts ((s1,_s1_info),(s2,_s2_info))
+          | S.member s1 loopHeads || S.member s2 loopHeads
+          = Nothing
+
+          | otherwise
+          = --pprTrace "lehPredict:" (ppr $ compare s1Level s2Level) $
+            case compare s1Level s2Level of
+                EQ -> Nothing
+                LT -> Just (1-pred_LEH) --s1 exits to a shallower loop level (exits loop)
+                GT -> Just (pred_LEH)   --s1 exits to a deeper loop level
+            where
+              s1Level = mapLookup s1 loopLevels
+              s2Level = mapLookup s2 loopLevels
+
+        -- Comparing to a constant is unlikely to be equal.
+        ohPredicts (s1,_s2)
+            | CmmSource { trans_cmmNode = src1 } <- getTransitionSource node (fst s1) cfg
+            , CmmCondBranch cond ltrue _lfalse likely <- src1
+            , likely == Nothing
+            , CmmMachOp mop args <- cond
+            , MO_Eq {} <- mop
+            , not (null [x | x@CmmLit{} <- args])
+            = if fst s1 == ltrue then Just 0.3 else Just 0.7
+
+            | otherwise
+            = Nothing
+
+        -- TODO: These are all the other heuristics from the paper.
+        -- Not all will apply, for now we just stub them out as Nothing.
+        phPredicts  = const Nothing
+        ghPredicts  = const Nothing
+        lhhPredicts = const Nothing
+        chPredicts  = const Nothing
+        shPredicts  = const Nothing
+        rhPredicts  = const Nothing
+
+-- We normalize all edge weights as probabilities between 0 and 1.
+-- Ignoring rounding errors all outgoing edges sum up to 1.
+cfgEdgeProbabilities :: CFG -> (BlockId -> Int) -> IM.IntMap (IM.IntMap Prob)
+cfgEdgeProbabilities cfg toVertex
+    = mapFoldlWithKey foldEdges IM.empty cfg
+  where
+    foldEdges = (\m from toMap -> IM.insert (toVertex from) (normalize toMap) m)
+
+    normalize :: (LabelMap EdgeInfo) -> (IM.IntMap Prob)
+    normalize weightMap
+        | edgeCount <= 1 = mapFoldlWithKey (\m k _ -> IM.insert (toVertex k) 1.0 m) IM.empty weightMap
+        | otherwise = mapFoldlWithKey (\m k _ -> IM.insert (toVertex k) (normalWeight k) m) IM.empty weightMap
+      where
+        edgeCount = mapSize weightMap
+        -- Negative weights are generally allowed but are mapped to zero.
+        -- We then check if there is at least one non-zero edge and if not
+        -- assign uniform weights to all branches.
+        minWeight = 0 :: Prob
+        weightMap' = fmap (\w -> max (weightToDouble . edgeWeight $ w) minWeight) weightMap
+        totalWeight = sum weightMap'
+
+        normalWeight :: BlockId -> Prob
+        normalWeight bid
+         | totalWeight == 0
+         = 1.0 / fromIntegral edgeCount
+         | Just w <- mapLookup bid weightMap'
+         = w/totalWeight
+         | otherwise = panic "impossible"
+
+-- This is the fixpoint algorithm from
+-- "Static Branch Prediction and Program Profile Analysis" by Y Wu, JR Larus.
+-- The adaptation to Haskell is my own.
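+--
+-- A tiny worked instance (vertex numbers hypothetical): entry 0 jumps to 1,
+-- block 1 loops on itself with probability 0.5 and exits to 2 otherwise:
+--
+--   graph        = IM.fromList [ (0, IM.fromList [(1, 1.0)])
+--                              , (1, IM.fromList [(1, 0.5), (2, 0.5)])
+--                              , (2, IM.empty) ]
+--   backEdges    = [(1, 1)]
+--   loops        = [(1, [1])]
+--   revPostOrder = [0, 1, 2]
+--
+-- The fixpoint yields block frequencies 1, 2 and 1: block 1 executes twice
+-- on average, since each iteration continues with probability 0.5.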
+calcFreqs :: IM.IntMap (IM.IntMap Prob) -> [(Int,Int)] -> [(Int, [Int])] -> [Int]
+          -> (Array Int Double, IM.IntMap (IM.IntMap Prob))
+calcFreqs graph backEdges loops revPostOrder = runST $ do
+    visitedNodes <- newArray (0,nodeCount-1) False :: ST s (STUArray s Int Bool)
+    blockFreqs <- newArray (0,nodeCount-1) 0.0 :: ST s (STUArray s Int Double)
+    edgeProbs <- newSTRef graph
+    edgeBackProbs <- newSTRef graph
+
+    -- let traceArray a = do
+    --       vs <- forM [0..nodeCount-1] $ \i -> readArray a i >>= (\v -> return (i,v))
+    --       trace ("array: " ++ show vs) $ return ()
+
+    let -- See #1600, we need to inline or unboxing makes perf worse.
+        -- {-# INLINE getFreq #-}
+        {-# INLINE visited #-}
+        visited b = unsafeRead visitedNodes b
+        getFreq b = unsafeRead blockFreqs b
+        -- setFreq :: forall s. Int -> Double -> ST s ()
+        setFreq b f = unsafeWrite blockFreqs b f
+        -- setVisited :: forall s. Node -> ST s ()
+        setVisited b = unsafeWrite visitedNodes b True
+        -- Frequency/probability that edge is taken.
+        getProb' arr b1 b2 = readSTRef arr >>=
+            (\graph ->
+              return .
+              fromMaybe (error "getFreq 1") .
+              IM.lookup b2 .
+              fromMaybe (error "getFreq 2") $
+              (IM.lookup b1 graph)
+            )
+        setProb' arr b1 b2 prob = do
+          g <- readSTRef arr
+          let !m = fromMaybe (error "Foo") $ IM.lookup b1 g
+              !m' = IM.insert b2 prob m
+          writeSTRef arr $! (IM.insert b1 m' g)
+
+        getEdgeFreq b1 b2 = getProb' edgeProbs b1 b2
+        setEdgeFreq b1 b2 = setProb' edgeProbs b1 b2
+        getProb b1 b2 = fromMaybe (error "getProb") $ do
+            m' <- IM.lookup b1 graph
+            IM.lookup b2 m'
+
+        getBackProb b1 b2 = getProb' edgeBackProbs b1 b2
+        setBackProb b1 b2 = setProb' edgeBackProbs b1 b2
+
+
+    let -- calcOutFreqs :: Node -> ST s ()
+        calcOutFreqs bhead block = do
+          !f <- getFreq block
+          forM (successors block) $ \bi -> do
+            let !prob = getProb block bi
+            let !succFreq = f * prob
+            setEdgeFreq block bi succFreq
+            -- traceM $ "SetOut: " ++ show (block, bi, f, prob, succFreq)
+            when (bi == bhead) $ setBackProb block bi succFreq
+
+
+    let propFreq block head = do
+            -- traceM ("prop:" ++ show (block,head))
+            -- traceShowM block
+
+            !v <- visited block
+            if v then
+                return () --Don't look at nodes twice
+            else if block == head then
+                setFreq block 1.0 -- Loop header frequency is always 1
+            else do
+                let preds = IS.elems $ predecessors block
+                irreducible <- (fmap or) $ forM preds $ \bp -> do
+                    !bp_visited <- visited bp
+                    let bp_backedge = isBackEdge bp block
+                    return (not bp_visited && not bp_backedge)
+
+                if irreducible
+                then return () -- Rare, we don't care
+                else do
+                    setFreq block 0
+                    !cycleProb <- sum <$> (forM preds $ \pred -> do
+                        if isBackEdge pred block
+                        then
+                            getBackProb pred block
+                        else do
+                            !f <- getFreq block
+                            !prob <- getEdgeFreq pred block
+                            setFreq block $! f + prob
+                            return 0)
+                    -- traceM $ "cycleProb:" ++ show cycleProb
+                    let limit = 1 - 1/512 -- Paper uses 1 - epsilon, but this works.
+                                          -- Determines how large likelihoods in loops can grow.
+                    !cycleProb <- return $ min cycleProb limit -- <- return $ if cycleProb > limit then limit else cycleProb
+                    -- traceM $ "cycleProb:" ++ show cycleProb
+
+                    !f <- getFreq block
+                    setFreq block (f / (1.0 - cycleProb))
+
+            setVisited block
+            calcOutFreqs head block
+
+    -- Loops, by nesting, inner to outer
+    forM_ loops $ \(head, body) -> do
+        forM_ [0 .. nodeCount - 1] (\i -> unsafeWrite visitedNodes i True) -- Mark all nodes as visited.
+        forM_ body (\i -> unsafeWrite visitedNodes i False) -- Mark all blocks reachable from head as not visited
+        forM_ body $ \block -> propFreq block head
+
+    -- After dealing with all loops, deal with non-looping parts of the CFG
+    forM_ [0 .. nodeCount - 1] (\i -> unsafeWrite visitedNodes i False) -- Everything in revPostOrder is reachable
+    forM_ revPostOrder $ \block -> propFreq block (head revPostOrder)
+
+    -- trace ("Final freqs:") $ return ()
+    -- let freqString = pprFreqs freqs
+    -- trace (unlines freqString) $ return ()
+    -- trace (pprFre) $ return ()
+    graph' <- readSTRef edgeProbs
+    freqs' <- unsafeFreeze blockFreqs
+
+    return (freqs', graph')
+  where
+    -- How can these lookups fail? Consider the CFG [A -> B]
+    predecessors :: Int -> IS.IntSet
+    predecessors b = fromMaybe IS.empty $ IM.lookup b revGraph
+    successors :: Int -> [Int]
+    successors b = fromMaybe (lookupError "succ" b graph) $ IM.keys <$> IM.lookup b graph
+    lookupError s b g = pprPanic ("Lookup error " ++ s) $
+                            ( text "node" <+> ppr b $$
+                              text "graph" <+>
+                              vcat (map (\(k,m) -> ppr (k,m :: IM.IntMap Double)) $ IM.toList g)
+                            )
+
+    nodeCount = IM.foldl' (\count toMap -> IM.foldlWithKey' countTargets count toMap) (IM.size graph) graph
+      where
+        countTargets = (\count k _ -> countNode k + count )
+        countNode n = if IM.member n graph then 0 else 1
+
+    isBackEdge from to = S.member (from,to) backEdgeSet
+    backEdgeSet = S.fromList backEdges
+
+    revGraph :: IntMap IntSet
+    revGraph = IM.foldlWithKey' (\m from toMap -> addEdges m from toMap) IM.empty graph
+      where
+        addEdges m0 from toMap = IM.foldlWithKey' (\m k _ -> addEdge m from k) m0 toMap
+        addEdge m0 from to = IM.insertWith IS.union to (IS.singleton from) m0
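+
+-- An end-to-end usage sketch (driver code hypothetical): the native code
+-- generator would build and refine a CFG for a proc roughly as follows,
+-- with `weights = D.cfgWeightInfo dflags` and `entryLbl` the proc entry:
+--
+--   let cfg  = getCfgProc weights decl
+--       cfg' = optimizeCFG weights decl cfg
+--       (blockFreqs, edgeFreqs) = mkGlobalWeights entryLbl cfg'
+--
+-- `blockFreqs` and `edgeFreqs` can then inform block layout decisions.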