diff options
-rw-r--r-- | compiler/nativeGen/AsmCodeGen.hs | 25 | ||||
-rw-r--r-- | compiler/nativeGen/CFG.hs | 127 | ||||
-rw-r--r-- | compiler/nativeGen/NCGMonad.hs | 4 | ||||
-rw-r--r-- | compiler/nativeGen/RegAlloc/Liveness.hs | 4 | ||||
-rw-r--r-- | compiler/nativeGen/X86/CodeGen.hs | 187 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Instr.hs | 4 | ||||
-rw-r--r-- | compiler/utils/Dominators.hs | 23 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_compile/T17334.hs | 384 |
8 files changed, 544 insertions, 214 deletions
diff --git a/compiler/nativeGen/AsmCodeGen.hs b/compiler/nativeGen/AsmCodeGen.hs index 4c883e7185..a40bf02013 100644 --- a/compiler/nativeGen/AsmCodeGen.hs +++ b/compiler/nativeGen/AsmCodeGen.hs @@ -534,6 +534,10 @@ cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count = do let platform = targetPlatform dflags + let proc_name = case cmm of + (CmmProc _ entry_label _ _) -> ppr entry_label + _ -> text "DataChunk" + -- rewrite assignments to global regs let fixed_cmm = {-# SCC "fixStgRegisters" #-} @@ -562,12 +566,11 @@ cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count Opt_D_dump_asm_native "Native code" (vcat $ map (pprNatCmmDecl ncgImpl) native) - when (not $ null nativeCfgWeights) $ dumpIfSet_dyn dflags - Opt_D_dump_cfg_weights "CFG Weights" - (pprEdgeWeights nativeCfgWeights) + maybeDumpCfg dflags (Just nativeCfgWeights) "CFG Weights - Native" proc_name -- tag instructions with register liveness information - -- also drops dead code + -- also drops dead code. We don't keep the cfg in sync on + -- some backends, so don't use it there. let livenessCfg = if (backendMaintainsCfg dflags) then Just nativeCfgWeights else Nothing @@ -705,10 +708,7 @@ cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count optimizedCFG = optimizeCFG (cfgWeightInfo dflags) cmm <$!> postShortCFG - maybe (return ()) (\cfg-> - dumpIfSet_dyn dflags Opt_D_dump_cfg_weights "CFG Final Weights" - ( pprEdgeWeights cfg )) - optimizedCFG + maybeDumpCfg dflags optimizedCFG "CFG Weights - Final" proc_name --TODO: Partially check validity of the cfg. let getBlks (CmmProc _info _lbl _live (ListGraph blocks)) = blocks @@ -771,6 +771,15 @@ cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count , ppr_raStatsLinear , unwinds ) +maybeDumpCfg :: DynFlags -> Maybe CFG -> String -> SDoc -> IO () +maybeDumpCfg _dflags Nothing _ _ = return () +maybeDumpCfg dflags (Just cfg) msg proc_name + | null cfg = return () + | otherwise + = dumpIfSet_dyn + dflags Opt_D_dump_cfg_weights msg + (proc_name <> char ':' $$ pprEdgeWeights cfg) + -- | Make sure all blocks we want the layout algorithm to place have been placed. checkLayout :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr] diff --git a/compiler/nativeGen/CFG.hs b/compiler/nativeGen/CFG.hs index 8eb69a9dbf..fb17d269a8 100644 --- a/compiler/nativeGen/CFG.hs +++ b/compiler/nativeGen/CFG.hs @@ -82,7 +82,6 @@ import PprCmm () -- For Outputable instances import qualified DynFlags as D import Data.List - import Data.STRef.Strict import Control.Monad.ST @@ -109,6 +108,13 @@ instance Outputable EdgeWeight where type EdgeInfoMap edgeInfo = LabelMap (LabelMap edgeInfo) -- | A control flow graph where edges have been annotated with a weight. +-- Implemented as IntMap (IntMap <edgeData>) +-- We must uphold the invariant that for each edge A -> B we must have: +-- A entry B in the outer map. +-- A entry B in the map we get when looking up A. +-- Maintaining this invariant is useful as any failed lookup now indicates +-- an actual error in code which might go unnoticed for a while +-- otherwise. type CFG = EdgeInfoMap EdgeInfo data CfgEdge @@ -199,13 +205,20 @@ setEdgeWeight cfg !weight from to | otherwise = cfg - -getCfgNodes :: CFG -> LabelSet +getCfgNodes :: CFG -> [BlockId] getCfgNodes m = - mapFoldlWithKey (\s k toMap -> mapFoldlWithKey (\s k _ -> setInsert k s) (setInsert k s) toMap ) setEmpty m + mapKeys m +-- | Is this block part of this graph? hasNode :: CFG -> BlockId -> Bool -hasNode m node = mapMember node m || any (mapMember node) m +hasNode m node = + -- Check the invariant that each node must exist in the first map or not at all. + ASSERT( found || not (any (mapMember node) m)) + found + where + found = mapMember node m + + -- | Check if the nodes in the cfg and the set of blocks are the same. -- In a case of a missmatch we panic and show the difference. @@ -217,11 +230,11 @@ sanityCheckCfg m blockSet msg pprPanic "Block list and cfg nodes don't match" ( text "difference:" <+> ppr diff $$ text "blocks:" <+> ppr blockSet $$ - text "cfg:" <+> ppr m $$ + text "cfg:" <+> pprEdgeWeights m $$ msg ) False where - cfgNodes = getCfgNodes m :: LabelSet + cfgNodes = setFromList $ getCfgNodes m :: LabelSet diff = (setUnion cfgNodes blockSet) `setDifference` (setIntersection cfgNodes blockSet) :: LabelSet -- | Filter the CFG with a custom function f. @@ -332,10 +345,16 @@ addImmediateSuccessor node follower cfg -- | Adds a new edge, overwrites existing edges if present addEdge :: BlockId -> BlockId -> EdgeInfo -> CFG -> CFG addEdge from to info cfg = - mapAlter addDest from cfg + mapAlter addFromToEdge from $ + mapAlter addDestNode to cfg where - addDest Nothing = Just $ mapSingleton to info - addDest (Just wm) = Just $ mapInsert to info wm + -- Simply insert the edge into the edge list. + addFromToEdge Nothing = Just $ mapSingleton to info + addFromToEdge (Just wm) = Just $ mapInsert to info wm + -- We must add the destination node explicitly + addDestNode Nothing = Just $ mapEmpty + addDestNode n@(Just _) = n + -- | Adds a edge with the given weight to the cfg -- If there already existed an edge it is overwritten. @@ -366,8 +385,11 @@ getSuccEdgesSorted m bid = sortedEdges -- | Get successors of a given node with edge weights. -getSuccessorEdges :: CFG -> BlockId -> [(BlockId,EdgeInfo)] -getSuccessorEdges m bid = maybe [] mapToList $ mapLookup bid m +getSuccessorEdges :: HasDebugCallStack => CFG -> BlockId -> [(BlockId,EdgeInfo)] +getSuccessorEdges m bid = maybe lookupError mapToList (mapLookup bid m) + where + lookupError = pprPanic "getSuccessorEdges: Block does not exist" $ + ppr bid <+> pprEdgeWeights m getEdgeInfo :: BlockId -> BlockId -> CFG -> Maybe EdgeInfo getEdgeInfo from to m @@ -389,7 +411,7 @@ getTransitionSource from to cfg = transitionSource $ expectJust "Source info for reverseEdges :: CFG -> CFG reverseEdges cfg = mapFoldlWithKey (\cfg from toMap -> go (addNode cfg from) from toMap) mapEmpty cfg where - -- We preserve nodes without outgoing edges! + -- We must preserve nodes without outgoing edges! addNode :: CFG -> BlockId -> CFG addNode cfg b = mapInsertWith mapUnion b mapEmpty cfg go :: CFG -> BlockId -> (LabelMap EdgeInfo) -> CFG @@ -427,11 +449,14 @@ edgeList m = = go' froms from tos ((from,to) : acc) -- | Get successors of a given node without edge weights. -getSuccessors :: CFG -> BlockId -> [BlockId] +getSuccessors :: HasDebugCallStack => CFG -> BlockId -> [BlockId] getSuccessors m bid | Just wm <- mapLookup bid m = mapKeys wm - | otherwise = [] + | otherwise = lookupError + where + lookupError = pprPanic "getSuccessors: Block does not exist" $ + ppr bid <+> pprEdgeWeights m pprEdgeWeights :: CFG -> SDoc pprEdgeWeights m = @@ -455,6 +480,7 @@ pprEdgeWeights m = text "}\n" {-# INLINE updateEdgeWeight #-} --Allows eliminating the tuple when possible +-- | Invariant: The edge **must** exist already in the graph. updateEdgeWeight :: (EdgeWeight -> EdgeWeight) -> Edge -> CFG -> CFG updateEdgeWeight f (from, to) cfg | Just oldInfo <- getEdgeInfo from to cfg @@ -503,7 +529,7 @@ addNodesBetween m updates = = pprPanic "Can't find weight for edge that should have one" ( text "triple" <+> ppr (from,between,old) $$ text "updates" <+> ppr updates $$ - text "cfg:" <+> ppr m ) + text "cfg:" <+> pprEdgeWeights m ) updateWeight :: CFG -> (BlockId,BlockId,BlockId,EdgeInfo) -> CFG updateWeight m (from,between,old,edgeInfo) = addEdge from between edgeInfo . @@ -634,7 +660,7 @@ getCfg weights graph = blocks = revPostorder graph :: [CmmBlock] --Find back edges by BFS -findBackEdges :: BlockId -> CFG -> Edges +findBackEdges :: HasDebugCallStack => BlockId -> CFG -> Edges findBackEdges root cfg = --pprTraceIt "Backedges:" $ map fst . @@ -714,7 +740,7 @@ optimizeCFG weights (CmmProc info _lab _live graph) cfg = (adjustEdgeWeight cfg (+mod1) node s1) | otherwise = cfg - in setFoldl update cfg nodes + in foldl' update cfg nodes where fallthroughTarget :: BlockId -> EdgeInfo -> Bool fallthroughTarget to (EdgeInfo source _weight) @@ -726,13 +752,13 @@ optimizeCFG weights (CmmProc info _lab _live graph) cfg = -- | Determine loop membership of blocks based on SCC analysis -- This is faster but only gives yes/no answers. -loopMembers :: CFG -> LabelMap Bool +loopMembers :: HasDebugCallStack => CFG -> LabelMap Bool loopMembers cfg = foldl' (flip setLevel) mapEmpty sccs where mkNode :: BlockId -> Node BlockId BlockId mkNode bid = DigraphNode bid bid (getSuccessors cfg bid) - nodes = map mkNode (setElems $ getCfgNodes cfg) + nodes = map mkNode (getCfgNodes cfg) sccs = stronglyConnCompFromEdgedVerticesOrd nodes @@ -741,7 +767,9 @@ loopMembers cfg = setLevel (CyclicSCC bids) m = foldl' (\m k -> mapInsert k True m) m bids loopLevels :: CFG -> BlockId -> LabelMap Int -loopLevels cfg root = liLevels $ loopInfo cfg root +loopLevels cfg root = liLevels loopInfos + where + loopInfos = loopInfo cfg root data LoopInfo = LoopInfo { liBackEdges :: [(Edge)] -- ^ List of back edges @@ -754,23 +782,39 @@ instance Outputable LoopInfo where text "Loops:(backEdge, bodyNodes)" $$ (vcat $ map ppr loops) +{- Note [Determining the loop body] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Starting with the knowledge that: + * head dominates the loop + * `tail` -> `head` is a backedge + + We can determine all nodes by: + * Deleting the loop head from the graph. + * Collect all blocks which are reachable from the `tail`. + + We do so by performing bfs from the tail node towards the head. + -} + -- | Determine loop membership of blocks based on Dominator analysis. -- This is slower but gives loop levels instead of just loop membership. -- However it only detects natural loops. Irreducible control flow is not -- recognized even if it loops. But that is rare enough that we don't have -- to care about that special case. -loopInfo :: CFG -> BlockId -> LoopInfo +loopInfo :: HasDebugCallStack => CFG -> BlockId -> LoopInfo loopInfo cfg root = LoopInfo { liBackEdges = backEdges , liLevels = mapFromList loopCounts , liLoops = loopBodies } where revCfg = reverseEdges cfg - graph = fmap (setFromList . mapKeys ) cfg :: LabelMap LabelSet + + graph = -- pprTrace "CFG - loopInfo" (pprEdgeWeights cfg) $ + fmap (setFromList . mapKeys ) cfg :: LabelMap LabelSet + --TODO - This should be a no op: Export constructors? Use unsafeCoerce? ... rooted = ( fromBlockId root , toIntMap $ fmap toIntSet graph) :: (Int, IntMap IntSet) - -- rooted = unsafeCoerce (root, graph) tree = fmap toBlockId $ Dom.domTree rooted :: Tree BlockId -- Map from Nodes to their dominators @@ -778,8 +822,8 @@ loopInfo cfg root = LoopInfo { liBackEdges = backEdges domMap = mkDomMap tree edges = edgeList cfg :: [(BlockId, BlockId)] - -- We can't recompute this from the edges, there might be blocks not connected via edges. - nodes = getCfgNodes cfg :: LabelSet + -- We can't recompute nodes from edges, there might be blocks not connected via edges. + nodes = getCfgNodes cfg :: [BlockId] -- identify back edges isBackEdge (from,to) @@ -788,22 +832,26 @@ loopInfo cfg root = LoopInfo { liBackEdges = backEdges = True | otherwise = False - -- determine the loop body for a back edge + -- See Note [Determining the loop body] + -- Get the loop body associated with a back edge. findBody edge@(tail, head) = ( edge, setInsert head $ go (setSingleton tail) (setSingleton tail) ) where - -- The reversed cfg makes it easier to look up predecessors + -- See Note [Determining the loop body] cfg' = delNode head revCfg + go :: LabelSet -> LabelSet -> LabelSet go found current | setNull current = found | otherwise = go (setUnion newSuccessors found) newSuccessors where + -- Really predecessors, since we use the reversed cfg. newSuccessors = setFilter (\n -> not $ setMember n found) successors :: LabelSet successors = setFromList $ concatMap (getSuccessors cfg') - (setElems current) :: LabelSet + -- we filter head as it's no longer part of the cfg. + (filter (/= head) $ setElems current) :: LabelSet backEdges = filter isBackEdge edges loopBodies = map findBody backEdges :: [(Edge, LabelSet)] @@ -812,7 +860,7 @@ loopInfo cfg root = LoopInfo { liBackEdges = backEdges loopCounts = let bodies = map (first snd) loopBodies -- [(Header, Body)] loopCount n = length $ nub . map fst . filter (setMember n . snd) $ bodies - in map (\n -> (n, loopCount n)) $ setElems nodes :: [(BlockId, Int)] + in map (\n -> (n, loopCount n)) $ nodes :: [(BlockId, Int)] toIntSet :: LabelSet -> IntSet toIntSet s = IS.fromList . map fromBlockId . setElems $ s @@ -845,12 +893,12 @@ instance G.NonLocal (BlockNode) where entryLabel (BN (lbl,_)) = lbl successors (BN (_,succs)) = succs -revPostorderFrom :: CFG -> BlockId -> [BlockId] +revPostorderFrom :: HasDebugCallStack => CFG -> BlockId -> [BlockId] revPostorderFrom cfg root = map fromNode $ G.revPostorderFrom hooplGraph root where nodes = getCfgNodes cfg - hooplGraph = setFoldl (\m n -> mapInsert n (toNode n) m) mapEmpty nodes + hooplGraph = foldl' (\m n -> mapInsert n (toNode n) m) mapEmpty nodes fromNode :: BlockNode C C -> BlockId fromNode (BN x) = fst x @@ -876,14 +924,13 @@ revPostorderFrom cfg root = -- -- We also apply a few prediction heuristics (based on the same paper) +{-# NOINLINE mkGlobalWeights #-} {-# SCC mkGlobalWeights #-} -mkGlobalWeights :: BlockId -> CFG -> (LabelMap Double, LabelMap (LabelMap Double)) +mkGlobalWeights :: HasDebugCallStack => BlockId -> CFG -> (LabelMap Double, LabelMap (LabelMap Double)) mkGlobalWeights root localCfg | null localCfg = panic "Error - Empty CFG" | otherwise - = --pprTrace "revOrder" (ppr revOrder) $ - -- undefined --propagate (mapSingleton root 1) (revOrder) - (blockFreqs', edgeFreqs') + = (blockFreqs', edgeFreqs') where -- Calculate fixpoints (blockFreqs, edgeFreqs) = calcFreqs nodeProbs backEdges' bodies' revOrder' @@ -894,13 +941,13 @@ mkGlobalWeights root localCfg fromVertexMap m = mapFromList . map (first fromVertex) $ IM.toList m revOrder = revPostorderFrom localCfg root :: [BlockId] - loopinfo@(LoopInfo backedges _levels bodies) = loopInfo localCfg root + loopResults@(LoopInfo backedges _levels bodies) = loopInfo localCfg root revOrder' = map toVertex revOrder backEdges' = map (bimap toVertex toVertex) backedges bodies' = map calcBody bodies - estimatedCfg = staticBranchPrediction root loopinfo localCfg + estimatedCfg = staticBranchPrediction root loopResults localCfg -- Normalize the weights to probabilities and apply heuristics nodeProbs = cfgEdgeProbabilities estimatedCfg toVertex @@ -965,7 +1012,7 @@ type TargetNodeInfo = (BlockId, EdgeInfo) staticBranchPrediction :: BlockId -> LoopInfo -> CFG -> CFG staticBranchPrediction _root (LoopInfo l_backEdges loopLevels l_loops) cfg = -- pprTrace "staticEstimatesOn" (ppr (cfg)) $ - setFoldl update cfg nodes + foldl' update cfg nodes where nodes = getCfgNodes cfg backedges = S.fromList $ l_backEdges @@ -1248,8 +1295,10 @@ calcFreqs graph backEdges loops revPostOrder = runST $ do return (freqs', graph') where + -- How can these lookups fail? Consider the CFG [A -> B] predecessors :: Int -> IS.IntSet predecessors b = fromMaybe IS.empty $ IM.lookup b revGraph + successors :: Int -> [Int] successors b = fromMaybe (lookupError "succ" b graph)$ IM.keys <$> IM.lookup b graph lookupError s b g = pprPanic ("Lookup error " ++ s) $ ( text "node" <+> ppr b $$ diff --git a/compiler/nativeGen/NCGMonad.hs b/compiler/nativeGen/NCGMonad.hs index cf3c58844f..71503aa653 100644 --- a/compiler/nativeGen/NCGMonad.hs +++ b/compiler/nativeGen/NCGMonad.hs @@ -1,5 +1,6 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE BangPatterns #-} -- ----------------------------------------------------------------------------- -- @@ -204,7 +205,8 @@ addImportNat imp updateCfgNat :: (CFG -> CFG) -> NatM () updateCfgNat f - = NatM $ \ st -> ((), st { natm_cfg = f (natm_cfg st) }) + = NatM $ \ st -> let !cfg' = f (natm_cfg st) + in ((), st { natm_cfg = cfg'}) -- | Record that we added a block between `from` and `old`. addNodeBetweenNat :: BlockId -> BlockId -> BlockId -> NatM () diff --git a/compiler/nativeGen/RegAlloc/Liveness.hs b/compiler/nativeGen/RegAlloc/Liveness.hs index 3f160ea678..a5a9b503cd 100644 --- a/compiler/nativeGen/RegAlloc/Liveness.hs +++ b/compiler/nativeGen/RegAlloc/Liveness.hs @@ -705,8 +705,8 @@ sccBlocks blocks entries mcfg = map (fmap node_payload) sccs reachable :: LabelSet reachable | Just cfg <- mcfg - -- Our CFG only contains reachable nodes by construction. - = getCfgNodes cfg + -- Our CFG only contains reachable nodes by construction at this point. + = setFromList $ getCfgNodes cfg | otherwise = setFromList $ [ node_key node | node <- reachablesG g1 roots ] diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index b1dd9c58ad..1807bdcea1 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -39,6 +39,7 @@ import GhcPrelude import X86.Instr import X86.Cond import X86.Regs +import X86.Ppr ( ) import X86.RegInfo import GHC.Platform.Regs @@ -137,6 +138,56 @@ cmmTopCodeGen (CmmProc info lab live graph) = do cmmTopCodeGen (CmmData sec dat) = do return [CmmData sec (mkAlignment 1, dat)] -- no translation, we just use CmmStatic +{- Note [Verifying basic blocks] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + We want to guarantee a few things about the results + of instruction selection. + + Namely that each basic blocks consists of: + * A (potentially empty) sequence of straight line instructions + followed by + * A (potentially empty) sequence of jump like instructions. + + We can verify this by going through the instructions and + making sure that any non-jumpish instruction can't appear + after a jumpish instruction. + + There are gotchas however: + * CALLs are strictly speaking control flow but here we care + not about them. Hence we treat them as regular instructions. + + It's safe for them to appear inside a basic block + as (ignoring side effects inside the call) they will result in + straight line code. + + * NEWBLOCK marks the start of a new basic block so can + be followed by any instructions. +-} + +-- Verifying basic blocks is cheap, but not cheap enough to enable it unconditionally. +verifyBasicBlock :: [Instr] -> () +verifyBasicBlock instrs + | debugIsOn = go False instrs + | otherwise = () + where + go _ [] = () + go atEnd (i:instr) + = case i of + -- Start a new basic block + NEWBLOCK {} -> go False instr + -- Calls are not viable block terminators + CALL {} | atEnd -> faultyBlockWith i + | not atEnd -> go atEnd instr + -- All instructions ok, check if we reached the end and continue. + _ | not atEnd -> go (isJumpishInstr i) instr + -- Only jumps allowed at the end of basic blocks. + | otherwise -> if isJumpishInstr i + then go True instr + else faultyBlockWith i + faultyBlockWith i + = pprPanic "Non control flow instructions after end of basic block." + (ppr i <+> text "in:" $$ vcat (map ppr instrs)) basicBlockCodeGen :: CmmBlock @@ -155,9 +206,10 @@ basicBlockCodeGen block = do let line = srcSpanStartLine span; col = srcSpanStartCol span return $ unitOL $ LOCATION fileId line col name _ -> return nilOL - mid_instrs <- stmtsToInstrs id stmts - (!tail_instrs,_) <- stmtToInstrs id tail + (mid_instrs,mid_bid) <- stmtsToInstrs id stmts + (!tail_instrs,_) <- stmtToInstrs mid_bid tail let instrs = loc_instrs `appOL` mid_instrs `appOL` tail_instrs + return $! verifyBasicBlock (fromOL instrs) instrs' <- fold <$> traverse addSpUnwindings instrs -- code generation may introduce new basic block boundaries, which -- are indicated by the NEWBLOCK instruction. We must split up the @@ -251,12 +303,12 @@ basic block. -- See Note [Keeping track of the current block] for why -- we pass the BlockId. stmtsToInstrs :: BlockId -- ^ Basic block these statement will start to be placed in. - -> [CmmNode e x] -- ^ Cmm Statement - -> NatM InstrBlock -- ^ Resulting instruction + -> [CmmNode O O] -- ^ Cmm Statement + -> NatM (InstrBlock, BlockId) -- ^ Resulting instruction stmtsToInstrs bid stmts = go bid stmts nilOL where - go _ [] instr = return instr + go bid [] instrs = return (instrs,bid) go bid (s:stmts) instrs = do (instrs',bid') <- stmtToInstrs bid s -- If the statement introduced a new block, we use that one @@ -1822,6 +1874,109 @@ genCondBranch' _ bid id false bool = do updateCfgNat (\cfg -> adjustEdgeWeight cfg (+3) bid false) return (cond_code `appOL` code) +{- Note [Introducing cfg edges inside basic blocks] + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + During instruction selection a statement `s` + in a block B with control of the sort: B -> C + will sometimes result in control + flow of the sort: + + ┌ < ┐ + v ^ + B -> B1 ┴ -> C + + as is the case for some atomic operations. + + Now to keep the CFG in sync when introducing B1 we clearly + want to insert it between B and C. However there is + a catch when we have to deal with self loops. + + We might start with code and a CFG of these forms: + + loop: + stmt1 ┌ < ┐ + .... v ^ + stmtX loop ┘ + stmtY + .... + goto loop: + + Now we introduce B1: + ┌ ─ ─ ─ ─ ─┐ + loop: │ ┌ < ┐ │ + instrs v │ │ ^ + .... loop ┴ B1 ┴ ┘ + instrsFromX + stmtY + goto loop: + + This is simple, all outgoing edges from loop now simply + start from B1 instead and the code generator knows which + new edges it introduced for the self loop of B1. + + Disaster strikes if the statement Y follows the same pattern. + If we apply the same rule that all outgoing edges change then + we end up with: + + loop ─> B1 ─> B2 ┬─┐ + │ │ └─<┤ │ + │ └───<───┘ │ + └───────<────────┘ + + This is problematic. The edge B1->B1 is modified as expected. + However the modification is wrong! + + The assembly in this case looked like this: + + _loop: + <instrs> + _B1: + ... + cmpxchgq ... + jne _B1 + <instrs> + <end _B1> + _B2: + ... + cmpxchgq ... + jne _B2 + <instrs> + jmp loop + + There is no edge _B2 -> _B1 here. It's still a self loop onto _B1. + + The problem here is that really B1 should be two basic blocks. + Otherwise we have control flow in the *middle* of a basic block. + A contradiction! + + So to account for this we add yet another basic block marker: + + _B: + <instrs> + _B1: + ... + cmpxchgq ... + jne _B1 + jmp _B1' + _B1': + <instrs> + <end _B1> + _B2: + ... + + Now when inserting B2 we will only look at the outgoing edges of B1' and + everything will work out nicely. + + You might also wonder why we don't insert jumps at the end of _B1'. There is + no way another block ends up jumping to the labels _B1 or _B2 since they are + essentially invisible to other blocks. View them as control flow labels local + to the basic block if you'd like. + + Not doing this ultimately caused (part 2 of) #17334. +-} + + -- ----------------------------------------------------------------------------- -- Generating C calls @@ -1889,26 +2044,34 @@ genCCall dflags is32Bit (PrimTarget (MO_AtomicRMW width amop)) cmpxchg_code :: (Operand -> Operand -> OrdList Instr) -> NatM (OrdList Instr, BlockId) cmpxchg_code instrs = do - lbl <- getBlockIdNat + lbl1 <- getBlockIdNat + lbl2 <- getBlockIdNat tmp <- getNewRegNat format --Record inserted blocks - addImmediateSuccessorNat bid lbl - updateCfgNat (addWeightEdge lbl lbl 0) + -- We turn A -> B into A -> A' -> A'' -> B + -- with a self loop on A'. + addImmediateSuccessorNat bid lbl1 + addImmediateSuccessorNat lbl1 lbl2 + updateCfgNat (addWeightEdge lbl1 lbl1 0) return $ (toOL [ MOV format (OpAddr amode) (OpReg eax) - , JXX ALWAYS lbl - , NEWBLOCK lbl + , JXX ALWAYS lbl1 + , NEWBLOCK lbl1 -- Keep old value so we can return it: , MOV format (OpReg eax) (OpReg dst_r) , MOV format (OpReg eax) (OpReg tmp) ] `appOL` instrs (OpReg arg) (OpReg tmp) `appOL` toOL [ LOCK (CMPXCHG format (OpReg tmp) (OpAddr amode)) - , JXX NE lbl + , JXX NE lbl1 + -- See Note [Introducing cfg edges inside basic blocks] + -- why this basic block is required. + , JXX ALWAYS lbl2 + , NEWBLOCK lbl2 ], - lbl) + lbl2) format = intFormat width genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src] bid diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs index d4502a0088..7e47860143 100644 --- a/compiler/nativeGen/X86/Instr.hs +++ b/compiler/nativeGen/X86/Instr.hs @@ -292,7 +292,9 @@ data Instr [Maybe JumpDest] -- Targets of the jump table Section -- Data section jump table should be put in CLabel -- Label of jump table - | CALL (Either Imm Reg) [Reg] + -- | X86 call instruction + | CALL (Either Imm Reg) -- ^ Jump target + [Reg] -- ^ Arguments (required for register allocation) -- Other things. | CLTD Format -- sign extend %eax into %edx:%eax diff --git a/compiler/utils/Dominators.hs b/compiler/utils/Dominators.hs index 9877c2c1f0..d6d8404564 100644 --- a/compiler/utils/Dominators.hs +++ b/compiler/utils/Dominators.hs @@ -53,9 +53,12 @@ import Control.Monad import Control.Monad.ST.Strict
import Data.Array.ST
-import Data.Array.Base
- (unsafeNewArray_
- ,unsafeWrite,unsafeRead)
+import Data.Array.Base hiding ((!))
+ -- (unsafeNewArray_
+ -- ,unsafeWrite,unsafeRead
+ -- ,readArray,writeArray)
+
+import Util (debugIsOn)
-----------------------------------------------------------------------------
@@ -399,13 +402,19 @@ infixr 2 .= (.=) :: (MArray (A s) a (ST s))
=> Arr s a -> a -> Int -> ST s ()
-(v .= x) i = unsafeWrite v i x
+(v .= x) i
+ | debugIsOn = writeArray v i x
+ | otherwise = unsafeWrite v i x
(!:) :: (MArray (A s) a (ST s))
=> A s Int a -> Int -> ST s a
-a !: i = do
- o <- unsafeRead a i
- return $! o
+a !: i
+ | debugIsOn = do
+ o <- readArray a i
+ return $! o
+ | otherwise = do
+ o <- unsafeRead a i
+ return $! o
new :: (MArray (A s) a (ST s))
=> Int -> ST s (Arr s a)
diff --git a/testsuite/tests/codeGen/should_compile/T17334.hs b/testsuite/tests/codeGen/should_compile/T17334.hs index 27c0742aa7..6ad6d347e8 100644 --- a/testsuite/tests/codeGen/should_compile/T17334.hs +++ b/testsuite/tests/codeGen/should_compile/T17334.hs @@ -1,144 +1,240 @@ --- Reproducer for T17334
-{-# LANGUAGE BangPatterns #-}
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE MagicHash #-}
-{-# LANGUAGE MultiParamTypeClasses #-}
-{-# LANGUAGE TypeFamilies #-}
-{-# LANGUAGE UnboxedTuples #-}
-
-module T17334 where
-
-import Control.Monad.ST
-import Data.Bits
-import Data.Kind
-import GHC.Exts
-import GHC.ST (ST(..))
-
-reverseInPlace :: UMVector s Bit -> ST s ()
-reverseInPlace xs = loop 0
- where
- len = 4
-
- loop !i
- | i' < j = do
- let w = 1
- k = 2
- x <- return 1
- y <- return 2
-
- writeWord xs i (meld w (reversePartialWord w y) x)
-
- loop i'
-
- where
- !j = 5
- !i' = i + wordSize
-
-newtype Bit = Bit { unBit :: Bool }
-
-instance Unbox Bit
-
-data instance UMVector s Bit = BitMVec !Int !Int !(MutableByteArray s)
-data instance UVector Bit = BitVec !Int !Int !ByteArray
-
--- {-# NOINLINE writeWord #-}
-writeWord :: UMVector s Bit -> Int -> Word -> ST s ()
-writeWord !(BitMVec _ 0 _) _ _ = pure ()
-writeWord !(BitMVec off len' arr@(MutableByteArray mba)) !i' !x@(W# x#) = do
- let len = 5
- lenMod = 6
- i = 7
- nMod = 8
- loIx@(I# loIx#) = 9
-
- do
- let W# andMask# = hiMask lenMod
- W# orMask# = x .&. loMask lenMod
- primitive $ \state ->
- let !(# state', _ #) = fetchAndIntArray# mba loIx# (word2Int# andMask#) state in
- let !(# state'', _ #) = fetchOrIntArray# mba loIx# (word2Int# orMask#) state' in
- (# state'', () #)
-
-instance GMVector UMVector Bit where
- {-# INLINE basicLength #-}
- basicLength (BitMVec _ n _) = n
-
-instance GVector UVector Bit where
-
-wordSize :: Int
-wordSize = 10
-
-lgWordSize :: Int
-lgWordSize = 11
-
-modWordSize :: Int -> Int
-modWordSize x = 12
-
-mask :: Int -> Word
-mask b = 13
-
-meld :: Int -> Word -> Word -> Word
-meld b lo hi = 14
-{-# INLINE meld #-}
-
-reverseWord :: Word -> Word
-reverseWord x0 = 15
-
-reversePartialWord :: Int -> Word -> Word
-reversePartialWord n w = 16
-
-loMask :: Int -> Word
-loMask n = 17
-
-hiMask :: Int -> Word
-hiMask n = 18
-
-class GMVector v a where
- basicLength :: v s a -> Int
-
-type family GMutable (v :: Type -> Type) :: Type -> Type -> Type
-class GMVector (GMutable v) a => GVector v a
-data family UMVector s a
-data family UVector a
-class (GVector UVector a, GMVector UMVector a) => Unbox a
-type instance GMutable UVector = UMVector
-
-data ByteArray = ByteArray ByteArray#
-data MutableByteArray s = MutableByteArray (MutableByteArray# s)
-
-readByteArray
- :: (Prim a, PrimMonad m) => MutableByteArray (PrimState m) -> Int -> m a
-{-# INLINE readByteArray #-}
-readByteArray (MutableByteArray arr#) (I# i#)
- = primitive (readByteArray# arr# i#)
-
-writeByteArray
- :: (Prim a, PrimMonad m) => MutableByteArray (PrimState m) -> Int -> a -> m ()
-{-# INLINE writeByteArray #-}
-writeByteArray (MutableByteArray arr#) (I# i#) x
- = primitive_ (writeByteArray# arr# i# x)
-
-class Prim a where
- readByteArray# :: MutableByteArray# s -> Int# -> State# s -> (# State# s, a #)
- writeByteArray# :: MutableByteArray# s -> Int# -> a -> State# s -> State# s
-
-instance Prim Word where
- readByteArray# arr# i# s# = case readWordArray# arr# i# s# of
- (# s1#, x# #) -> (# s1#, W# x# #)
- writeByteArray# arr# i# (W# x#) s# = writeWordArray# arr# i# x# s#
-
-class Monad m => PrimMonad m where
- type PrimState m
- primitive :: (State# (PrimState m) -> (# State# (PrimState m), a #)) -> m a
-
-instance PrimMonad (ST s) where
- type PrimState (ST s) = s
- primitive = ST
- {-# INLINE primitive #-}
-
-primitive_ :: PrimMonad m
- => (State# (PrimState m) -> State# (PrimState m)) -> m ()
-{-# INLINE primitive_ #-}
-primitive_ f = primitive (\s# ->
- case f s# of
- s'# -> (# s'#, () #))
+-- Reproducer for T17334 +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE UnboxedTuples #-} + +--Reproducer uses 64bit literals in reverseWord. +--It's ok to truncate those in x86 +{-# OPTIONS_GHC -Wno-overflowed-literals #-} + +module Bug (reverseInPlace) where + +import Control.Monad.ST +import Data.Bits +import GHC.Exts +import GHC.ST (ST(..)) +import Data.Kind + +reverseInPlace :: PrimMonad m => UMVector (PrimState m) Bit -> m () +reverseInPlace xs | len == 0 = pure () + | otherwise = loop 0 + where + len = ulength xs + + loop !i + | i' <= j' = do + x <- readWord xs i + y <- readWord xs j' + + writeWord xs i (reverseWord y) + writeWord xs j' (reverseWord x) + + loop i' + | i' < j = do + let w = (j - i) `shiftR` 1 + k = j - w + x <- readWord xs i + y <- readWord xs k + + writeWord xs i (meld w (reversePartialWord w y) x) + writeWord xs k (meld w (reversePartialWord w x) y) + + loop i' + | otherwise = do + let w = j - i + x <- readWord xs i + writeWord xs i (meld w (reversePartialWord w x) x) + where + !j = len - i + !i' = i + wordSize + !j' = j - wordSize +{-# SPECIALIZE reverseInPlace :: UMVector s Bit -> ST s () #-} + +newtype Bit = Bit { unBit :: Bool } + +instance Unbox Bit + +data instance UMVector s Bit = BitMVec !Int !Int !(MutableByteArray s) +data instance UVector Bit = BitVec !Int !Int !ByteArray + +readWord :: PrimMonad m => UMVector (PrimState m) Bit -> Int -> m Word +readWord !(BitMVec _ 0 _) _ = pure 0 +readWord !(BitMVec off len' arr) !i' = do + let len = off + len' + i = off + i' + nMod = modWordSize i + loIx = divWordSize i + loWord <- readByteArray arr loIx + + if nMod == 0 + then pure loWord + else if loIx == divWordSize (len - 1) + then pure (loWord `unsafeShiftR` nMod) + else do + hiWord <- readByteArray arr (loIx + 1) + pure + $ (loWord `unsafeShiftR` nMod) + .|. (hiWord `unsafeShiftL` (wordSize - nMod)) +{-# SPECIALIZE readWord :: UMVector s Bit -> Int -> ST s Word #-} +{-# INLINE readWord #-} + +writeWord :: PrimMonad m => UMVector (PrimState m) Bit -> Int -> Word -> m () +writeWord !(BitMVec _ 0 _) _ _ = pure () +writeWord !(BitMVec off len' arr@(MutableByteArray mba)) !i' !x@(W# x#) = do + let len = off + len' + lenMod = modWordSize len + i = off + i' + nMod = modWordSize i + loIx@(I# loIx#) = divWordSize i + + if nMod == 0 + then if len >= i + wordSize + then primitive $ \state -> + (# atomicWriteIntArray# mba loIx# (word2Int# x#) state, () #) + else do + let W# andMask# = hiMask lenMod + W# orMask# = x .&. loMask lenMod + primitive $ \state -> + let !(# state', _ #) = fetchAndIntArray# mba loIx# (word2Int# andMask#) state in + let !(# state'', _ #) = fetchOrIntArray# mba loIx# (word2Int# orMask#) state' in + (# state'', () #) + else if loIx == divWordSize (len - 1) + then do + loWord <- readByteArray arr loIx + if lenMod == 0 + then + writeByteArray arr loIx + $ (loWord .&. loMask nMod) + .|. (x `unsafeShiftL` nMod) + else + writeByteArray arr loIx + $ (loWord .&. (loMask nMod .|. hiMask lenMod)) + .|. ((x `unsafeShiftL` nMod) .&. loMask lenMod) + else do + loWord <- readByteArray arr loIx + writeByteArray arr loIx + $ (loWord .&. loMask nMod) + .|. (x `unsafeShiftL` nMod) + hiWord <- readByteArray arr (loIx + 1) + writeByteArray arr (loIx + 1) + $ (hiWord .&. hiMask nMod) + .|. (x `unsafeShiftR` (wordSize - nMod)) +{-# SPECIALIZE writeWord :: UMVector s Bit -> Int -> Word -> ST s () #-} +{-# INLINE writeWord #-} + +instance GMVector UMVector Bit where + {-# INLINE basicLength #-} + basicLength (BitMVec _ n _) = n + +instance GVector UVector Bit where + +wordSize :: Int +wordSize = finiteBitSize (0 :: Word) + +lgWordSize :: Int +lgWordSize = case wordSize of + 32 -> 5 + 64 -> 6 + _ -> error "wordsToBytes: unknown architecture" + +divWordSize :: Bits a => a -> a +divWordSize x = unsafeShiftR x lgWordSize +{-# INLINE divWordSize #-} + +modWordSize :: Int -> Int +modWordSize x = x .&. (wordSize - 1) +{-# INLINE modWordSize #-} + +mask :: Int -> Word +mask b = m + where + m | b >= finiteBitSize m = complement 0 + | b < 0 = 0 + | otherwise = bit b - 1 + +meld :: Int -> Word -> Word -> Word +meld b lo hi = (lo .&. m) .|. (hi .&. complement m) where m = mask b +{-# INLINE meld #-} + +reverseWord :: Word -> Word +reverseWord x0 = x6 + where + x1 = ((x0 .&. 0x5555555555555555) `shiftL` 1) .|. ((x0 .&. 0xAAAAAAAAAAAAAAAA) `shiftR` 1) + x2 = ((x1 .&. 0x3333333333333333) `shiftL` 2) .|. ((x1 .&. 0xCCCCCCCCCCCCCCCC) `shiftR` 2) + x3 = ((x2 .&. 0x0F0F0F0F0F0F0F0F) `shiftL` 4) .|. ((x2 .&. 0xF0F0F0F0F0F0F0F0) `shiftR` 4) + x4 = ((x3 .&. 0x00FF00FF00FF00FF) `shiftL` 8) .|. ((x3 .&. 0xFF00FF00FF00FF00) `shiftR` 8) + x5 = ((x4 .&. 0x0000FFFF0000FFFF) `shiftL` 16) .|. ((x4 .&. 0xFFFF0000FFFF0000) `shiftR` 16) + x6 = ((x5 .&. 0x00000000FFFFFFFF) `shiftL` 32) .|. ((x5 .&. 0xFFFFFFFF00000000) `shiftR` 32) + +reversePartialWord :: Int -> Word -> Word +reversePartialWord n w | n >= wordSize = reverseWord w + | otherwise = reverseWord w `shiftR` (wordSize - n) + +loMask :: Int -> Word +loMask n = 1 `unsafeShiftL` n - 1 +{-# INLINE loMask #-} + +hiMask :: Int -> Word +hiMask n = complement (1 `unsafeShiftL` n - 1) +{-# INLINE hiMask #-} + +class GMVector v a where + basicLength :: v s a -> Int + +glength :: GMVector v a => v s a -> Int +{-# INLINE glength #-} +glength = basicLength + +type family GMutable (v :: Type -> Type) :: Type -> Type -> Type +class GMVector (GMutable v) a => GVector v a +data family UMVector s a +data family UVector a +class (GVector UVector a, GMVector UMVector a) => Unbox a +type instance GMutable UVector = UMVector + +ulength :: Unbox a => UMVector s a -> Int +{-# INLINE ulength #-} +ulength = glength + +data ByteArray = ByteArray ByteArray# +data MutableByteArray s = MutableByteArray (MutableByteArray# s) + +readByteArray + :: (Prim a, PrimMonad m) => MutableByteArray (PrimState m) -> Int -> m a +{-# INLINE readByteArray #-} +readByteArray (MutableByteArray arr#) (I# i#) + = primitive (readByteArray# arr# i#) + +writeByteArray + :: (Prim a, PrimMonad m) => MutableByteArray (PrimState m) -> Int -> a -> m () +{-# INLINE writeByteArray #-} +writeByteArray (MutableByteArray arr#) (I# i#) x + = primitive_ (writeByteArray# arr# i# x) + +class Prim a where + readByteArray# :: MutableByteArray# s -> Int# -> State# s -> (# State# s, a #) + writeByteArray# :: MutableByteArray# s -> Int# -> a -> State# s -> State# s + +instance Prim Word where + readByteArray# arr# i# s# = case readWordArray# arr# i# s# of + (# s1#, x# #) -> (# s1#, W# x# #) + writeByteArray# arr# i# (W# x#) s# = writeWordArray# arr# i# x# s# + +class Monad m => PrimMonad m where + type PrimState m + primitive :: (State# (PrimState m) -> (# State# (PrimState m), a #)) -> m a + +instance PrimMonad (ST s) where + type PrimState (ST s) = s + primitive = ST + {-# INLINE primitive #-} + +primitive_ :: PrimMonad m + => (State# (PrimState m) -> State# (PrimState m)) -> m () +{-# INLINE primitive_ #-} +primitive_ f = primitive (\s# -> + case f s# of + s'# -> (# s'#, () #)) |