diff options
Diffstat (limited to 'compiler/nativeGen')
67 files changed, 0 insertions, 27914 deletions
diff --git a/compiler/nativeGen/AsmCodeGen.hs b/compiler/nativeGen/AsmCodeGen.hs deleted file mode 100644 index 9aa6933757..0000000000 --- a/compiler/nativeGen/AsmCodeGen.hs +++ /dev/null @@ -1,1236 +0,0 @@ --- ----------------------------------------------------------------------------- --- --- (c) The University of Glasgow 1993-2004 --- --- This is the top-level module in the native code generator. --- --- ----------------------------------------------------------------------------- - -{-# LANGUAGE BangPatterns, CPP, GADTs, ScopedTypeVariables, PatternSynonyms, - DeriveFunctor #-} - -#if !defined(GHC_LOADED_INTO_GHCI) -{-# LANGUAGE UnboxedTuples #-} -#endif - -{-# OPTIONS_GHC -Wno-incomplete-record-updates #-} - -module AsmCodeGen ( - -- * Module entry point - nativeCodeGen - - -- * Test-only exports: see trac #12744 - -- used by testGraphNoSpills, which needs to access - -- the register allocator intermediate data structures - -- cmmNativeGen emits - , cmmNativeGen - , NcgImpl(..) - , x86NcgImpl - ) where - -#include "HsVersions.h" - -import GhcPrelude - -import qualified X86.CodeGen -import qualified X86.Regs -import qualified X86.Instr -import qualified X86.Ppr - -import qualified SPARC.CodeGen -import qualified SPARC.Regs -import qualified SPARC.Instr -import qualified SPARC.Ppr -import qualified SPARC.ShortcutJump -import qualified SPARC.CodeGen.Expand - -import qualified PPC.CodeGen -import qualified PPC.Regs -import qualified PPC.RegInfo -import qualified PPC.Instr -import qualified PPC.Ppr - -import RegAlloc.Liveness -import qualified RegAlloc.Linear.Main as Linear - -import qualified GraphColor as Color -import qualified RegAlloc.Graph.Main as Color -import qualified RegAlloc.Graph.Stats as Color -import qualified RegAlloc.Graph.TrivColorable as Color - -import AsmUtils -import TargetReg -import GHC.Platform -import BlockLayout -import Config -import Instruction -import PIC -import Reg -import NCGMonad -import CFG -import Dwarf -import GHC.Cmm.DebugBlock - -import GHC.Cmm.BlockId -import GHC.StgToCmm.CgUtils ( fixStgRegisters ) -import GHC.Cmm -import GHC.Cmm.Utils -import GHC.Cmm.Dataflow.Collections -import GHC.Cmm.Dataflow.Label -import GHC.Cmm.Dataflow.Block -import GHC.Cmm.Opt ( cmmMachOpFold ) -import GHC.Cmm.Ppr -import GHC.Cmm.CLabel - -import UniqFM -import UniqSupply -import GHC.Driver.Session -import Util - -import BasicTypes ( Alignment ) -import qualified Pretty -import BufWrite -import Outputable -import FastString -import UniqSet -import ErrUtils -import Module -import Stream (Stream) -import qualified Stream - --- DEBUGGING ONLY ---import OrdList - -import Data.List -import Data.Maybe -import Data.Ord ( comparing ) -import Control.Exception -import Control.Monad -import System.IO - -{- -The native-code generator has machine-independent and -machine-dependent modules. - -This module ("AsmCodeGen") is the top-level machine-independent -module. Before entering machine-dependent land, we do some -machine-independent optimisations (defined below) on the -'CmmStmts's. - -We convert to the machine-specific 'Instr' datatype with -'cmmCodeGen', assuming an infinite supply of registers. We then use -a machine-independent register allocator ('regAlloc') to rejoin -reality. Obviously, 'regAlloc' has machine-specific helper -functions (see about "RegAllocInfo" below). - -Finally, we order the basic blocks of the function so as to minimise -the number of jumps between blocks, by utilising fallthrough wherever -possible. - -The machine-dependent bits break down as follows: - - * ["MachRegs"] Everything about the target platform's machine - registers (and immediate operands, and addresses, which tend to - intermingle/interact with registers). - - * ["MachInstrs"] Includes the 'Instr' datatype (possibly should - have a module of its own), plus a miscellany of other things - (e.g., 'targetDoubleSize', 'smStablePtrTable', ...) - - * ["MachCodeGen"] is where 'Cmm' stuff turns into - machine instructions. - - * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really - a 'SDoc'). - - * ["RegAllocInfo"] In the register allocator, we manipulate - 'MRegsState's, which are 'BitSet's, one bit per machine register. - When we want to say something about a specific machine register - (e.g., ``it gets clobbered by this instruction''), we set/unset - its bit. Obviously, we do this 'BitSet' thing for efficiency - reasons. - - The 'RegAllocInfo' module collects together the machine-specific - info needed to do register allocation. - - * ["RegisterAlloc"] The (machine-independent) register allocator. --} - --------------------- -nativeCodeGen :: forall a . DynFlags -> Module -> ModLocation -> Handle -> UniqSupply - -> Stream IO RawCmmGroup a - -> IO a -nativeCodeGen dflags this_mod modLoc h us cmms - = let platform = targetPlatform dflags - nCG' :: ( Outputable statics, Outputable instr - , Outputable jumpDest, Instruction instr) - => NcgImpl statics instr jumpDest -> IO a - nCG' ncgImpl = nativeCodeGen' dflags this_mod modLoc ncgImpl h us cmms - in case platformArch platform of - ArchX86 -> nCG' (x86NcgImpl dflags) - ArchX86_64 -> nCG' (x86_64NcgImpl dflags) - ArchPPC -> nCG' (ppcNcgImpl dflags) - ArchS390X -> panic "nativeCodeGen: No NCG for S390X" - ArchSPARC -> nCG' (sparcNcgImpl dflags) - ArchSPARC64 -> panic "nativeCodeGen: No NCG for SPARC64" - ArchARM {} -> panic "nativeCodeGen: No NCG for ARM" - ArchARM64 -> panic "nativeCodeGen: No NCG for ARM64" - ArchPPC_64 _ -> nCG' (ppcNcgImpl dflags) - ArchAlpha -> panic "nativeCodeGen: No NCG for Alpha" - ArchMipseb -> panic "nativeCodeGen: No NCG for mipseb" - ArchMipsel -> panic "nativeCodeGen: No NCG for mipsel" - ArchUnknown -> panic "nativeCodeGen: No NCG for unknown arch" - ArchJavaScript-> panic "nativeCodeGen: No NCG for JavaScript" - -x86NcgImpl :: DynFlags -> NcgImpl (Alignment, RawCmmStatics) - X86.Instr.Instr X86.Instr.JumpDest -x86NcgImpl dflags - = (x86_64NcgImpl dflags) - -x86_64NcgImpl :: DynFlags -> NcgImpl (Alignment, RawCmmStatics) - X86.Instr.Instr X86.Instr.JumpDest -x86_64NcgImpl dflags - = NcgImpl { - cmmTopCodeGen = X86.CodeGen.cmmTopCodeGen - ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr dflags - ,getJumpDestBlockId = X86.Instr.getJumpDestBlockId - ,canShortcut = X86.Instr.canShortcut - ,shortcutStatics = X86.Instr.shortcutStatics - ,shortcutJump = X86.Instr.shortcutJump - ,pprNatCmmDecl = X86.Ppr.pprNatCmmDecl - ,maxSpillSlots = X86.Instr.maxSpillSlots dflags - ,allocatableRegs = X86.Regs.allocatableRegs platform - ,ncgAllocMoreStack = X86.Instr.allocMoreStack platform - ,ncgExpandTop = id - ,ncgMakeFarBranches = const id - ,extractUnwindPoints = X86.CodeGen.extractUnwindPoints - ,invertCondBranches = X86.CodeGen.invertCondBranches - } - where platform = targetPlatform dflags - -ppcNcgImpl :: DynFlags -> NcgImpl RawCmmStatics PPC.Instr.Instr PPC.RegInfo.JumpDest -ppcNcgImpl dflags - = NcgImpl { - cmmTopCodeGen = PPC.CodeGen.cmmTopCodeGen - ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr dflags - ,getJumpDestBlockId = PPC.RegInfo.getJumpDestBlockId - ,canShortcut = PPC.RegInfo.canShortcut - ,shortcutStatics = PPC.RegInfo.shortcutStatics - ,shortcutJump = PPC.RegInfo.shortcutJump - ,pprNatCmmDecl = PPC.Ppr.pprNatCmmDecl - ,maxSpillSlots = PPC.Instr.maxSpillSlots dflags - ,allocatableRegs = PPC.Regs.allocatableRegs platform - ,ncgAllocMoreStack = PPC.Instr.allocMoreStack platform - ,ncgExpandTop = id - ,ncgMakeFarBranches = PPC.Instr.makeFarBranches - ,extractUnwindPoints = const [] - ,invertCondBranches = \_ _ -> id - } - where platform = targetPlatform dflags - -sparcNcgImpl :: DynFlags -> NcgImpl RawCmmStatics SPARC.Instr.Instr SPARC.ShortcutJump.JumpDest -sparcNcgImpl dflags - = NcgImpl { - cmmTopCodeGen = SPARC.CodeGen.cmmTopCodeGen - ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr dflags - ,getJumpDestBlockId = SPARC.ShortcutJump.getJumpDestBlockId - ,canShortcut = SPARC.ShortcutJump.canShortcut - ,shortcutStatics = SPARC.ShortcutJump.shortcutStatics - ,shortcutJump = SPARC.ShortcutJump.shortcutJump - ,pprNatCmmDecl = SPARC.Ppr.pprNatCmmDecl - ,maxSpillSlots = SPARC.Instr.maxSpillSlots dflags - ,allocatableRegs = SPARC.Regs.allocatableRegs - ,ncgAllocMoreStack = noAllocMoreStack - ,ncgExpandTop = map SPARC.CodeGen.Expand.expandTop - ,ncgMakeFarBranches = const id - ,extractUnwindPoints = const [] - ,invertCondBranches = \_ _ -> id - } - --- --- Allocating more stack space for spilling is currently only --- supported for the linear register allocator on x86/x86_64, the rest --- default to the panic below. To support allocating extra stack on --- more platforms provide a definition of ncgAllocMoreStack. --- -noAllocMoreStack :: Int -> NatCmmDecl statics instr - -> UniqSM (NatCmmDecl statics instr, [(BlockId,BlockId)]) -noAllocMoreStack amount _ - = panic $ "Register allocator: out of stack slots (need " ++ show amount ++ ")\n" - ++ " If you are trying to compile SHA1.hs from the crypto library then this\n" - ++ " is a known limitation in the linear allocator.\n" - ++ "\n" - ++ " Try enabling the graph colouring allocator with -fregs-graph instead." - ++ " You can still file a bug report if you like.\n" - - --- | Data accumulated during code generation. Mostly about statistics, --- but also collects debug data for DWARF generation. -data NativeGenAcc statics instr - = NGS { ngs_imports :: ![[CLabel]] - , ngs_natives :: ![[NatCmmDecl statics instr]] - -- ^ Native code generated, for statistics. This might - -- hold a lot of data, so it is important to clear this - -- field as early as possible if it isn't actually - -- required. - , ngs_colorStats :: ![[Color.RegAllocStats statics instr]] - , ngs_linearStats :: ![[Linear.RegAllocStats]] - , ngs_labels :: ![Label] - , ngs_debug :: ![DebugBlock] - , ngs_dwarfFiles :: !DwarfFiles - , ngs_unwinds :: !(LabelMap [UnwindPoint]) - -- ^ see Note [Unwinding information in the NCG] - -- and Note [What is this unwinding business?] in Debug. - } - -{- -Note [Unwinding information in the NCG] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Unwind information is a type of metadata which allows a debugging tool -to reconstruct the values of machine registers at the time a procedure was -entered. For the most part, the production of unwind information is handled by -the Cmm stage, where it is represented by CmmUnwind nodes. - -Unfortunately, the Cmm stage doesn't know everything necessary to produce -accurate unwinding information. For instance, the x86-64 calling convention -requires that the stack pointer be aligned to 16 bytes, which in turn means that -GHC must sometimes add padding to $sp prior to performing a foreign call. When -this happens unwind information must be updated accordingly. -For this reason, we make the NCG backends responsible for producing -unwinding tables (with the extractUnwindPoints function in NcgImpl). - -We accumulate the produced unwind tables over CmmGroups in the ngs_unwinds -field of NativeGenAcc. This is a label map which contains an entry for each -procedure, containing a list of unwinding points (e.g. a label and an associated -unwinding table). - -See also Note [What is this unwinding business?] in Debug. --} - -nativeCodeGen' :: (Outputable statics, Outputable instr,Outputable jumpDest, - Instruction instr) - => DynFlags - -> Module -> ModLocation - -> NcgImpl statics instr jumpDest - -> Handle - -> UniqSupply - -> Stream IO RawCmmGroup a - -> IO a -nativeCodeGen' dflags this_mod modLoc ncgImpl h us cmms - = do - -- BufHandle is a performance hack. We could hide it inside - -- Pretty if it weren't for the fact that we do lots of little - -- printDocs here (in order to do codegen in constant space). - bufh <- newBufHandle h - let ngs0 = NGS [] [] [] [] [] [] emptyUFM mapEmpty - (ngs, us', a) <- cmmNativeGenStream dflags this_mod modLoc ncgImpl bufh us - cmms ngs0 - _ <- finishNativeGen dflags modLoc bufh us' ngs - return a - -finishNativeGen :: Instruction instr - => DynFlags - -> ModLocation - -> BufHandle - -> UniqSupply - -> NativeGenAcc statics instr - -> IO UniqSupply -finishNativeGen dflags modLoc bufh@(BufHandle _ _ h) us ngs - = withTimingSilent dflags (text "NCG") (`seq` ()) $ do - -- Write debug data and finish - let emitDw = debugLevel dflags > 0 - us' <- if not emitDw then return us else do - (dwarf, us') <- dwarfGen dflags modLoc us (ngs_debug ngs) - emitNativeCode dflags bufh dwarf - return us' - bFlush bufh - - -- dump global NCG stats for graph coloring allocator - let stats = concat (ngs_colorStats ngs) - unless (null stats) $ do - - -- build the global register conflict graph - let graphGlobal - = foldl' Color.union Color.initGraph - $ [ Color.raGraph stat - | stat@Color.RegAllocStatsStart{} <- stats] - - dump_stats (Color.pprStats stats graphGlobal) - - let platform = targetPlatform dflags - dumpIfSet_dyn dflags - Opt_D_dump_asm_conflicts "Register conflict graph" - FormatText - $ Color.dotGraph - (targetRegDotColor platform) - (Color.trivColorable platform - (targetVirtualRegSqueeze platform) - (targetRealRegSqueeze platform)) - $ graphGlobal - - - -- dump global NCG stats for linear allocator - let linearStats = concat (ngs_linearStats ngs) - unless (null linearStats) $ - dump_stats (Linear.pprStats (concat (ngs_natives ngs)) linearStats) - - -- write out the imports - printSDocLn Pretty.LeftMode dflags h (mkCodeStyle AsmStyle) - $ makeImportsDoc dflags (concat (ngs_imports ngs)) - return us' - where - dump_stats = dumpAction dflags (mkDumpStyle dflags alwaysQualify) - (dumpOptionsFromFlag Opt_D_dump_asm_stats) "NCG stats" - FormatText - -cmmNativeGenStream :: (Outputable statics, Outputable instr - ,Outputable jumpDest, Instruction instr) - => DynFlags - -> Module -> ModLocation - -> NcgImpl statics instr jumpDest - -> BufHandle - -> UniqSupply - -> Stream IO RawCmmGroup a - -> NativeGenAcc statics instr - -> IO (NativeGenAcc statics instr, UniqSupply, a) - -cmmNativeGenStream dflags this_mod modLoc ncgImpl h us cmm_stream ngs - = do r <- Stream.runStream cmm_stream - case r of - Left a -> - return (ngs { ngs_imports = reverse $ ngs_imports ngs - , ngs_natives = reverse $ ngs_natives ngs - , ngs_colorStats = reverse $ ngs_colorStats ngs - , ngs_linearStats = reverse $ ngs_linearStats ngs - }, - us, - a) - Right (cmms, cmm_stream') -> do - (us', ngs'') <- - withTimingSilent - dflags - ncglabel (\(a, b) -> a `seq` b `seq` ()) $ do - -- Generate debug information - let debugFlag = debugLevel dflags > 0 - !ndbgs | debugFlag = cmmDebugGen modLoc cmms - | otherwise = [] - dbgMap = debugToMap ndbgs - - -- Generate native code - (ngs',us') <- cmmNativeGens dflags this_mod modLoc ncgImpl h - dbgMap us cmms ngs 0 - - -- Link native code information into debug blocks - -- See Note [What is this unwinding business?] in Debug. - let !ldbgs = cmmDebugLink (ngs_labels ngs') (ngs_unwinds ngs') ndbgs - unless (null ldbgs) $ - dumpIfSet_dyn dflags Opt_D_dump_debug "Debug Infos" FormatText - (vcat $ map ppr ldbgs) - - -- Accumulate debug information for emission in finishNativeGen. - let ngs'' = ngs' { ngs_debug = ngs_debug ngs' ++ ldbgs, ngs_labels = [] } - return (us', ngs'') - - cmmNativeGenStream dflags this_mod modLoc ncgImpl h us' - cmm_stream' ngs'' - - where ncglabel = text "NCG" - --- | Do native code generation on all these cmms. --- -cmmNativeGens :: forall statics instr jumpDest. - (Outputable statics, Outputable instr - ,Outputable jumpDest, Instruction instr) - => DynFlags - -> Module -> ModLocation - -> NcgImpl statics instr jumpDest - -> BufHandle - -> LabelMap DebugBlock - -> UniqSupply - -> [RawCmmDecl] - -> NativeGenAcc statics instr - -> Int - -> IO (NativeGenAcc statics instr, UniqSupply) - -cmmNativeGens dflags this_mod modLoc ncgImpl h dbgMap = go - where - go :: UniqSupply -> [RawCmmDecl] - -> NativeGenAcc statics instr -> Int - -> IO (NativeGenAcc statics instr, UniqSupply) - - go us [] ngs !_ = - return (ngs, us) - - go us (cmm : cmms) ngs count = do - let fileIds = ngs_dwarfFiles ngs - (us', fileIds', native, imports, colorStats, linearStats, unwinds) - <- {-# SCC "cmmNativeGen" #-} - cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap - cmm count - - -- Generate .file directives for every new file that has been - -- used. Note that it is important that we generate these in - -- ascending order, as Clang's 3.6 assembler complains. - let newFileIds = sortBy (comparing snd) $ - nonDetEltsUFM $ fileIds' `minusUFM` fileIds - -- See Note [Unique Determinism and code generation] - pprDecl (f,n) = text "\t.file " <> ppr n <+> - pprFilePathString (unpackFS f) - - emitNativeCode dflags h $ vcat $ - map pprDecl newFileIds ++ - map (pprNatCmmDecl ncgImpl) native - - -- force evaluation all this stuff to avoid space leaks - {-# SCC "seqString" #-} evaluate $ seqList (showSDoc dflags $ vcat $ map ppr imports) () - - let !labels' = if debugLevel dflags > 0 - then cmmDebugLabels isMetaInstr native else [] - !natives' = if dopt Opt_D_dump_asm_stats dflags - then native : ngs_natives ngs else [] - - mCon = maybe id (:) - ngs' = ngs{ ngs_imports = imports : ngs_imports ngs - , ngs_natives = natives' - , ngs_colorStats = colorStats `mCon` ngs_colorStats ngs - , ngs_linearStats = linearStats `mCon` ngs_linearStats ngs - , ngs_labels = ngs_labels ngs ++ labels' - , ngs_dwarfFiles = fileIds' - , ngs_unwinds = ngs_unwinds ngs `mapUnion` unwinds - } - go us' cmms ngs' (count + 1) - - -emitNativeCode :: DynFlags -> BufHandle -> SDoc -> IO () -emitNativeCode dflags h sdoc = do - - {-# SCC "pprNativeCode" #-} bufLeftRenderSDoc dflags h - (mkCodeStyle AsmStyle) sdoc - - -- dump native code - dumpIfSet_dyn dflags - Opt_D_dump_asm "Asm code" FormatASM - sdoc - --- | Complete native code generation phase for a single top-level chunk of Cmm. --- Dumping the output of each stage along the way. --- Global conflict graph and NGC stats -cmmNativeGen - :: forall statics instr jumpDest. (Instruction instr, - Outputable statics, Outputable instr, Outputable jumpDest) - => DynFlags - -> Module -> ModLocation - -> NcgImpl statics instr jumpDest - -> UniqSupply - -> DwarfFiles - -> LabelMap DebugBlock - -> RawCmmDecl -- ^ the cmm to generate code for - -> Int -- ^ sequence number of this top thing - -> IO ( UniqSupply - , DwarfFiles - , [NatCmmDecl statics instr] -- native code - , [CLabel] -- things imported by this cmm - , Maybe [Color.RegAllocStats statics instr] -- stats for the coloring register allocator - , Maybe [Linear.RegAllocStats] -- stats for the linear register allocators - , LabelMap [UnwindPoint] -- unwinding information for blocks - ) - -cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count - = do - let platform = targetPlatform dflags - - let proc_name = case cmm of - (CmmProc _ entry_label _ _) -> ppr entry_label - _ -> text "DataChunk" - - -- rewrite assignments to global regs - let fixed_cmm = - {-# SCC "fixStgRegisters" #-} - fixStgRegisters dflags cmm - - -- cmm to cmm optimisations - let (opt_cmm, imports) = - {-# SCC "cmmToCmm" #-} - cmmToCmm dflags this_mod fixed_cmm - - dumpIfSet_dyn dflags - Opt_D_dump_opt_cmm "Optimised Cmm" FormatCMM - (pprCmmGroup [opt_cmm]) - - let cmmCfg = {-# SCC "getCFG" #-} - getCfgProc (cfgWeightInfo dflags) opt_cmm - - -- generate native code from cmm - let ((native, lastMinuteImports, fileIds', nativeCfgWeights), usGen) = - {-# SCC "genMachCode" #-} - initUs us $ genMachCode dflags this_mod modLoc - (cmmTopCodeGen ncgImpl) - fileIds dbgMap opt_cmm cmmCfg - - dumpIfSet_dyn dflags - Opt_D_dump_asm_native "Native code" FormatASM - (vcat $ map (pprNatCmmDecl ncgImpl) native) - - maybeDumpCfg dflags (Just nativeCfgWeights) "CFG Weights - Native" proc_name - - -- tag instructions with register liveness information - -- also drops dead code. We don't keep the cfg in sync on - -- some backends, so don't use it there. - let livenessCfg = if (backendMaintainsCfg dflags) - then Just nativeCfgWeights - else Nothing - let (withLiveness, usLive) = - {-# SCC "regLiveness" #-} - initUs usGen - $ mapM (cmmTopLiveness livenessCfg platform) native - - dumpIfSet_dyn dflags - Opt_D_dump_asm_liveness "Liveness annotations added" - FormatCMM - (vcat $ map ppr withLiveness) - - -- allocate registers - (alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear, raStats, stack_updt_blks) <- - if ( gopt Opt_RegsGraph dflags - || gopt Opt_RegsIterative dflags ) - then do - -- the regs usable for allocation - let (alloc_regs :: UniqFM (UniqSet RealReg)) - = foldr (\r -> plusUFM_C unionUniqSets - $ unitUFM (targetClassOfRealReg platform r) (unitUniqSet r)) - emptyUFM - $ allocatableRegs ncgImpl - - -- do the graph coloring register allocation - let ((alloced, maybe_more_stack, regAllocStats), usAlloc) - = {-# SCC "RegAlloc-color" #-} - initUs usLive - $ Color.regAlloc - dflags - alloc_regs - (mkUniqSet [0 .. maxSpillSlots ncgImpl]) - (maxSpillSlots ncgImpl) - withLiveness - livenessCfg - - let ((alloced', stack_updt_blks), usAlloc') - = initUs usAlloc $ - case maybe_more_stack of - Nothing -> return (alloced, []) - Just amount -> do - (alloced',stack_updt_blks) <- unzip <$> - (mapM ((ncgAllocMoreStack ncgImpl) amount) alloced) - return (alloced', concat stack_updt_blks ) - - - -- dump out what happened during register allocation - dumpIfSet_dyn dflags - Opt_D_dump_asm_regalloc "Registers allocated" - FormatCMM - (vcat $ map (pprNatCmmDecl ncgImpl) alloced) - - dumpIfSet_dyn dflags - Opt_D_dump_asm_regalloc_stages "Build/spill stages" - FormatText - (vcat $ map (\(stage, stats) - -> text "# --------------------------" - $$ text "# cmm " <> int count <> text " Stage " <> int stage - $$ ppr stats) - $ zip [0..] regAllocStats) - - let mPprStats = - if dopt Opt_D_dump_asm_stats dflags - then Just regAllocStats else Nothing - - -- force evaluation of the Maybe to avoid space leak - mPprStats `seq` return () - - return ( alloced', usAlloc' - , mPprStats - , Nothing - , [], stack_updt_blks) - - else do - -- do linear register allocation - let reg_alloc proc = do - (alloced, maybe_more_stack, ra_stats) <- - Linear.regAlloc dflags proc - case maybe_more_stack of - Nothing -> return ( alloced, ra_stats, [] ) - Just amount -> do - (alloced',stack_updt_blks) <- - ncgAllocMoreStack ncgImpl amount alloced - return (alloced', ra_stats, stack_updt_blks ) - - let ((alloced, regAllocStats, stack_updt_blks), usAlloc) - = {-# SCC "RegAlloc-linear" #-} - initUs usLive - $ liftM unzip3 - $ mapM reg_alloc withLiveness - - dumpIfSet_dyn dflags - Opt_D_dump_asm_regalloc "Registers allocated" - FormatCMM - (vcat $ map (pprNatCmmDecl ncgImpl) alloced) - - let mPprStats = - if dopt Opt_D_dump_asm_stats dflags - then Just (catMaybes regAllocStats) else Nothing - - -- force evaluation of the Maybe to avoid space leak - mPprStats `seq` return () - - return ( alloced, usAlloc - , Nothing - , mPprStats, (catMaybes regAllocStats) - , concat stack_updt_blks ) - - -- Fixupblocks the register allocator inserted (from, regMoves, to) - let cfgRegAllocUpdates :: [(BlockId,BlockId,BlockId)] - cfgRegAllocUpdates = (concatMap Linear.ra_fixupList raStats) - - let cfgWithFixupBlks = - (\cfg -> addNodesBetween cfg cfgRegAllocUpdates) <$> livenessCfg - - -- Insert stack update blocks - let postRegCFG = - pure (foldl' (\m (from,to) -> addImmediateSuccessor from to m )) - <*> cfgWithFixupBlks - <*> pure stack_updt_blks - - ---- generate jump tables - let tabled = - {-# SCC "generateJumpTables" #-} - generateJumpTables ncgImpl alloced - - when (not $ null nativeCfgWeights) $ dumpIfSet_dyn dflags - Opt_D_dump_cfg_weights "CFG Update information" - FormatText - ( text "stack:" <+> ppr stack_updt_blks $$ - text "linearAlloc:" <+> ppr cfgRegAllocUpdates ) - - ---- shortcut branches - let (shorted, postShortCFG) = - {-# SCC "shortcutBranches" #-} - shortcutBranches dflags ncgImpl tabled postRegCFG - - let optimizedCFG :: Maybe CFG - optimizedCFG = - optimizeCFG (cfgWeightInfo dflags) cmm <$!> postShortCFG - - maybeDumpCfg dflags optimizedCFG "CFG Weights - Final" proc_name - - --TODO: Partially check validity of the cfg. - let getBlks (CmmProc _info _lbl _live (ListGraph blocks)) = blocks - getBlks _ = [] - - when ( backendMaintainsCfg dflags && - (gopt Opt_DoAsmLinting dflags || debugIsOn )) $ do - let blocks = concatMap getBlks shorted - let labels = setFromList $ fmap blockId blocks :: LabelSet - let cfg = fromJust optimizedCFG - return $! seq (sanityCheckCfg cfg labels $ - text "cfg not in lockstep") () - - ---- sequence blocks - let sequenced :: [NatCmmDecl statics instr] - sequenced = - checkLayout shorted $ - {-# SCC "sequenceBlocks" #-} - map (BlockLayout.sequenceTop - dflags - ncgImpl optimizedCFG) - shorted - - let branchOpt :: [NatCmmDecl statics instr] - branchOpt = - {-# SCC "invertCondBranches" #-} - map invert sequenced - where - invertConds :: LabelMap RawCmmStatics -> [NatBasicBlock instr] - -> [NatBasicBlock instr] - invertConds = invertCondBranches ncgImpl optimizedCFG - invert top@CmmData {} = top - invert (CmmProc info lbl live (ListGraph blocks)) = - CmmProc info lbl live (ListGraph $ invertConds info blocks) - - ---- expansion of SPARC synthetic instrs - let expanded = - {-# SCC "sparc_expand" #-} - ncgExpandTop ncgImpl branchOpt - --ncgExpandTop ncgImpl sequenced - - dumpIfSet_dyn dflags - Opt_D_dump_asm_expanded "Synthetic instructions expanded" - FormatCMM - (vcat $ map (pprNatCmmDecl ncgImpl) expanded) - - -- generate unwinding information from cmm - let unwinds :: BlockMap [UnwindPoint] - unwinds = - {-# SCC "unwindingInfo" #-} - foldl' addUnwind mapEmpty expanded - where - addUnwind acc proc = - acc `mapUnion` computeUnwinding dflags ncgImpl proc - - return ( usAlloc - , fileIds' - , expanded - , lastMinuteImports ++ imports - , ppr_raStatsColor - , ppr_raStatsLinear - , unwinds ) - -maybeDumpCfg :: DynFlags -> Maybe CFG -> String -> SDoc -> IO () -maybeDumpCfg _dflags Nothing _ _ = return () -maybeDumpCfg dflags (Just cfg) msg proc_name - | null cfg = return () - | otherwise - = dumpIfSet_dyn - dflags Opt_D_dump_cfg_weights msg - FormatText - (proc_name <> char ':' $$ pprEdgeWeights cfg) - --- | Make sure all blocks we want the layout algorithm to place have been placed. -checkLayout :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr] - -> [NatCmmDecl statics instr] -checkLayout procsUnsequenced procsSequenced = - ASSERT2(setNull diff, - ppr "Block sequencing dropped blocks:" <> ppr diff) - procsSequenced - where - blocks1 = foldl' (setUnion) setEmpty $ - map getBlockIds procsUnsequenced :: LabelSet - blocks2 = foldl' (setUnion) setEmpty $ - map getBlockIds procsSequenced - diff = setDifference blocks1 blocks2 - - getBlockIds (CmmData _ _) = setEmpty - getBlockIds (CmmProc _ _ _ (ListGraph blocks)) = - setFromList $ map blockId blocks - --- | Compute unwinding tables for the blocks of a procedure -computeUnwinding :: Instruction instr - => DynFlags -> NcgImpl statics instr jumpDest - -> NatCmmDecl statics instr - -- ^ the native code generated for the procedure - -> LabelMap [UnwindPoint] - -- ^ unwinding tables for all points of all blocks of the - -- procedure -computeUnwinding dflags _ _ - | debugLevel dflags == 0 = mapEmpty -computeUnwinding _ _ (CmmData _ _) = mapEmpty -computeUnwinding _ ncgImpl (CmmProc _ _ _ (ListGraph blks)) = - -- In general we would need to push unwinding information down the - -- block-level call-graph to ensure that we fully account for all - -- relevant register writes within a procedure. - -- - -- However, the only unwinding information that we care about in GHC is for - -- Sp. The fact that GHC.Cmm.LayoutStack already ensures that we have unwind - -- information at the beginning of every block means that there is no need - -- to perform this sort of push-down. - mapFromList [ (blk_lbl, extractUnwindPoints ncgImpl instrs) - | BasicBlock blk_lbl instrs <- blks ] - --- | Build a doc for all the imports. --- -makeImportsDoc :: DynFlags -> [CLabel] -> SDoc -makeImportsDoc dflags imports - = dyld_stubs imports - $$ - -- On recent versions of Darwin, the linker supports - -- dead-stripping of code and data on a per-symbol basis. - -- There's a hack to make this work in PprMach.pprNatCmmDecl. - (if platformHasSubsectionsViaSymbols platform - then text ".subsections_via_symbols" - else Outputable.empty) - $$ - -- On recent GNU ELF systems one can mark an object file - -- as not requiring an executable stack. If all objects - -- linked into a program have this note then the program - -- will not use an executable stack, which is good for - -- security. GHC generated code does not need an executable - -- stack so add the note in: - (if platformHasGnuNonexecStack platform - then text ".section .note.GNU-stack,\"\"," <> sectionType "progbits" - else Outputable.empty) - $$ - -- And just because every other compiler does, let's stick in - -- an identifier directive: .ident "GHC x.y.z" - (if platformHasIdentDirective platform - then let compilerIdent = text "GHC" <+> text cProjectVersion - in text ".ident" <+> doubleQuotes compilerIdent - else Outputable.empty) - - where - platform = targetPlatform dflags - arch = platformArch platform - os = platformOS platform - - -- Generate "symbol stubs" for all external symbols that might - -- come from a dynamic library. - dyld_stubs :: [CLabel] -> SDoc -{- dyld_stubs imps = vcat $ map pprDyldSymbolStub $ - map head $ group $ sort imps-} - -- (Hack) sometimes two Labels pretty-print the same, but have - -- different uniques; so we compare their text versions... - dyld_stubs imps - | needImportedSymbols dflags arch os - = vcat $ - (pprGotDeclaration dflags arch os :) $ - map ( pprImportedSymbol dflags platform . fst . head) $ - groupBy (\(_,a) (_,b) -> a == b) $ - sortBy (\(_,a) (_,b) -> compare a b) $ - map doPpr $ - imps - | otherwise - = Outputable.empty - - doPpr lbl = (lbl, renderWithStyle - (initSDocContext dflags astyle) - (pprCLabel dflags lbl)) - astyle = mkCodeStyle AsmStyle - --- ----------------------------------------------------------------------------- --- Generate jump tables - --- Analyzes all native code and generates data sections for all jump --- table instructions. -generateJumpTables - :: NcgImpl statics instr jumpDest - -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr] -generateJumpTables ncgImpl xs = concatMap f xs - where f p@(CmmProc _ _ _ (ListGraph xs)) = p : concatMap g xs - f p = [p] - g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl) xs) - --- ----------------------------------------------------------------------------- --- Shortcut branches - -shortcutBranches - :: forall statics instr jumpDest. (Outputable jumpDest) => DynFlags - -> NcgImpl statics instr jumpDest - -> [NatCmmDecl statics instr] - -> Maybe CFG - -> ([NatCmmDecl statics instr],Maybe CFG) - -shortcutBranches dflags ncgImpl tops weights - | gopt Opt_AsmShortcutting dflags - = ( map (apply_mapping ncgImpl mapping) tops' - , shortcutWeightMap mappingBid <$!> weights ) - | otherwise - = (tops, weights) - where - (tops', mappings) = mapAndUnzip (build_mapping ncgImpl) tops - mapping = mapUnions mappings :: LabelMap jumpDest - mappingBid = fmap (getJumpDestBlockId ncgImpl) mapping - -build_mapping :: forall instr t d statics jumpDest. - NcgImpl statics instr jumpDest - -> GenCmmDecl d (LabelMap t) (ListGraph instr) - -> (GenCmmDecl d (LabelMap t) (ListGraph instr) - ,LabelMap jumpDest) -build_mapping _ top@(CmmData _ _) = (top, mapEmpty) -build_mapping _ (CmmProc info lbl live (ListGraph [])) - = (CmmProc info lbl live (ListGraph []), mapEmpty) -build_mapping ncgImpl (CmmProc info lbl live (ListGraph (head:blocks))) - = (CmmProc info lbl live (ListGraph (head:others)), mapping) - -- drop the shorted blocks, but don't ever drop the first one, - -- because it is pointed to by a global label. - where - -- find all the blocks that just consist of a jump that can be - -- shorted. - -- Don't completely eliminate loops here -- that can leave a dangling jump! - shortcut_blocks :: [(BlockId, jumpDest)] - (_, shortcut_blocks, others) = - foldl' split (setEmpty :: LabelSet, [], []) blocks - split (s, shortcut_blocks, others) b@(BasicBlock id [insn]) - | Just jd <- canShortcut ncgImpl insn - , Just dest <- getJumpDestBlockId ncgImpl jd - , not (has_info id) - , (setMember dest s) || dest == id -- loop checks - = (s, shortcut_blocks, b : others) - split (s, shortcut_blocks, others) (BasicBlock id [insn]) - | Just dest <- canShortcut ncgImpl insn - , not (has_info id) - = (setInsert id s, (id,dest) : shortcut_blocks, others) - split (s, shortcut_blocks, others) other = (s, shortcut_blocks, other : others) - - -- do not eliminate blocks that have an info table - has_info l = mapMember l info - - -- build a mapping from BlockId to JumpDest for shorting branches - mapping = mapFromList shortcut_blocks - -apply_mapping :: NcgImpl statics instr jumpDest - -> LabelMap jumpDest - -> GenCmmDecl statics h (ListGraph instr) - -> GenCmmDecl statics h (ListGraph instr) -apply_mapping ncgImpl ufm (CmmData sec statics) - = CmmData sec (shortcutStatics ncgImpl (\bid -> mapLookup bid ufm) statics) -apply_mapping ncgImpl ufm (CmmProc info lbl live (ListGraph blocks)) - = CmmProc info lbl live (ListGraph $ map short_bb blocks) - where - short_bb (BasicBlock id insns) = BasicBlock id $! map short_insn insns - short_insn i = shortcutJump ncgImpl (\bid -> mapLookup bid ufm) i - -- shortcutJump should apply the mapping repeatedly, - -- just in case we can short multiple branches. - --- ----------------------------------------------------------------------------- --- Instruction selection - --- Native code instruction selection for a chunk of stix code. For --- this part of the computation, we switch from the UniqSM monad to --- the NatM monad. The latter carries not only a Unique, but also an --- Int denoting the current C stack pointer offset in the generated --- code; this is needed for creating correct spill offsets on --- architectures which don't offer, or for which it would be --- prohibitively expensive to employ, a frame pointer register. Viz, --- x86. - --- The offset is measured in bytes, and indicates the difference --- between the current (simulated) C stack-ptr and the value it was at --- the beginning of the block. For stacks which grow down, this value --- should be either zero or negative. - --- Along with the stack pointer offset, we also carry along a LabelMap of --- DebugBlocks, which we read to generate .location directives. --- --- Switching between the two monads whilst carrying along the same --- Unique supply breaks abstraction. Is that bad? - -genMachCode - :: DynFlags - -> Module -> ModLocation - -> (RawCmmDecl -> NatM [NatCmmDecl statics instr]) - -> DwarfFiles - -> LabelMap DebugBlock - -> RawCmmDecl - -> CFG - -> UniqSM - ( [NatCmmDecl statics instr] - , [CLabel] - , DwarfFiles - , CFG - ) - -genMachCode dflags this_mod modLoc cmmTopCodeGen fileIds dbgMap cmm_top cmm_cfg - = do { initial_us <- getUniqueSupplyM - ; let initial_st = mkNatM_State initial_us 0 dflags this_mod - modLoc fileIds dbgMap cmm_cfg - (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top) - final_delta = natm_delta final_st - final_imports = natm_imports final_st - final_cfg = natm_cfg final_st - ; if final_delta == 0 - then return (new_tops, final_imports - , natm_fileid final_st, final_cfg) - else pprPanic "genMachCode: nonzero final delta" (int final_delta) - } - --- ----------------------------------------------------------------------------- --- Generic Cmm optimiser - -{- -Here we do: - - (a) Constant folding - (c) Position independent code and dynamic linking - (i) introduce the appropriate indirections - and position independent refs - (ii) compile a list of imported symbols - (d) Some arch-specific optimizations - -(a) will be moving to the new Hoopl pipeline, however, (c) and -(d) are only needed by the native backend and will continue to live -here. - -Ideas for other things we could do (put these in Hoopl please!): - - - shortcut jumps-to-jumps - - simple CSE: if an expr is assigned to a temp, then replace later occs of - that expr with the temp, until the expr is no longer valid (can push through - temp assignments, and certain assigns to mem...) --} - -cmmToCmm :: DynFlags -> Module -> RawCmmDecl -> (RawCmmDecl, [CLabel]) -cmmToCmm _ _ top@(CmmData _ _) = (top, []) -cmmToCmm dflags this_mod (CmmProc info lbl live graph) - = runCmmOpt dflags this_mod $ - do blocks' <- mapM cmmBlockConFold (toBlockList graph) - return $ CmmProc info lbl live (ofBlockList (g_entry graph) blocks') - --- Avoids using unboxed tuples when loading into GHCi -#if !defined(GHC_LOADED_INTO_GHCI) - -type OptMResult a = (# a, [CLabel] #) - -pattern OptMResult :: a -> b -> (# a, b #) -pattern OptMResult x y = (# x, y #) -{-# COMPLETE OptMResult #-} -#else - -data OptMResult a = OptMResult !a ![CLabel] deriving (Functor) -#endif - -newtype CmmOptM a = CmmOptM (DynFlags -> Module -> [CLabel] -> OptMResult a) - deriving (Functor) - -instance Applicative CmmOptM where - pure x = CmmOptM $ \_ _ imports -> OptMResult x imports - (<*>) = ap - -instance Monad CmmOptM where - (CmmOptM f) >>= g = - CmmOptM $ \dflags this_mod imports0 -> - case f dflags this_mod imports0 of - OptMResult x imports1 -> - case g x of - CmmOptM g' -> g' dflags this_mod imports1 - -instance CmmMakeDynamicReferenceM CmmOptM where - addImport = addImportCmmOpt - getThisModule = CmmOptM $ \_ this_mod imports -> OptMResult this_mod imports - -addImportCmmOpt :: CLabel -> CmmOptM () -addImportCmmOpt lbl = CmmOptM $ \_ _ imports -> OptMResult () (lbl:imports) - -instance HasDynFlags CmmOptM where - getDynFlags = CmmOptM $ \dflags _ imports -> OptMResult dflags imports - -runCmmOpt :: DynFlags -> Module -> CmmOptM a -> (a, [CLabel]) -runCmmOpt dflags this_mod (CmmOptM f) = - case f dflags this_mod [] of - OptMResult result imports -> (result, imports) - -cmmBlockConFold :: CmmBlock -> CmmOptM CmmBlock -cmmBlockConFold block = do - let (entry, middle, last) = blockSplit block - stmts = blockToList middle - stmts' <- mapM cmmStmtConFold stmts - last' <- cmmStmtConFold last - return $ blockJoin entry (blockFromList stmts') last' - --- This does three optimizations, but they're very quick to check, so we don't --- bother turning them off even when the Hoopl code is active. Since --- this is on the old Cmm representation, we can't reuse the code either: --- * reg = reg --> nop --- * if 0 then jump --> nop --- * if 1 then jump --> jump --- We might be tempted to skip this step entirely of not Opt_PIC, but --- there is some PowerPC code for the non-PIC case, which would also --- have to be separated. -cmmStmtConFold :: CmmNode e x -> CmmOptM (CmmNode e x) -cmmStmtConFold stmt - = case stmt of - CmmAssign reg src - -> do src' <- cmmExprConFold DataReference src - return $ case src' of - CmmReg reg' | reg == reg' -> CmmComment (fsLit "nop") - new_src -> CmmAssign reg new_src - - CmmStore addr src - -> do addr' <- cmmExprConFold DataReference addr - src' <- cmmExprConFold DataReference src - return $ CmmStore addr' src' - - CmmCall { cml_target = addr } - -> do addr' <- cmmExprConFold JumpReference addr - return $ stmt { cml_target = addr' } - - CmmUnsafeForeignCall target regs args - -> do target' <- case target of - ForeignTarget e conv -> do - e' <- cmmExprConFold CallReference e - return $ ForeignTarget e' conv - PrimTarget _ -> - return target - args' <- mapM (cmmExprConFold DataReference) args - return $ CmmUnsafeForeignCall target' regs args' - - CmmCondBranch test true false likely - -> do test' <- cmmExprConFold DataReference test - return $ case test' of - CmmLit (CmmInt 0 _) -> CmmBranch false - CmmLit (CmmInt _ _) -> CmmBranch true - _other -> CmmCondBranch test' true false likely - - CmmSwitch expr ids - -> do expr' <- cmmExprConFold DataReference expr - return $ CmmSwitch expr' ids - - other - -> return other - -cmmExprConFold :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr -cmmExprConFold referenceKind expr = do - dflags <- getDynFlags - - -- With -O1 and greater, the cmmSink pass does constant-folding, so - -- we don't need to do it again here. - let expr' = if optLevel dflags >= 1 - then expr - else cmmExprCon dflags expr - - cmmExprNative referenceKind expr' - -cmmExprCon :: DynFlags -> CmmExpr -> CmmExpr -cmmExprCon dflags (CmmLoad addr rep) = CmmLoad (cmmExprCon dflags addr) rep -cmmExprCon dflags (CmmMachOp mop args) - = cmmMachOpFold dflags mop (map (cmmExprCon dflags) args) -cmmExprCon _ other = other - --- handles both PIC and non-PIC cases... a very strange mixture --- of things to do. -cmmExprNative :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr -cmmExprNative referenceKind expr = do - dflags <- getDynFlags - let platform = targetPlatform dflags - arch = platformArch platform - case expr of - CmmLoad addr rep - -> do addr' <- cmmExprNative DataReference addr - return $ CmmLoad addr' rep - - CmmMachOp mop args - -> do args' <- mapM (cmmExprNative DataReference) args - return $ CmmMachOp mop args' - - CmmLit (CmmBlock id) - -> cmmExprNative referenceKind (CmmLit (CmmLabel (infoTblLbl id))) - -- we must convert block Ids to CLabels here, because we - -- might have to do the PIC transformation. Hence we must - -- not modify BlockIds beyond this point. - - CmmLit (CmmLabel lbl) - -> do - cmmMakeDynamicReference dflags referenceKind lbl - CmmLit (CmmLabelOff lbl off) - -> do - dynRef <- cmmMakeDynamicReference dflags referenceKind lbl - -- need to optimize here, since it's late - return $ cmmMachOpFold dflags (MO_Add (wordWidth dflags)) [ - dynRef, - (CmmLit $ CmmInt (fromIntegral off) (wordWidth dflags)) - ] - - -- On powerpc (non-PIC), it's easier to jump directly to a label than - -- to use the register table, so we replace these registers - -- with the corresponding labels: - CmmReg (CmmGlobal EagerBlackholeInfo) - | arch == ArchPPC && not (positionIndependent dflags) - -> cmmExprNative referenceKind $ - CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_EAGER_BLACKHOLE_info"))) - CmmReg (CmmGlobal GCEnter1) - | arch == ArchPPC && not (positionIndependent dflags) - -> cmmExprNative referenceKind $ - CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_gc_enter_1"))) - CmmReg (CmmGlobal GCFun) - | arch == ArchPPC && not (positionIndependent dflags) - -> cmmExprNative referenceKind $ - CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_gc_fun"))) - - other - -> return other diff --git a/compiler/nativeGen/BlockLayout.hs b/compiler/nativeGen/BlockLayout.hs deleted file mode 100644 index e488f0908f..0000000000 --- a/compiler/nativeGen/BlockLayout.hs +++ /dev/null @@ -1,895 +0,0 @@ --- --- Copyright (c) 2018 Andreas Klebinger --- - -{-# LANGUAGE CPP #-} -{-# LANGUAGE DataKinds #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TypeFamilies #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE FlexibleContexts #-} - -module BlockLayout - ( sequenceTop ) -where - -#include "HsVersions.h" -import GhcPrelude - -import Instruction -import NCGMonad -import CFG - -import GHC.Cmm.BlockId -import GHC.Cmm -import GHC.Cmm.Dataflow.Collections -import GHC.Cmm.Dataflow.Label - -import GHC.Driver.Session (gopt, GeneralFlag(..), DynFlags, backendMaintainsCfg) -import UniqFM -import Util -import Unique - -import Digraph -import Outputable -import Maybes - --- DEBUGGING ONLY ---import GHC.Cmm.DebugBlock ---import Debug.Trace -import ListSetOps (removeDups) - -import OrdList -import Data.List -import Data.Foldable (toList) - -import qualified Data.Set as Set -import Data.STRef -import Control.Monad.ST.Strict -import Control.Monad (foldM) - -{- - Note [CFG based code layout] - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - The major steps in placing blocks are as follow: - * Compute a CFG based on the Cmm AST, see getCfgProc. - This CFG will have edge weights representing a guess - on how important they are. - * After we convert Cmm to Asm we run `optimizeCFG` which - adds a few more "educated guesses" to the equation. - * Then we run loop analysis on the CFG (`loopInfo`) which tells us - about loop headers, loop nesting levels and the sort. - * Based on the CFG and loop information refine the edge weights - in the CFG and normalize them relative to the most often visited - node. (See `mkGlobalWeights`) - * Feed this CFG into the block layout code (`sequenceTop`) in this - module. Which will then produce a code layout based on the input weights. - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ~~~ Note [Chain based CFG serialization] - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - For additional information also look at - https://gitlab.haskell.org/ghc/ghc/wikis/commentary/compiler/code-layout - - We have a CFG with edge weights based on which we try to place blocks next to - each other. - - Edge weights not only represent likelihood of control transfer between blocks - but also how much a block would benefit from being placed sequentially after - it's predecessor. - For example blocks which are preceded by an info table are more likely to end - up in a different cache line than their predecessor and we can't eliminate the jump - so there is less benefit to placing them sequentially. - - For example consider this example: - - A: ... - jmp cond D (weak successor) - jmp B - B: ... - jmp C - C: ... - jmp X - D: ... - jmp B (weak successor) - - We determine a block layout by building up chunks (calling them chains) of - possible control flows for which blocks will be placed sequentially. - - Eg for our example we might end up with two chains like: - [A->B->C->X],[D]. Blocks inside chains will always be placed sequentially. - However there is no particular order in which chains are placed since - (hopefully) the blocks for which sequentiality is important have already - been placed in the same chain. - - ----------------------------------------------------------------------------- - 1) First try to create a list of good chains. - ----------------------------------------------------------------------------- - - Good chains are these which allow us to eliminate jump instructions. - Which further eliminate often executed jumps first. - - We do so by: - - *) Ignore edges which represent instructions which can not be replaced - by fall through control flow. Primarily calls and edges to blocks which - are prefixed by a info table we have to jump across. - - *) Then process remaining edges in order of frequency taken and: - - +) If source and target have not been placed build a new chain from them. - - +) If source and target have been placed, and are ends of differing chains - try to merge the two chains. - - +) If one side of the edge is a end/front of a chain, add the other block of - to edge to the same chain - - Eg if we look at edge (B -> C) and already have the chain (A -> B) - then we extend the chain to (A -> B -> C). - - +) If the edge was used to modify or build a new chain remove the edge from - our working list. - - *) If there any blocks not being placed into a chain after these steps we place - them into a chain consisting of only this block. - - Ranking edges by their taken frequency, if - two edges compete for fall through on the same target block, the one taken - more often will automatically win out. Resulting in fewer instructions being - executed. - - Creating singleton chains is required for situations where we have code of the - form: - - A: goto B: - <infoTable> - B: goto C: - <infoTable> - C: ... - - As the code in block B is only connected to the rest of the program via edges - which will be ignored in this step we make sure that B still ends up in a chain - this way. - - ----------------------------------------------------------------------------- - 2) We also try to fuse chains. - ----------------------------------------------------------------------------- - - As a result from the above step we still end up with multiple chains which - represent sequential control flow chunks. But they are not yet suitable for - code layout as we need to place *all* blocks into a single sequence. - - In this step we combine chains result from the above step via these steps: - - *) Look at the ranked list of *all* edges, including calls/jumps across info tables - and the like. - - *) Look at each edge and - - +) Given an edge (A -> B) try to find two chains for which - * Block A is at the end of one chain - * Block B is at the front of the other chain. - +) If we find such a chain we "fuse" them into a single chain, remove the - edge from working set and continue. - +) If we can't find such chains we skip the edge and continue. - - ----------------------------------------------------------------------------- - 3) Place indirect successors (neighbours) after each other - ----------------------------------------------------------------------------- - - We might have chains [A,B,C,X],[E] in a CFG of the sort: - - A ---> B ---> C --------> X(exit) - \- ->E- -/ - - While E does not follow X it's still beneficial to place them near each other. - This can be advantageous if eg C,X,E will end up in the same cache line. - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ~~~ Note [Triangle Control Flow] - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - Checking if an argument is already evaluated leads to a somewhat - special case which looks like this: - - A: - if (R1 & 7 != 0) goto Leval; else goto Lwork; - Leval: // global - call (I64[R1])(R1) returns to Lwork, args: 8, res: 8, upd: 8; - Lwork: // global - ... - - A - |\ - | Leval - |/ - (This edge can be missing because of optimizations) - Lwork - - Once we hit the metal the call instruction is just 2-3 bytes large - depending on the register used. So we lay out the assembly like this: - - movq %rbx,%rax - andl $7,%eax - cmpq $1,%rax - jne Lwork - Leval: - jmp *(%rbx) # encoded in 2-3 bytes. - <info table> - Lwork: - ... - - We could explicitly check for this control flow pattern. - - This is advantageous because: - * It's optimal if the argument isn't evaluated. - * If it's evaluated we only have the extra cost of jumping over - the 2-3 bytes for the call. - * Guarantees the smaller encoding for the conditional jump. - - However given that Lwork usually has an info table we - penalize this edge. So Leval should get placed first - either way and things work out for the best. - - Optimizing for the evaluated case instead would penalize - the other code path. It adds an jump as we can't fall through - to Lwork because of the info table. - Assuming that Lwork is large the chance that the "call" ends up - in the same cache line is also fairly small. - --} - - --- | Look at X number of blocks in two chains to determine --- if they are "neighbours". -neighbourOverlapp :: Int -neighbourOverlapp = 2 - --- | Maps blocks near the end of a chain to it's chain AND --- the other blocks near the end. --- [A,B,C,D,E] Gives entries like (B -> ([A,B], [A,B,C,D,E])) --- where [A,B] are blocks in the end region of a chain. --- This is cheaper then recomputing the ends multiple times. -type FrontierMap = LabelMap ([BlockId],BlockChain) - --- | A non empty ordered sequence of basic blocks. --- It is suitable for serialization in this order. --- --- We use OrdList instead of [] to allow fast append on both sides --- when combining chains. -newtype BlockChain - = BlockChain { chainBlocks :: (OrdList BlockId) } - --- All chains are constructed the same way so comparison --- including structure is faster. -instance Eq BlockChain where - BlockChain b1 == BlockChain b2 = strictlyEqOL b1 b2 - --- Useful for things like sets and debugging purposes, sorts by blocks --- in the chain. -instance Ord (BlockChain) where - (BlockChain lbls1) `compare` (BlockChain lbls2) - = ASSERT(toList lbls1 /= toList lbls2 || lbls1 `strictlyEqOL` lbls2) - strictlyOrdOL lbls1 lbls2 - -instance Outputable (BlockChain) where - ppr (BlockChain blks) = - parens (text "Chain:" <+> ppr (fromOL $ blks) ) - -chainFoldl :: (b -> BlockId -> b) -> b -> BlockChain -> b -chainFoldl f z (BlockChain blocks) = foldl' f z blocks - -noDups :: [BlockChain] -> Bool -noDups chains = - let chainBlocks = concatMap chainToBlocks chains :: [BlockId] - (_blocks, dups) = removeDups compare chainBlocks - in if null dups then True - else pprTrace "Duplicates:" (ppr (map toList dups) $$ text "chains" <+> ppr chains ) False - -inFront :: BlockId -> BlockChain -> Bool -inFront bid (BlockChain seq) - = headOL seq == bid - -chainSingleton :: BlockId -> BlockChain -chainSingleton lbl - = BlockChain (unitOL lbl) - -chainFromList :: [BlockId] -> BlockChain -chainFromList = BlockChain . toOL - -chainSnoc :: BlockChain -> BlockId -> BlockChain -chainSnoc (BlockChain blks) lbl - = BlockChain (blks `snocOL` lbl) - -chainCons :: BlockId -> BlockChain -> BlockChain -chainCons lbl (BlockChain blks) - = BlockChain (lbl `consOL` blks) - -chainConcat :: BlockChain -> BlockChain -> BlockChain -chainConcat (BlockChain blks1) (BlockChain blks2) - = BlockChain (blks1 `appOL` blks2) - -chainToBlocks :: BlockChain -> [BlockId] -chainToBlocks (BlockChain blks) = fromOL blks - --- | Given the Chain A -> B -> C -> D and we break at C --- we get the two Chains (A -> B, C -> D) as result. -breakChainAt :: BlockId -> BlockChain - -> (BlockChain,BlockChain) -breakChainAt bid (BlockChain blks) - | not (bid == head rblks) - = panic "Block not in chain" - | otherwise - = (BlockChain (toOL lblks), - BlockChain (toOL rblks)) - where - (lblks, rblks) = break (\lbl -> lbl == bid) (fromOL blks) - -takeR :: Int -> BlockChain -> [BlockId] -takeR n (BlockChain blks) = - take n . fromOLReverse $ blks - -takeL :: Int -> BlockChain -> [BlockId] -takeL n (BlockChain blks) = - take n . fromOL $ blks - --- Note [Combining neighborhood chains] --- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - --- See also Note [Chain based CFG serialization] --- We have the chains (A-B-C-D) and (E-F) and an Edge C->E. --- --- While placing the latter after the former doesn't result in sequential --- control flow it is still beneficial. As block C and E might end --- up in the same cache line. --- --- So we place these chains next to each other even if we can't fuse them. --- --- A -> B -> C -> D --- v --- - -> E -> F ... --- --- A simple heuristic to chose which chains we want to combine: --- * Process edges in descending priority. --- * Check if there is a edge near the end of one chain which goes --- to a block near the start of another edge. --- --- While we could take into account the space between the two blocks which --- share an edge this blows up compile times quite a bit. It requires --- us to find all edges between two chains, check the distance for all edges, --- rank them based on the distance and only then we can select two chains --- to combine. Which would add a lot of complexity for little gain. --- --- So instead we just rank by the strength of the edge and use the first pair we --- find. - --- | For a given list of chains and edges try to combine chains with strong --- edges between them. -combineNeighbourhood :: [CfgEdge] -- ^ Edges to consider - -> [BlockChain] -- ^ Current chains of blocks - -> ([BlockChain], Set.Set (BlockId,BlockId)) - -- ^ Resulting list of block chains, and a set of edges which - -- were used to fuse chains and as such no longer need to be - -- considered. -combineNeighbourhood edges chains - = -- pprTraceIt "Neighbours" $ - -- pprTrace "combineNeighbours" (ppr edges) $ - applyEdges edges endFrontier startFrontier (Set.empty) - where - --Build maps from chain ends to chains - endFrontier, startFrontier :: FrontierMap - endFrontier = - mapFromList $ concatMap (\chain -> - let ends = getEnds chain :: [BlockId] - entry = (ends,chain) - in map (\x -> (x,entry)) ends ) chains - startFrontier = - mapFromList $ concatMap (\chain -> - let front = getFronts chain - entry = (front,chain) - in map (\x -> (x,entry)) front) chains - applyEdges :: [CfgEdge] -> FrontierMap -> FrontierMap -> Set.Set (BlockId, BlockId) - -> ([BlockChain], Set.Set (BlockId,BlockId)) - applyEdges [] chainEnds _chainFronts combined = - (ordNub $ map snd $ mapElems chainEnds, combined) - applyEdges ((CfgEdge from to _w):edges) chainEnds chainFronts combined - | Just (c1_e,c1) <- mapLookup from chainEnds - , Just (c2_f,c2) <- mapLookup to chainFronts - , c1 /= c2 -- Avoid trying to concat a chain with itself. - = let newChain = chainConcat c1 c2 - newChainFrontier = getFronts newChain - newChainEnds = getEnds newChain - newFronts :: FrontierMap - newFronts = - let withoutOld = - foldl' (\m b -> mapDelete b m :: FrontierMap) chainFronts (c2_f ++ getFronts c1) - entry = - (newChainFrontier,newChain) --let bound to ensure sharing - in foldl' (\m x -> mapInsert x entry m) - withoutOld newChainFrontier - - newEnds = - let withoutOld = foldl' (\m b -> mapDelete b m) chainEnds (c1_e ++ getEnds c2) - entry = (newChainEnds,newChain) --let bound to ensure sharing - in foldl' (\m x -> mapInsert x entry m) - withoutOld newChainEnds - in - -- pprTrace "ApplyEdges" - -- (text "before" $$ - -- text "fronts" <+> ppr chainFronts $$ - -- text "ends" <+> ppr chainEnds $$ - - -- text "various" $$ - -- text "newChain" <+> ppr newChain $$ - -- text "newChainFrontier" <+> ppr newChainFrontier $$ - -- text "newChainEnds" <+> ppr newChainEnds $$ - -- text "drop" <+> ppr ((c2_f ++ getFronts c1) ++ (c1_e ++ getEnds c2)) $$ - - -- text "after" $$ - -- text "fronts" <+> ppr newFronts $$ - -- text "ends" <+> ppr newEnds - -- ) - applyEdges edges newEnds newFronts (Set.insert (from,to) combined) - | otherwise - = applyEdges edges chainEnds chainFronts combined - where - - getFronts chain = takeL neighbourOverlapp chain - getEnds chain = takeR neighbourOverlapp chain - --- In the last stop we combine all chains into a single one. --- Trying to place chains with strong edges next to each other. -mergeChains :: [CfgEdge] -> [BlockChain] - -> (BlockChain) -mergeChains edges chains - = -- pprTrace "combine" (ppr edges) $ - runST $ do - let addChain m0 chain = do - ref <- newSTRef chain - return $ chainFoldl (\m' b -> mapInsert b ref m') m0 chain - chainMap' <- foldM (\m0 c -> addChain m0 c) mapEmpty chains - merge edges chainMap' - where - -- We keep a map from ALL blocks to their respective chain (sigh) - -- This is required since when looking at an edge we need to find - -- the associated chains quickly. - -- We use a map of STRefs, maintaining a invariant of one STRef per chain. - -- When merging chains we can update the - -- STRef of one chain once (instead of writing to the map for each block). - -- We then overwrite the STRefs for the other chain so there is again only - -- a single STRef for the combined chain. - -- The difference in terms of allocations saved is ~0.2% with -O so actually - -- significant compared to using a regular map. - - merge :: forall s. [CfgEdge] -> LabelMap (STRef s BlockChain) -> ST s BlockChain - merge [] chains = do - chains' <- ordNub <$> (mapM readSTRef $ mapElems chains) :: ST s [BlockChain] - return $ foldl' chainConcat (head chains') (tail chains') - merge ((CfgEdge from to _):edges) chains - -- | pprTrace "merge" (ppr (from,to) <> ppr chains) False - -- = undefined - | cFrom == cTo - = merge edges chains - | otherwise - = do - chains' <- mergeComb cFrom cTo - merge edges chains' - where - mergeComb :: STRef s BlockChain -> STRef s BlockChain -> ST s (LabelMap (STRef s BlockChain)) - mergeComb refFrom refTo = do - cRight <- readSTRef refTo - chain <- pure chainConcat <*> readSTRef refFrom <*> pure cRight - writeSTRef refFrom chain - return $ chainFoldl (\m b -> mapInsert b refFrom m) chains cRight - - cFrom = expectJust "mergeChains:chainMap:from" $ mapLookup from chains - cTo = expectJust "mergeChains:chainMap:to" $ mapLookup to chains - - --- See Note [Chain based CFG serialization] for the general idea. --- This creates and fuses chains at the same time for performance reasons. - --- Try to build chains from a list of edges. --- Edges must be sorted **descending** by their priority. --- Returns the constructed chains, along with all edges which --- are irrelevant past this point, this information doesn't need --- to be complete - it's only used to speed up the process. --- An Edge is irrelevant if the ends are part of the same chain. --- We say these edges are already linked -buildChains :: [CfgEdge] -> [BlockId] - -> ( LabelMap BlockChain -- Resulting chains, indexd by end if chain. - , Set.Set (BlockId, BlockId)) --List of fused edges. -buildChains edges blocks - = runST $ buildNext setEmpty mapEmpty mapEmpty edges Set.empty - where - -- buildNext builds up chains from edges one at a time. - - -- We keep a map from the ends of chains to the chains. - -- This we we can easily check if an block should be appended to an - -- existing chain! - -- We store them using STRefs so we don't have to rebuild the spine of both - -- maps every time we update a chain. - buildNext :: forall s. LabelSet - -> LabelMap (STRef s BlockChain) -- Map from end of chain to chain. - -> LabelMap (STRef s BlockChain) -- Map from start of chain to chain. - -> [CfgEdge] -- Edges to check - ordered by decreasing weight - -> Set.Set (BlockId, BlockId) -- Used edges - -> ST s ( LabelMap BlockChain -- Chains by end - , Set.Set (BlockId, BlockId) --List of fused edges - ) - buildNext placed _chainStarts chainEnds [] linked = do - ends' <- sequence $ mapMap readSTRef chainEnds :: ST s (LabelMap BlockChain) - -- Any remaining blocks have to be made to singleton chains. - -- They might be combined with other chains later on outside this function. - let unplaced = filter (\x -> not (setMember x placed)) blocks - singletons = map (\x -> (x,chainSingleton x)) unplaced :: [(BlockId,BlockChain)] - return (foldl' (\m (k,v) -> mapInsert k v m) ends' singletons , linked) - buildNext placed chainStarts chainEnds (edge:todo) linked - | from == to - -- We skip self edges - = buildNext placed chainStarts chainEnds todo (Set.insert (from,to) linked) - | not (alreadyPlaced from) && - not (alreadyPlaced to) - = do - --pprTraceM "Edge-Chain:" (ppr edge) - chain' <- newSTRef $ chainFromList [from,to] - buildNext - (setInsert to (setInsert from placed)) - (mapInsert from chain' chainStarts) - (mapInsert to chain' chainEnds) - todo - (Set.insert (from,to) linked) - - | (alreadyPlaced from) && - (alreadyPlaced to) - , Just predChain <- mapLookup from chainEnds - , Just succChain <- mapLookup to chainStarts - , predChain /= succChain -- Otherwise we try to create a cycle. - = do - -- pprTraceM "Fusing edge" (ppr edge) - fuseChain predChain succChain - - | (alreadyPlaced from) && - (alreadyPlaced to) - = --pprTraceM "Skipping:" (ppr edge) >> - buildNext placed chainStarts chainEnds todo linked - - | otherwise - = do -- pprTraceM "Finding chain for:" (ppr edge $$ - -- text "placed" <+> ppr placed) - findChain - where - from = edgeFrom edge - to = edgeTo edge - alreadyPlaced blkId = (setMember blkId placed) - - -- Combine two chains into a single one. - fuseChain :: STRef s BlockChain -> STRef s BlockChain - -> ST s ( LabelMap BlockChain -- Chains by end - , Set.Set (BlockId, BlockId) --List of fused edges - ) - fuseChain fromRef toRef = do - fromChain <- readSTRef fromRef - toChain <- readSTRef toRef - let newChain = chainConcat fromChain toChain - ref <- newSTRef newChain - let start = head $ takeL 1 newChain - let end = head $ takeR 1 newChain - -- chains <- sequence $ mapMap readSTRef chainStarts - -- pprTraceM "pre-fuse chains:" $ ppr chains - buildNext - placed - (mapInsert start ref $ mapDelete to $ chainStarts) - (mapInsert end ref $ mapDelete from $ chainEnds) - todo - (Set.insert (from,to) linked) - - - --Add the block to a existing chain or creates a new chain - findChain :: ST s ( LabelMap BlockChain -- Chains by end - , Set.Set (BlockId, BlockId) --List of fused edges - ) - findChain - -- We can attach the block to the end of a chain - | alreadyPlaced from - , Just predChain <- mapLookup from chainEnds - = do - chain <- readSTRef predChain - let newChain = chainSnoc chain to - writeSTRef predChain newChain - let chainEnds' = mapInsert to predChain $ mapDelete from chainEnds - -- chains <- sequence $ mapMap readSTRef chainStarts - -- pprTraceM "from chains:" $ ppr chains - buildNext (setInsert to placed) chainStarts chainEnds' todo (Set.insert (from,to) linked) - -- We can attack it to the front of a chain - | alreadyPlaced to - , Just succChain <- mapLookup to chainStarts - = do - chain <- readSTRef succChain - let newChain = from `chainCons` chain - writeSTRef succChain newChain - let chainStarts' = mapInsert from succChain $ mapDelete to chainStarts - -- chains <- sequence $ mapMap readSTRef chainStarts' - -- pprTraceM "to chains:" $ ppr chains - buildNext (setInsert from placed) chainStarts' chainEnds todo (Set.insert (from,to) linked) - -- The placed end of the edge is part of a chain already and not an end. - | otherwise - = do - let block = if alreadyPlaced to then from else to - --pprTraceM "Singleton" $ ppr block - let newChain = chainSingleton block - ref <- newSTRef newChain - buildNext (setInsert block placed) (mapInsert block ref chainStarts) - (mapInsert block ref chainEnds) todo (linked) - where - alreadyPlaced blkId = (setMember blkId placed) - --- | Place basic blocks based on the given CFG. --- See Note [Chain based CFG serialization] -sequenceChain :: forall a i. (Instruction i, Outputable i) - => LabelMap a -- ^ Keys indicate an info table on the block. - -> CFG -- ^ Control flow graph and some meta data. - -> [GenBasicBlock i] -- ^ List of basic blocks to be placed. - -> [GenBasicBlock i] -- ^ Blocks placed in sequence. -sequenceChain _info _weights [] = [] -sequenceChain _info _weights [x] = [x] -sequenceChain info weights' blocks@((BasicBlock entry _):_) = - let weights :: CFG - weights = --pprTrace "cfg'" (pprEdgeWeights cfg') - cfg' - where - (_, globalEdgeWeights) = {-# SCC mkGlobalWeights #-} mkGlobalWeights entry weights' - cfg' = {-# SCC rewriteEdges #-} - mapFoldlWithKey - (\cfg from m -> - mapFoldlWithKey - (\cfg to w -> setEdgeWeight cfg (EdgeWeight w) from to ) - cfg m ) - weights' - globalEdgeWeights - - directEdges :: [CfgEdge] - directEdges = sortBy (flip compare) $ catMaybes . map relevantWeight $ (infoEdgeList weights) - where - relevantWeight :: CfgEdge -> Maybe CfgEdge - relevantWeight edge@(CfgEdge from to edgeInfo) - | (EdgeInfo CmmSource { trans_cmmNode = CmmCall {} } _) <- edgeInfo - -- Ignore edges across calls - = Nothing - | mapMember to info - , w <- edgeWeight edgeInfo - -- The payoff is small if we jump over an info table - = Just (CfgEdge from to edgeInfo { edgeWeight = w/8 }) - | otherwise - = Just edge - - blockMap :: LabelMap (GenBasicBlock i) - blockMap - = foldl' (\m blk@(BasicBlock lbl _ins) -> - mapInsert lbl blk m) - mapEmpty blocks - - (builtChains, builtEdges) - = {-# SCC "buildChains" #-} - --pprTraceIt "generatedChains" $ - --pprTrace "blocks" (ppr (mapKeys blockMap)) $ - buildChains directEdges (mapKeys blockMap) - - rankedEdges :: [CfgEdge] - -- Sort descending by weight, remove fused edges - rankedEdges = - filter (\edge -> not (Set.member (edgeFrom edge,edgeTo edge) builtEdges)) $ - directEdges - - (neighbourChains, combined) - = ASSERT(noDups $ mapElems builtChains) - {-# SCC "groupNeighbourChains" #-} - -- pprTraceIt "NeighbourChains" $ - combineNeighbourhood rankedEdges (mapElems builtChains) - - - allEdges :: [CfgEdge] - allEdges = {-# SCC allEdges #-} - sortOn (relevantWeight) $ filter (not . deadEdge) $ (infoEdgeList weights) - where - deadEdge :: CfgEdge -> Bool - deadEdge (CfgEdge from to _) = let e = (from,to) in Set.member e combined || Set.member e builtEdges - relevantWeight :: CfgEdge -> EdgeWeight - relevantWeight (CfgEdge _ _ edgeInfo) - | EdgeInfo (CmmSource { trans_cmmNode = CmmCall {}}) _ <- edgeInfo - -- Penalize edges across calls - = weight/(64.0) - | otherwise - = weight - where - -- negate to sort descending - weight = negate (edgeWeight edgeInfo) - - masterChain = - {-# SCC "mergeChains" #-} - -- pprTraceIt "MergedChains" $ - mergeChains allEdges neighbourChains - - --Make sure the first block stays first - prepedChains - | inFront entry masterChain - = [masterChain] - | (rest,entry) <- breakChainAt entry masterChain - = [entry,rest] -#if __GLASGOW_HASKELL__ <= 810 - | otherwise = pprPanic "Entry point eliminated" $ - ppr masterChain -#endif - - blockList - = ASSERT(noDups [masterChain]) - (concatMap fromOL $ map chainBlocks prepedChains) - - --chainPlaced = setFromList $ map blockId blockList :: LabelSet - chainPlaced = setFromList $ blockList :: LabelSet - unplaced = - let blocks = mapKeys blockMap - isPlaced b = setMember (b) chainPlaced - in filter (\block -> not (isPlaced block)) blocks - - placedBlocks = - -- We want debug builds to catch this as it's a good indicator for - -- issues with CFG invariants. But we don't want to blow up production - -- builds if something slips through. - ASSERT(null unplaced) - --pprTraceIt "placedBlocks" $ - -- ++ [] is stil kinda expensive - if null unplaced then blockList else blockList ++ unplaced - getBlock bid = expectJust "Block placement" $ mapLookup bid blockMap - in - --Assert we placed all blocks given as input - ASSERT(all (\bid -> mapMember bid blockMap) placedBlocks) - dropJumps info $ map getBlock placedBlocks - -{-# SCC dropJumps #-} --- | Remove redundant jumps between blocks when we can rely on --- fall through. -dropJumps :: forall a i. Instruction i => LabelMap a -> [GenBasicBlock i] - -> [GenBasicBlock i] -dropJumps _ [] = [] -dropJumps info ((BasicBlock lbl ins):todo) - | not . null $ ins --This can happen because of shortcutting - , [dest] <- jumpDestsOfInstr (last ins) - , ((BasicBlock nextLbl _) : _) <- todo - , not (mapMember dest info) - , nextLbl == dest - = BasicBlock lbl (init ins) : dropJumps info todo - | otherwise - = BasicBlock lbl ins : dropJumps info todo - - --- ----------------------------------------------------------------------------- --- Sequencing the basic blocks - --- Cmm BasicBlocks are self-contained entities: they always end in a --- jump, either non-local or to another basic block in the same proc. --- In this phase, we attempt to place the basic blocks in a sequence --- such that as many of the local jumps as possible turn into --- fallthroughs. - -sequenceTop - :: (Instruction instr, Outputable instr) - => DynFlags -- Determine which layout algo to use - -> NcgImpl statics instr jumpDest - -> Maybe CFG -- ^ CFG if we have one. - -> NatCmmDecl statics instr -- ^ Function to serialize - -> NatCmmDecl statics instr - -sequenceTop _ _ _ top@(CmmData _ _) = top -sequenceTop dflags ncgImpl edgeWeights - (CmmProc info lbl live (ListGraph blocks)) - | (gopt Opt_CfgBlocklayout dflags) && backendMaintainsCfg dflags - --Use chain based algorithm - , Just cfg <- edgeWeights - = CmmProc info lbl live ( ListGraph $ ncgMakeFarBranches ncgImpl info $ - {-# SCC layoutBlocks #-} - sequenceChain info cfg blocks ) - | otherwise - --Use old algorithm - = let cfg = if dontUseCfg then Nothing else edgeWeights - in CmmProc info lbl live ( ListGraph $ ncgMakeFarBranches ncgImpl info $ - {-# SCC layoutBlocks #-} - sequenceBlocks cfg info blocks) - where - dontUseCfg = gopt Opt_WeightlessBlocklayout dflags || - (not $ backendMaintainsCfg dflags) - --- The old algorithm: --- It is very simple (and stupid): We make a graph out of --- the blocks where there is an edge from one block to another iff the --- first block ends by jumping to the second. Then we topologically --- sort this graph. Then traverse the list: for each block, we first --- output the block, then if it has an out edge, we move the --- destination of the out edge to the front of the list, and continue. - --- FYI, the classic layout for basic blocks uses postorder DFS; this --- algorithm is implemented in Hoopl. - -sequenceBlocks :: Instruction inst => Maybe CFG -> LabelMap a - -> [GenBasicBlock inst] -> [GenBasicBlock inst] -sequenceBlocks _edgeWeight _ [] = [] -sequenceBlocks edgeWeights infos (entry:blocks) = - let entryNode = mkNode edgeWeights entry - bodyNodes = reverse - (flattenSCCs (sccBlocks edgeWeights blocks)) - in dropJumps infos . seqBlocks infos $ ( entryNode : bodyNodes) - -- the first block is the entry point ==> it must remain at the start. - -sccBlocks - :: Instruction instr - => Maybe CFG -> [NatBasicBlock instr] - -> [SCC (Node BlockId (NatBasicBlock instr))] -sccBlocks edgeWeights blocks = - stronglyConnCompFromEdgedVerticesUniqR - (map (mkNode edgeWeights) blocks) - -mkNode :: (Instruction t) - => Maybe CFG -> GenBasicBlock t - -> Node BlockId (GenBasicBlock t) -mkNode edgeWeights block@(BasicBlock id instrs) = - DigraphNode block id outEdges - where - outEdges :: [BlockId] - outEdges - --Select the heaviest successor, ignore weights <= zero - = successor - where - successor - | Just successors <- fmap (`getSuccEdgesSorted` id) - edgeWeights -- :: Maybe [(Label, EdgeInfo)] - = case successors of - [] -> [] - ((target,info):_) - | length successors > 2 || edgeWeight info <= 0 -> [] - | otherwise -> [target] - | otherwise - = case jumpDestsOfInstr (last instrs) of - [one] -> [one] - _many -> [] - - -seqBlocks :: LabelMap i -> [Node BlockId (GenBasicBlock t1)] - -> [GenBasicBlock t1] -seqBlocks infos blocks = placeNext pullable0 todo0 - where - -- pullable: Blocks that are not yet placed - -- todo: Original order of blocks, to be followed if we have no good - -- reason not to; - -- may include blocks that have already been placed, but then - -- these are not in pullable - pullable0 = listToUFM [ (i,(b,n)) | DigraphNode b i n <- blocks ] - todo0 = map node_key blocks - - placeNext _ [] = [] - placeNext pullable (i:rest) - | Just (block, pullable') <- lookupDeleteUFM pullable i - = place pullable' rest block - | otherwise - -- We already placed this block, so ignore - = placeNext pullable rest - - place pullable todo (block,[]) - = block : placeNext pullable todo - place pullable todo (block@(BasicBlock id instrs),[next]) - | mapMember next infos - = block : placeNext pullable todo - | Just (nextBlock, pullable') <- lookupDeleteUFM pullable next - = BasicBlock id instrs : place pullable' todo nextBlock - | otherwise - = block : placeNext pullable todo - place _ _ (_,tooManyNextNodes) - = pprPanic "seqBlocks" (ppr tooManyNextNodes) - - -lookupDeleteUFM :: Uniquable key => UniqFM elt -> key - -> Maybe (elt, UniqFM elt) -lookupDeleteUFM m k = do -- Maybe monad - v <- lookupUFM m k - return (v, delFromUFM m k) diff --git a/compiler/nativeGen/CFG.hs b/compiler/nativeGen/CFG.hs deleted file mode 100644 index 7e2c2de095..0000000000 --- a/compiler/nativeGen/CFG.hs +++ /dev/null @@ -1,1320 +0,0 @@ --- --- Copyright (c) 2018 Andreas Klebinger --- - -{-# LANGUAGE TypeFamilies, ScopedTypeVariables #-} -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE CPP #-} -{-# LANGUAGE Rank2Types #-} -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE DataKinds #-} - -module CFG - ( CFG, CfgEdge(..), EdgeInfo(..), EdgeWeight(..) - , TransitionSource(..) - - --Modify the CFG - , addWeightEdge, addEdge - , delEdge, delNode - , addNodesBetween, shortcutWeightMap - , reverseEdges, filterEdges - , addImmediateSuccessor - , mkWeightInfo, adjustEdgeWeight, setEdgeWeight - - --Query the CFG - , infoEdgeList, edgeList - , getSuccessorEdges, getSuccessors - , getSuccEdgesSorted - , getEdgeInfo - , getCfgNodes, hasNode - - -- Loop Information - , loopMembers, loopLevels, loopInfo - - --Construction/Misc - , getCfg, getCfgProc, pprEdgeWeights, sanityCheckCfg - - --Find backedges and update their weight - , optimizeCFG - , mkGlobalWeights - - ) -where - -#include "HsVersions.h" - -import GhcPrelude - -import GHC.Cmm.BlockId -import GHC.Cmm as Cmm - -import GHC.Cmm.Utils -import GHC.Cmm.Switch -import GHC.Cmm.Dataflow.Collections -import GHC.Cmm.Dataflow.Label -import GHC.Cmm.Dataflow.Block -import qualified GHC.Cmm.Dataflow.Graph as G - -import Util -import Digraph -import Maybes - -import Unique -import qualified Dominators as Dom -import Data.IntMap.Strict (IntMap) -import Data.IntSet (IntSet) - -import qualified Data.IntMap.Strict as IM -import qualified Data.Map as M -import qualified Data.IntSet as IS -import qualified Data.Set as S -import Data.Tree -import Data.Bifunctor - -import Outputable --- DEBUGGING ONLY ---import GHC.Cmm.DebugBlock ---import OrdList ---import GHC.Cmm.DebugBlock.Trace -import GHC.Cmm.Ppr () -- For Outputable instances -import qualified GHC.Driver.Session as D - -import Data.List (sort, nub, partition) -import Data.STRef.Strict -import Control.Monad.ST - -import Data.Array.MArray -import Data.Array.ST -import Data.Array.IArray -import Data.Array.Unsafe (unsafeFreeze) -import Data.Array.Base (unsafeRead, unsafeWrite) - -import Control.Monad - -type Prob = Double - -type Edge = (BlockId, BlockId) -type Edges = [Edge] - -newtype EdgeWeight - = EdgeWeight { weightToDouble :: Double } - deriving (Eq,Ord,Enum,Num,Real,Fractional) - -instance Outputable EdgeWeight where - ppr (EdgeWeight w) = doublePrec 5 w - -type EdgeInfoMap edgeInfo = LabelMap (LabelMap edgeInfo) - --- | A control flow graph where edges have been annotated with a weight. --- Implemented as IntMap (IntMap <edgeData>) --- We must uphold the invariant that for each edge A -> B we must have: --- A entry B in the outer map. --- A entry B in the map we get when looking up A. --- Maintaining this invariant is useful as any failed lookup now indicates --- an actual error in code which might go unnoticed for a while --- otherwise. -type CFG = EdgeInfoMap EdgeInfo - -data CfgEdge - = CfgEdge - { edgeFrom :: !BlockId - , edgeTo :: !BlockId - , edgeInfo :: !EdgeInfo - } - --- | Careful! Since we assume there is at most one edge from A to B --- the Eq instance does not consider weight. -instance Eq CfgEdge where - (==) (CfgEdge from1 to1 _) (CfgEdge from2 to2 _) - = from1 == from2 && to1 == to2 - --- | Edges are sorted ascending pointwise by weight, source and destination -instance Ord CfgEdge where - compare (CfgEdge from1 to1 (EdgeInfo {edgeWeight = weight1})) - (CfgEdge from2 to2 (EdgeInfo {edgeWeight = weight2})) - | weight1 < weight2 || weight1 == weight2 && from1 < from2 || - weight1 == weight2 && from1 == from2 && to1 < to2 - = LT - | from1 == from2 && to1 == to2 && weight1 == weight2 - = EQ - | otherwise - = GT - -instance Outputable CfgEdge where - ppr (CfgEdge from1 to1 edgeInfo) - = parens (ppr from1 <+> text "-(" <> ppr edgeInfo <> text ")->" <+> ppr to1) - --- | Can we trace back a edge to a specific Cmm Node --- or has it been introduced during assembly codegen. We use this to maintain --- some information which would otherwise be lost during the --- Cmm <-> asm transition. --- See also Note [Inverting Conditional Branches] -data TransitionSource - = CmmSource { trans_cmmNode :: (CmmNode O C) - , trans_info :: BranchInfo } - | AsmCodeGen - deriving (Eq) - -data BranchInfo = NoInfo -- ^ Unknown, but not heap or stack check. - | HeapStackCheck -- ^ Heap or stack check - deriving Eq - -instance Outputable BranchInfo where - ppr NoInfo = text "regular" - ppr HeapStackCheck = text "heap/stack" - -isHeapOrStackCheck :: TransitionSource -> Bool -isHeapOrStackCheck (CmmSource { trans_info = HeapStackCheck}) = True -isHeapOrStackCheck _ = False - --- | Information about edges -data EdgeInfo - = EdgeInfo - { transitionSource :: !TransitionSource - , edgeWeight :: !EdgeWeight - } deriving (Eq) - -instance Outputable EdgeInfo where - ppr edgeInfo = text "weight:" <+> ppr (edgeWeight edgeInfo) - --- | Convenience function, generate edge info based --- on weight not originating from cmm. -mkWeightInfo :: EdgeWeight -> EdgeInfo -mkWeightInfo = EdgeInfo AsmCodeGen - --- | Adjust the weight between the blocks using the given function. --- If there is no such edge returns the original map. -adjustEdgeWeight :: CFG -> (EdgeWeight -> EdgeWeight) - -> BlockId -> BlockId -> CFG -adjustEdgeWeight cfg f from to - | Just info <- getEdgeInfo from to cfg - , !weight <- edgeWeight info - , !newWeight <- f weight - = addEdge from to (info { edgeWeight = newWeight}) cfg - | otherwise = cfg - --- | Set the weight between the blocks to the given weight. --- If there is no such edge returns the original map. -setEdgeWeight :: CFG -> EdgeWeight - -> BlockId -> BlockId -> CFG -setEdgeWeight cfg !weight from to - | Just info <- getEdgeInfo from to cfg - = addEdge from to (info { edgeWeight = weight}) cfg - | otherwise = cfg - - -getCfgNodes :: CFG -> [BlockId] -getCfgNodes m = - mapKeys m - --- | Is this block part of this graph? -hasNode :: CFG -> BlockId -> Bool -hasNode m node = - -- Check the invariant that each node must exist in the first map or not at all. - ASSERT( found || not (any (mapMember node) m)) - found - where - found = mapMember node m - - - --- | Check if the nodes in the cfg and the set of blocks are the same. --- In a case of a missmatch we panic and show the difference. -sanityCheckCfg :: CFG -> LabelSet -> SDoc -> Bool -sanityCheckCfg m blockSet msg - | blockSet == cfgNodes - = True - | otherwise = - pprPanic "Block list and cfg nodes don't match" ( - text "difference:" <+> ppr diff $$ - text "blocks:" <+> ppr blockSet $$ - text "cfg:" <+> pprEdgeWeights m $$ - msg ) - False - where - cfgNodes = setFromList $ getCfgNodes m :: LabelSet - diff = (setUnion cfgNodes blockSet) `setDifference` (setIntersection cfgNodes blockSet) :: LabelSet - --- | Filter the CFG with a custom function f. --- Paramaeters are `f from to edgeInfo` -filterEdges :: (BlockId -> BlockId -> EdgeInfo -> Bool) -> CFG -> CFG -filterEdges f cfg = - mapMapWithKey filterSources cfg - where - filterSources from m = - mapFilterWithKey (\to w -> f from to w) m - - -{- Note [Updating the CFG during shortcutting] - -See Note [What is shortcutting] in the control flow optimization -code (GHC.Cmm.ContFlowOpt) for a slightly more in depth explanation on shortcutting. - -In the native backend we shortcut jumps at the assembly level. (AsmCodeGen.hs) -This means we remove blocks containing only one jump from the code -and instead redirecting all jumps targeting this block to the deleted -blocks jump target. - -However we want to have an accurate representation of control -flow in the CFG. So we add/remove edges accordingly to account -for the eliminated blocks and new edges. - -If we shortcut A -> B -> C to A -> C: -* We delete edges A -> B and B -> C -* Replacing them with the edge A -> C - -We also try to preserve jump weights while doing so. - -Note that: -* The edge B -> C can't have interesting weights since - the block B consists of a single unconditional jump without branching. -* We delete the edge A -> B and add the edge A -> C. -* The edge A -> B can be one of many edges originating from A so likely - has edge weights we want to preserve. - -For this reason we simply store the edge info from the original A -> B -edge and apply this information to the new edge A -> C. - -Sometimes we have a scenario where jump target C is not represented by an -BlockId but an immediate value. I'm only aware of this happening without -tables next to code currently. - -Then we go from A ---> B - -> IMM to A - -> IMM where the dashed arrows -are not stored in the CFG. - -In that case we simply delete the edge A -> B. - -In terms of implementation the native backend first builds a mapping -from blocks suitable for shortcutting to their jump targets. -Then it redirects all jump instructions to these blocks using the -built up mapping. -This function (shortcutWeightMap) takes the same mapping and -applies the mapping to the CFG in the way laid out above. - --} -shortcutWeightMap :: LabelMap (Maybe BlockId) -> CFG -> CFG -shortcutWeightMap cuts cfg = - foldl' applyMapping cfg $ mapToList cuts - where --- takes the tuple (B,C) from the notation in [Updating the CFG during shortcutting] - applyMapping :: CFG -> (BlockId,Maybe BlockId) -> CFG - --Shortcut immediate - applyMapping m (from, Nothing) = - mapDelete from . - fmap (mapDelete from) $ m - --Regular shortcut - applyMapping m (from, Just to) = - let updatedMap :: CFG - updatedMap - = fmap (shortcutEdge (from,to)) $ - (mapDelete from m :: CFG ) - --Sometimes we can shortcut multiple blocks like so: - -- A -> B -> C -> D -> E => A -> E - -- so we check for such chains. - in case mapLookup to cuts of - Nothing -> updatedMap - Just dest -> applyMapping updatedMap (to, dest) - --Redirect edge from B to C - shortcutEdge :: (BlockId, BlockId) -> LabelMap EdgeInfo -> LabelMap EdgeInfo - shortcutEdge (from, to) m = - case mapLookup from m of - Just info -> mapInsert to info $ mapDelete from m - Nothing -> m - --- | Sometimes we insert a block which should unconditionally be executed --- after a given block. This function updates the CFG for these cases. --- So we get A -> B => A -> A' -> B --- \ \ --- -> C => -> C --- -addImmediateSuccessor :: BlockId -> BlockId -> CFG -> CFG -addImmediateSuccessor node follower cfg - = updateEdges . addWeightEdge node follower uncondWeight $ cfg - where - uncondWeight = fromIntegral . D.uncondWeight . - D.cfgWeightInfo $ D.unsafeGlobalDynFlags - targets = getSuccessorEdges cfg node - successors = map fst targets :: [BlockId] - updateEdges = addNewSuccs . remOldSuccs - remOldSuccs m = foldl' (flip (delEdge node)) m successors - addNewSuccs m = - foldl' (\m' (t,info) -> addEdge follower t info m') m targets - --- | Adds a new edge, overwrites existing edges if present -addEdge :: BlockId -> BlockId -> EdgeInfo -> CFG -> CFG -addEdge from to info cfg = - mapAlter addFromToEdge from $ - mapAlter addDestNode to cfg - where - -- Simply insert the edge into the edge list. - addFromToEdge Nothing = Just $ mapSingleton to info - addFromToEdge (Just wm) = Just $ mapInsert to info wm - -- We must add the destination node explicitly - addDestNode Nothing = Just $ mapEmpty - addDestNode n@(Just _) = n - - --- | Adds a edge with the given weight to the cfg --- If there already existed an edge it is overwritten. --- `addWeightEdge from to weight cfg` -addWeightEdge :: BlockId -> BlockId -> EdgeWeight -> CFG -> CFG -addWeightEdge from to weight cfg = - addEdge from to (mkWeightInfo weight) cfg - -delEdge :: BlockId -> BlockId -> CFG -> CFG -delEdge from to m = - mapAlter remDest from m - where - remDest Nothing = Nothing - remDest (Just wm) = Just $ mapDelete to wm - -delNode :: BlockId -> CFG -> CFG -delNode node cfg = - fmap (mapDelete node) -- < Edges to the node - (mapDelete node cfg) -- < Edges from the node - --- | Destinations from bid ordered by weight (descending) -getSuccEdgesSorted :: CFG -> BlockId -> [(BlockId,EdgeInfo)] -getSuccEdgesSorted m bid = - let destMap = mapFindWithDefault mapEmpty bid m - cfgEdges = mapToList destMap - sortedEdges = sortWith (negate . edgeWeight . snd) cfgEdges - in --pprTrace "getSuccEdgesSorted" (ppr bid <+> text "map:" <+> ppr m) - sortedEdges - --- | Get successors of a given node with edge weights. -getSuccessorEdges :: HasDebugCallStack => CFG -> BlockId -> [(BlockId,EdgeInfo)] -getSuccessorEdges m bid = maybe lookupError mapToList (mapLookup bid m) - where - lookupError = pprPanic "getSuccessorEdges: Block does not exist" $ - ppr bid <+> pprEdgeWeights m - -getEdgeInfo :: BlockId -> BlockId -> CFG -> Maybe EdgeInfo -getEdgeInfo from to m - | Just wm <- mapLookup from m - , Just info <- mapLookup to wm - = Just $! info - | otherwise - = Nothing - -getEdgeWeight :: CFG -> BlockId -> BlockId -> EdgeWeight -getEdgeWeight cfg from to = - edgeWeight $ expectJust "Edgeweight for noexisting block" $ - getEdgeInfo from to cfg - -getTransitionSource :: BlockId -> BlockId -> CFG -> TransitionSource -getTransitionSource from to cfg = transitionSource $ expectJust "Source info for noexisting block" $ - getEdgeInfo from to cfg - -reverseEdges :: CFG -> CFG -reverseEdges cfg = mapFoldlWithKey (\cfg from toMap -> go (addNode cfg from) from toMap) mapEmpty cfg - where - -- We must preserve nodes without outgoing edges! - addNode :: CFG -> BlockId -> CFG - addNode cfg b = mapInsertWith mapUnion b mapEmpty cfg - go :: CFG -> BlockId -> (LabelMap EdgeInfo) -> CFG - go cfg from toMap = mapFoldlWithKey (\cfg to info -> addEdge to from info cfg) cfg toMap :: CFG - - --- | Returns a unordered list of all edges with info -infoEdgeList :: CFG -> [CfgEdge] -infoEdgeList m = - go (mapToList m) [] - where - -- We avoid foldMap to avoid thunk buildup - go :: [(BlockId,LabelMap EdgeInfo)] -> [CfgEdge] -> [CfgEdge] - go [] acc = acc - go ((from,toMap):xs) acc - = go' xs from (mapToList toMap) acc - go' :: [(BlockId,LabelMap EdgeInfo)] -> BlockId -> [(BlockId,EdgeInfo)] -> [CfgEdge] -> [CfgEdge] - go' froms _ [] acc = go froms acc - go' froms from ((to,info):tos) acc - = go' froms from tos (CfgEdge from to info : acc) - --- | Returns a unordered list of all edges without weights -edgeList :: CFG -> [Edge] -edgeList m = - go (mapToList m) [] - where - -- We avoid foldMap to avoid thunk buildup - go :: [(BlockId,LabelMap EdgeInfo)] -> [Edge] -> [Edge] - go [] acc = acc - go ((from,toMap):xs) acc - = go' xs from (mapKeys toMap) acc - go' :: [(BlockId,LabelMap EdgeInfo)] -> BlockId -> [BlockId] -> [Edge] -> [Edge] - go' froms _ [] acc = go froms acc - go' froms from (to:tos) acc - = go' froms from tos ((from,to) : acc) - --- | Get successors of a given node without edge weights. -getSuccessors :: HasDebugCallStack => CFG -> BlockId -> [BlockId] -getSuccessors m bid - | Just wm <- mapLookup bid m - = mapKeys wm - | otherwise = lookupError - where - lookupError = pprPanic "getSuccessors: Block does not exist" $ - ppr bid <+> pprEdgeWeights m - -pprEdgeWeights :: CFG -> SDoc -pprEdgeWeights m = - let edges = sort $ infoEdgeList m :: [CfgEdge] - printEdge (CfgEdge from to (EdgeInfo { edgeWeight = weight })) - = text "\t" <> ppr from <+> text "->" <+> ppr to <> - text "[label=\"" <> ppr weight <> text "\",weight=\"" <> - ppr weight <> text "\"];\n" - --for the case that there are no edges from/to this node. - --This should rarely happen but it can save a lot of time - --to immediately see it when it does. - printNode node - = text "\t" <> ppr node <> text ";\n" - getEdgeNodes (CfgEdge from to _) = [from,to] - edgeNodes = setFromList $ concatMap getEdgeNodes edges :: LabelSet - nodes = filter (\n -> (not . setMember n) edgeNodes) . mapKeys $ mapFilter null m - in - text "digraph {\n" <> - (foldl' (<>) empty (map printEdge edges)) <> - (foldl' (<>) empty (map printNode nodes)) <> - text "}\n" - -{-# INLINE updateEdgeWeight #-} --Allows eliminating the tuple when possible --- | Invariant: The edge **must** exist already in the graph. -updateEdgeWeight :: (EdgeWeight -> EdgeWeight) -> Edge -> CFG -> CFG -updateEdgeWeight f (from, to) cfg - | Just oldInfo <- getEdgeInfo from to cfg - = let !oldWeight = edgeWeight oldInfo - !newWeight = f oldWeight - in addEdge from to (oldInfo {edgeWeight = newWeight}) cfg - | otherwise - = panic "Trying to update invalid edge" - --- from to oldWeight => newWeight -mapWeights :: (BlockId -> BlockId -> EdgeWeight -> EdgeWeight) -> CFG -> CFG -mapWeights f cfg = - foldl' (\cfg (CfgEdge from to info) -> - let oldWeight = edgeWeight info - newWeight = f from to oldWeight - in addEdge from to (info {edgeWeight = newWeight}) cfg) - cfg (infoEdgeList cfg) - - --- | Insert a block in the control flow between two other blocks. --- We pass a list of tuples (A,B,C) where --- * A -> C: Old edge --- * A -> B -> C : New Arc, where B is the new block. --- It's possible that a block has two jumps to the same block --- in the assembly code. However we still only store a single edge for --- these cases. --- We assign the old edge info to the edge A -> B and assign B -> C the --- weight of an unconditional jump. -addNodesBetween :: CFG -> [(BlockId,BlockId,BlockId)] -> CFG -addNodesBetween m updates = - foldl' updateWeight m . - weightUpdates $ updates - where - weight = fromIntegral . D.uncondWeight . - D.cfgWeightInfo $ D.unsafeGlobalDynFlags - -- We might add two blocks for different jumps along a single - -- edge. So we end up with edges: A -> B -> C , A -> D -> C - -- in this case after applying the first update the weight for A -> C - -- is no longer available. So we calculate future weights before updates. - weightUpdates = map getWeight - getWeight :: (BlockId,BlockId,BlockId) -> (BlockId,BlockId,BlockId,EdgeInfo) - getWeight (from,between,old) - | Just edgeInfo <- getEdgeInfo from old m - = (from,between,old,edgeInfo) - | otherwise - = pprPanic "Can't find weight for edge that should have one" ( - text "triple" <+> ppr (from,between,old) $$ - text "updates" <+> ppr updates $$ - text "cfg:" <+> pprEdgeWeights m ) - updateWeight :: CFG -> (BlockId,BlockId,BlockId,EdgeInfo) -> CFG - updateWeight m (from,between,old,edgeInfo) - = addEdge from between edgeInfo . - addWeightEdge between old weight . - delEdge from old $ m - -{- - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ~~~ Note [CFG Edge Weights] ~~~ - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - Edge weights assigned do not currently represent a specific - cost model and rather just a ranking of which blocks should - be placed next to each other given their connection type in - the CFG. - This is especially relevant if we whenever two blocks will - jump to the same target. - - A B - \ / - C - - Should A or B be placed in front of C? The block layout algorithm - decides this based on which edge (A,C)/(B,C) is heavier. So we - make a educated guess on which branch should be preferred. - - We rank edges in this order: - * Unconditional Control Transfer - They will always - transfer control to their target. Unless there is a info table - we can turn the jump into a fallthrough as well. - We use 20k as default, so it's easy to spot if values have been - modified but unlikely that we run into issues with overflow. - * If branches (likely) - We assume branches marked as likely - are taken more than 80% of the time. - By ranking them below unconditional jumps we make sure we - prefer the unconditional if there is a conditional and - unconditional edge towards a block. - * If branches (regular) - The false branch can potentially be turned - into a fallthrough so we prefer it slightly over the true branch. - * Unlikely branches - These can be assumed to be taken less than 20% - of the time. So we given them one of the lowest priorities. - * Switches - Switches at this level are implemented as jump tables - so have a larger number of successors. So without more information - we can only say that each individual successor is unlikely to be - jumped to and we rank them accordingly. - * Calls - We currently ignore calls completely: - * By the time we return from a call there is a good chance - that the address we return to has already been evicted from - cache eliminating a main advantage sequential placement brings. - * Calls always require a info table in front of their return - address. This reduces the chance that we return to the same - cache line further. - --} --- | Generate weights for a Cmm proc based on some simple heuristics. -getCfgProc :: D.CfgWeights -> RawCmmDecl -> CFG -getCfgProc _ (CmmData {}) = mapEmpty -getCfgProc weights (CmmProc _info _lab _live graph) = getCfg weights graph - -getCfg :: D.CfgWeights -> CmmGraph -> CFG -getCfg weights graph = - foldl' insertEdge edgelessCfg $ concatMap getBlockEdges blocks - where - D.CFGWeights - { D.uncondWeight = uncondWeight - , D.condBranchWeight = condBranchWeight - , D.switchWeight = switchWeight - , D.callWeight = callWeight - , D.likelyCondWeight = likelyCondWeight - , D.unlikelyCondWeight = unlikelyCondWeight - -- Last two are used in other places - --, D.infoTablePenalty = infoTablePenalty - --, D.backEdgeBonus = backEdgeBonus - } = weights - -- Explicitly add all nodes to the cfg to ensure they are part of the - -- CFG. - edgelessCfg = mapFromList $ zip (map G.entryLabel blocks) (repeat mapEmpty) - insertEdge :: CFG -> ((BlockId,BlockId),EdgeInfo) -> CFG - insertEdge m ((from,to),weight) = - mapAlter f from m - where - f :: Maybe (LabelMap EdgeInfo) -> Maybe (LabelMap EdgeInfo) - f Nothing = Just $ mapSingleton to weight - f (Just destMap) = Just $ mapInsert to weight destMap - getBlockEdges :: CmmBlock -> [((BlockId,BlockId),EdgeInfo)] - getBlockEdges block = - case branch of - CmmBranch dest -> [mkEdge dest uncondWeight] - CmmCondBranch cond t f l - | l == Nothing -> - [mkEdge f condBranchWeight, mkEdge t condBranchWeight] - | l == Just True -> - [mkEdge f unlikelyCondWeight, mkEdge t likelyCondWeight] - | l == Just False -> - [mkEdge f likelyCondWeight, mkEdge t unlikelyCondWeight] - where - mkEdgeInfo = -- pprTrace "Info" (ppr branchInfo <+> ppr cond) - EdgeInfo (CmmSource branch branchInfo) . fromIntegral - mkEdge target weight = ((bid,target), mkEdgeInfo weight) - branchInfo = - foldRegsUsed - (panic "foldRegsDynFlags") - (\info r -> if r == SpLim || r == HpLim || r == BaseReg - then HeapStackCheck else info) - NoInfo cond - - (CmmSwitch _e ids) -> - let switchTargets = switchTargetsToList ids - --Compiler performance hack - for very wide switches don't - --consider targets for layout. - adjustedWeight = - if (length switchTargets > 10) then -1 else switchWeight - in map (\x -> mkEdge x adjustedWeight) switchTargets - (CmmCall { cml_cont = Just cont}) -> [mkEdge cont callWeight] - (CmmForeignCall {Cmm.succ = cont}) -> [mkEdge cont callWeight] - (CmmCall { cml_cont = Nothing }) -> [] - other -> - panic "Foo" $ - ASSERT2(False, ppr "Unknown successor cause:" <> - (ppr branch <+> text "=>" <> ppr (G.successors other))) - map (\x -> ((bid,x),mkEdgeInfo 0)) $ G.successors other - where - bid = G.entryLabel block - mkEdgeInfo = EdgeInfo (CmmSource branch NoInfo) . fromIntegral - mkEdge target weight = ((bid,target), mkEdgeInfo weight) - branch = lastNode block :: CmmNode O C - - blocks = revPostorder graph :: [CmmBlock] - ---Find back edges by BFS -findBackEdges :: HasDebugCallStack => BlockId -> CFG -> Edges -findBackEdges root cfg = - --pprTraceIt "Backedges:" $ - map fst . - filter (\x -> snd x == Backward) $ typedEdges - where - edges = edgeList cfg :: [(BlockId,BlockId)] - getSuccs = getSuccessors cfg :: BlockId -> [BlockId] - typedEdges = - classifyEdges root getSuccs edges :: [((BlockId,BlockId),EdgeType)] - - -optimizeCFG :: D.CfgWeights -> RawCmmDecl -> CFG -> CFG -optimizeCFG _ (CmmData {}) cfg = cfg -optimizeCFG weights (CmmProc info _lab _live graph) cfg = - {-# SCC optimizeCFG #-} - -- pprTrace "Initial:" (pprEdgeWeights cfg) $ - -- pprTrace "Initial:" (ppr $ mkGlobalWeights (g_entry graph) cfg) $ - - -- pprTrace "LoopInfo:" (ppr $ loopInfo cfg (g_entry graph)) $ - favourFewerPreds . - penalizeInfoTables info . - increaseBackEdgeWeight (g_entry graph) $ cfg - where - - -- | Increase the weight of all backedges in the CFG - -- this helps to make loop jumpbacks the heaviest edges - increaseBackEdgeWeight :: BlockId -> CFG -> CFG - increaseBackEdgeWeight root cfg = - let backedges = findBackEdges root cfg - update weight - --Keep irrelevant edges irrelevant - | weight <= 0 = 0 - | otherwise - = weight + fromIntegral (D.backEdgeBonus weights) - in foldl' (\cfg edge -> updateEdgeWeight update edge cfg) - cfg backedges - - -- | Since we cant fall through info tables we penalize these. - penalizeInfoTables :: LabelMap a -> CFG -> CFG - penalizeInfoTables info cfg = - mapWeights fupdate cfg - where - fupdate :: BlockId -> BlockId -> EdgeWeight -> EdgeWeight - fupdate _ to weight - | mapMember to info - = weight - (fromIntegral $ D.infoTablePenalty weights) - | otherwise = weight - - -- | If a block has two successors, favour the one with fewer - -- predecessors and/or the one allowing fall through. - favourFewerPreds :: CFG -> CFG - favourFewerPreds cfg = - let - revCfg = - reverseEdges $ filterEdges - (\_from -> fallthroughTarget) cfg - - predCount n = length $ getSuccessorEdges revCfg n - nodes = getCfgNodes cfg - - modifiers :: Int -> Int -> (EdgeWeight, EdgeWeight) - modifiers preds1 preds2 - | preds1 < preds2 = ( 1,-1) - | preds1 == preds2 = ( 0, 0) - | otherwise = (-1, 1) - - update :: CFG -> BlockId -> CFG - update cfg node - | [(s1,e1),(s2,e2)] <- getSuccessorEdges cfg node - , !w1 <- edgeWeight e1 - , !w2 <- edgeWeight e2 - --Only change the weights if there isn't already a ordering. - , w1 == w2 - , (mod1,mod2) <- modifiers (predCount s1) (predCount s2) - = (\cfg' -> - (adjustEdgeWeight cfg' (+mod2) node s2)) - (adjustEdgeWeight cfg (+mod1) node s1) - | otherwise - = cfg - in foldl' update cfg nodes - where - fallthroughTarget :: BlockId -> EdgeInfo -> Bool - fallthroughTarget to (EdgeInfo source _weight) - | mapMember to info = False - | AsmCodeGen <- source = True - | CmmSource { trans_cmmNode = CmmBranch {} } <- source = True - | CmmSource { trans_cmmNode = CmmCondBranch {} } <- source = True - | otherwise = False - --- | Determine loop membership of blocks based on SCC analysis --- This is faster but only gives yes/no answers. -loopMembers :: HasDebugCallStack => CFG -> LabelMap Bool -loopMembers cfg = - foldl' (flip setLevel) mapEmpty sccs - where - mkNode :: BlockId -> Node BlockId BlockId - mkNode bid = DigraphNode bid bid (getSuccessors cfg bid) - nodes = map mkNode (getCfgNodes cfg) - - sccs = stronglyConnCompFromEdgedVerticesOrd nodes - - setLevel :: SCC BlockId -> LabelMap Bool -> LabelMap Bool - setLevel (AcyclicSCC bid) m = mapInsert bid False m - setLevel (CyclicSCC bids) m = foldl' (\m k -> mapInsert k True m) m bids - -loopLevels :: CFG -> BlockId -> LabelMap Int -loopLevels cfg root = liLevels loopInfos - where - loopInfos = loopInfo cfg root - -data LoopInfo = LoopInfo - { liBackEdges :: [(Edge)] -- ^ List of back edges - , liLevels :: LabelMap Int -- ^ BlockId -> LoopLevel mapping - , liLoops :: [(Edge, LabelSet)] -- ^ (backEdge, loopBody), body includes header - } - -instance Outputable LoopInfo where - ppr (LoopInfo _ _lvls loops) = - text "Loops:(backEdge, bodyNodes)" $$ - (vcat $ map ppr loops) - -{- Note [Determining the loop body] - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - Starting with the knowledge that: - * head dominates the loop - * `tail` -> `head` is a backedge - - We can determine all nodes by: - * Deleting the loop head from the graph. - * Collect all blocks which are reachable from the `tail`. - - We do so by performing bfs from the tail node towards the head. - -} - --- | Determine loop membership of blocks based on Dominator analysis. --- This is slower but gives loop levels instead of just loop membership. --- However it only detects natural loops. Irreducible control flow is not --- recognized even if it loops. But that is rare enough that we don't have --- to care about that special case. -loopInfo :: HasDebugCallStack => CFG -> BlockId -> LoopInfo -loopInfo cfg root = LoopInfo { liBackEdges = backEdges - , liLevels = mapFromList loopCounts - , liLoops = loopBodies } - where - revCfg = reverseEdges cfg - - graph = -- pprTrace "CFG - loopInfo" (pprEdgeWeights cfg) $ - fmap (setFromList . mapKeys ) cfg :: LabelMap LabelSet - - - --TODO - This should be a no op: Export constructors? Use unsafeCoerce? ... - rooted = ( fromBlockId root - , toIntMap $ fmap toIntSet graph) :: (Int, IntMap IntSet) - tree = fmap toBlockId $ Dom.domTree rooted :: Tree BlockId - - -- Map from Nodes to their dominators - domMap :: LabelMap LabelSet - domMap = mkDomMap tree - - edges = edgeList cfg :: [(BlockId, BlockId)] - -- We can't recompute nodes from edges, there might be blocks not connected via edges. - nodes = getCfgNodes cfg :: [BlockId] - - -- identify back edges - isBackEdge (from,to) - | Just doms <- mapLookup from domMap - , setMember to doms - = True - | otherwise = False - - -- See Note [Determining the loop body] - -- Get the loop body associated with a back edge. - findBody edge@(tail, head) - = ( edge, setInsert head $ go (setSingleton tail) (setSingleton tail) ) - where - -- See Note [Determining the loop body] - cfg' = delNode head revCfg - - go :: LabelSet -> LabelSet -> LabelSet - go found current - | setNull current = found - | otherwise = go (setUnion newSuccessors found) - newSuccessors - where - -- Really predecessors, since we use the reversed cfg. - newSuccessors = setFilter (\n -> not $ setMember n found) successors :: LabelSet - successors = setFromList $ concatMap - (getSuccessors cfg') - -- we filter head as it's no longer part of the cfg. - (filter (/= head) $ setElems current) :: LabelSet - - backEdges = filter isBackEdge edges - loopBodies = map findBody backEdges :: [(Edge, LabelSet)] - - -- Block b is part of n loop bodies => loop nest level of n - loopCounts = - let bodies = map (first snd) loopBodies -- [(Header, Body)] - loopCount n = length $ nub . map fst . filter (setMember n . snd) $ bodies - in map (\n -> (n, loopCount n)) $ nodes :: [(BlockId, Int)] - - toIntSet :: LabelSet -> IntSet - toIntSet s = IS.fromList . map fromBlockId . setElems $ s - toIntMap :: LabelMap a -> IntMap a - toIntMap m = IM.fromList $ map (\(x,y) -> (fromBlockId x,y)) $ mapToList m - - mkDomMap :: Tree BlockId -> LabelMap LabelSet - mkDomMap root = mapFromList $ go setEmpty root - where - go :: LabelSet -> Tree BlockId -> [(Label,LabelSet)] - go parents (Node lbl []) - = [(lbl, parents)] - go parents (Node _ leaves) - = let nodes = map rootLabel leaves - entries = map (\x -> (x,parents)) nodes - in entries ++ concatMap - (\n -> go (setInsert (rootLabel n) parents) n) - leaves - - fromBlockId :: BlockId -> Int - fromBlockId = getKey . getUnique - - toBlockId :: Int -> BlockId - toBlockId = mkBlockId . mkUniqueGrimily - --- We make the CFG a Hoopl Graph, so we can reuse revPostOrder. -newtype BlockNode (e :: Extensibility) (x :: Extensibility) = BN (BlockId,[BlockId]) - -instance G.NonLocal (BlockNode) where - entryLabel (BN (lbl,_)) = lbl - successors (BN (_,succs)) = succs - -revPostorderFrom :: HasDebugCallStack => CFG -> BlockId -> [BlockId] -revPostorderFrom cfg root = - map fromNode $ G.revPostorderFrom hooplGraph root - where - nodes = getCfgNodes cfg - hooplGraph = foldl' (\m n -> mapInsert n (toNode n) m) mapEmpty nodes - - fromNode :: BlockNode C C -> BlockId - fromNode (BN x) = fst x - - toNode :: BlockId -> BlockNode C C - toNode bid = - BN (bid,getSuccessors cfg $ bid) - - --- | We take in a CFG which has on its edges weights which are --- relative only to other edges originating from the same node. --- --- We return a CFG for which each edge represents a GLOBAL weight. --- This means edge weights are comparable across the whole graph. --- --- For irreducible control flow results might be imprecise, otherwise they --- are reliable. --- --- The algorithm is based on the Paper --- "Static Branch Prediction and Program Profile Analysis" by Y Wu, JR Larus --- The only big change is that we go over the nodes in the body of loops in --- reverse post order. Which is required for diamond control flow to work probably. --- --- We also apply a few prediction heuristics (based on the same paper) - -{-# NOINLINE mkGlobalWeights #-} -{-# SCC mkGlobalWeights #-} -mkGlobalWeights :: HasDebugCallStack => BlockId -> CFG -> (LabelMap Double, LabelMap (LabelMap Double)) -mkGlobalWeights root localCfg - | null localCfg = panic "Error - Empty CFG" - | otherwise - = (blockFreqs', edgeFreqs') - where - -- Calculate fixpoints - (blockFreqs, edgeFreqs) = calcFreqs nodeProbs backEdges' bodies' revOrder' - blockFreqs' = mapFromList $ map (first fromVertex) (assocs blockFreqs) :: LabelMap Double - edgeFreqs' = fmap fromVertexMap $ fromVertexMap edgeFreqs - - fromVertexMap :: IM.IntMap x -> LabelMap x - fromVertexMap m = mapFromList . map (first fromVertex) $ IM.toList m - - revOrder = revPostorderFrom localCfg root :: [BlockId] - loopResults@(LoopInfo backedges _levels bodies) = loopInfo localCfg root - - revOrder' = map toVertex revOrder - backEdges' = map (bimap toVertex toVertex) backedges - bodies' = map calcBody bodies - - estimatedCfg = staticBranchPrediction root loopResults localCfg - -- Normalize the weights to probabilities and apply heuristics - nodeProbs = cfgEdgeProbabilities estimatedCfg toVertex - - -- By mapping vertices to numbers in reverse post order we can bring any subset into reverse post - -- order simply by sorting. - -- TODO: The sort is redundant if we can guarantee that setElems returns elements ascending - calcBody (backedge, blocks) = - (toVertex $ snd backedge, sort . map toVertex $ (setElems blocks)) - - vertexMapping = mapFromList $ zip revOrder [0..] :: LabelMap Int - blockMapping = listArray (0,mapSize vertexMapping - 1) revOrder :: Array Int BlockId - -- Map from blockId to indices starting at zero - toVertex :: BlockId -> Int - toVertex blockId = expectJust "mkGlobalWeights" $ mapLookup blockId vertexMapping - -- Map from indices starting at zero to blockIds - fromVertex :: Int -> BlockId - fromVertex vertex = blockMapping ! vertex - -{- Note [Static Branch Prediction] - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The work here has been based on the paper -"Static Branch Prediction and Program Profile Analysis" by Y Wu, JR Larus. - -The primary differences are that if we branch on the result of a heap -check we do not apply any of the heuristics. -The reason is simple: They look like loops in the control flow graph -but are usually never entered, and if at most once. - -Currently implemented is a heuristic to predict that we do not exit -loops (lehPredicts) and one to predict that backedges are more likely -than any other edge. - -The back edge case is special as it superceeds any other heuristic if it -applies. - -Do NOT rely solely on nofib results for benchmarking this. I recommend at least -comparing megaparsec and container benchmarks. Nofib does not seeem to have -many instances of "loopy" Cmm where these make a difference. - -TODO: -* The paper containers more benchmarks which should be implemented. -* If we turn the likelihood on if/else branches into a probability - instead of true/false we could implement this as a Cmm pass. - + The complete Cmm code still exists and can be accessed by the heuristics - + There is no chance of register allocation/codegen inserting branches/blocks - + making the TransitionSource info wrong. - + potential to use this information in CmmPasses. - - Requires refactoring of all the code relying on the binary nature of likelihood. - - Requires refactoring `loopInfo` to work on both, Cmm Graphs and the backend CFG. --} - --- | Combination of target node id and information about the branch --- we are looking at. -type TargetNodeInfo = (BlockId, EdgeInfo) - - --- | Update branch weights based on certain heuristics. --- See Note [Static Branch Prediction] --- TODO: This should be combined with optimizeCFG -{-# SCC staticBranchPrediction #-} -staticBranchPrediction :: BlockId -> LoopInfo -> CFG -> CFG -staticBranchPrediction _root (LoopInfo l_backEdges loopLevels l_loops) cfg = - -- pprTrace "staticEstimatesOn" (ppr (cfg)) $ - foldl' update cfg nodes - where - nodes = getCfgNodes cfg - backedges = S.fromList $ l_backEdges - -- Loops keyed by their back edge - loops = M.fromList $ l_loops :: M.Map Edge LabelSet - loopHeads = S.fromList $ map snd $ M.keys loops - - update :: CFG -> BlockId -> CFG - update cfg node - -- No successors, nothing to do. - | null successors = cfg - - -- Mix of backedges and others: - -- Always predict the backedges. - | not (null m) && length m < length successors - -- Heap/Stack checks "loop", but only once. - -- So we simply exclude any case involving them. - , not $ any (isHeapOrStackCheck . transitionSource . snd) successors - = let loopChance = repeat $! pred_LBH / (fromIntegral $ length m) - exitChance = repeat $! (1 - pred_LBH) / fromIntegral (length not_m) - updates = zip (map fst m) loopChance ++ zip (map fst not_m) exitChance - in -- pprTrace "mix" (ppr (node,successors)) $ - foldl' (\cfg (to,weight) -> setEdgeWeight cfg weight node to) cfg updates - - -- For (regular) non-binary branches we keep the weights from the STG -> Cmm translation. - | length successors /= 2 - = cfg - - -- Only backedges - no need to adjust - | length m > 0 - = cfg - - -- A regular binary branch, we can plug addition predictors in here. - | [(s1,s1_info),(s2,s2_info)] <- successors - , not $ any (isHeapOrStackCheck . transitionSource . snd) successors - = -- Normalize weights to total of 1 - let !w1 = max (edgeWeight s1_info) (0) - !w2 = max (edgeWeight s2_info) (0) - -- Of both weights are <= 0 we set both to 0.5 - normalizeWeight w = if w1 + w2 == 0 then 0.5 else w/(w1+w2) - !cfg' = setEdgeWeight cfg (normalizeWeight w1) node s1 - !cfg'' = setEdgeWeight cfg' (normalizeWeight w2) node s2 - - -- Figure out which heuristics apply to these successors - heuristics = map ($ ((s1,s1_info),(s2,s2_info))) - [lehPredicts, phPredicts, ohPredicts, ghPredicts, lhhPredicts, chPredicts - , shPredicts, rhPredicts] - -- Apply result of a heuristic. Argument is the likelihood - -- predicted for s1. - applyHeuristic :: CFG -> Maybe Prob -> CFG - applyHeuristic cfg Nothing = cfg - applyHeuristic cfg (Just (s1_pred :: Double)) - | s1_old == 0 || s2_old == 0 || - isHeapOrStackCheck (transitionSource s1_info) || - isHeapOrStackCheck (transitionSource s2_info) - = cfg - | otherwise = - let -- Predictions from heuristic - s1_prob = EdgeWeight s1_pred :: EdgeWeight - s2_prob = 1.0 - s1_prob - -- Update - d = (s1_old * s1_prob) + (s2_old * s2_prob) :: EdgeWeight - s1_prob' = s1_old * s1_prob / d - !s2_prob' = s2_old * s2_prob / d - !cfg_s1 = setEdgeWeight cfg s1_prob' node s1 - in -- pprTrace "Applying heuristic!" (ppr (node,s1,s2) $$ ppr (s1_prob', s2_prob')) $ - setEdgeWeight cfg_s1 s2_prob' node s2 - where - -- Old weights - s1_old = getEdgeWeight cfg node s1 - s2_old = getEdgeWeight cfg node s2 - - in - -- pprTraceIt "RegularCfgResult" $ - foldl' applyHeuristic cfg'' heuristics - - -- Branch on heap/stack check - | otherwise = cfg - - where - -- Chance that loops are taken. - pred_LBH = 0.875 - -- successors - successors = getSuccessorEdges cfg node - -- backedges - (m,not_m) = partition (\succ -> S.member (node, fst succ) backedges) successors - - -- Heuristics return nothing if they don't say anything about this branch - -- or Just (prob_s1) where prob_s1 is the likelihood for s1 to be the - -- taken branch. s1 is the branch in the true case. - - -- Loop exit heuristic. - -- We are unlikely to leave a loop unless it's to enter another one. - pred_LEH = 0.75 - -- If and only if no successor is a loopheader, - -- then we will likely not exit the current loop body. - lehPredicts :: (TargetNodeInfo,TargetNodeInfo) -> Maybe Prob - lehPredicts ((s1,_s1_info),(s2,_s2_info)) - | S.member s1 loopHeads || S.member s2 loopHeads - = Nothing - - | otherwise - = --pprTrace "lehPredict:" (ppr $ compare s1Level s2Level) $ - case compare s1Level s2Level of - EQ -> Nothing - LT -> Just (1-pred_LEH) --s1 exits to a shallower loop level (exits loop) - GT -> Just (pred_LEH) --s1 exits to a deeper loop level - where - s1Level = mapLookup s1 loopLevels - s2Level = mapLookup s2 loopLevels - - -- Comparing to a constant is unlikely to be equal. - ohPredicts (s1,_s2) - | CmmSource { trans_cmmNode = src1 } <- getTransitionSource node (fst s1) cfg - , CmmCondBranch cond ltrue _lfalse likely <- src1 - , likely == Nothing - , CmmMachOp mop args <- cond - , MO_Eq {} <- mop - , not (null [x | x@CmmLit{} <- args]) - = if fst s1 == ltrue then Just 0.3 else Just 0.7 - - | otherwise - = Nothing - - -- TODO: These are all the other heuristics from the paper. - -- Not all will apply, for now we just stub them out as Nothing. - phPredicts = const Nothing - ghPredicts = const Nothing - lhhPredicts = const Nothing - chPredicts = const Nothing - shPredicts = const Nothing - rhPredicts = const Nothing - --- We normalize all edge weights as probabilities between 0 and 1. --- Ignoring rounding errors all outgoing edges sum up to 1. -cfgEdgeProbabilities :: CFG -> (BlockId -> Int) -> IM.IntMap (IM.IntMap Prob) -cfgEdgeProbabilities cfg toVertex - = mapFoldlWithKey foldEdges IM.empty cfg - where - foldEdges = (\m from toMap -> IM.insert (toVertex from) (normalize toMap) m) - - normalize :: (LabelMap EdgeInfo) -> (IM.IntMap Prob) - normalize weightMap - | edgeCount <= 1 = mapFoldlWithKey (\m k _ -> IM.insert (toVertex k) 1.0 m) IM.empty weightMap - | otherwise = mapFoldlWithKey (\m k _ -> IM.insert (toVertex k) (normalWeight k) m) IM.empty weightMap - where - edgeCount = mapSize weightMap - -- Negative weights are generally allowed but are mapped to zero. - -- We then check if there is at least one non-zero edge and if not - -- assign uniform weights to all branches. - minWeight = 0 :: Prob - weightMap' = fmap (\w -> max (weightToDouble . edgeWeight $ w) minWeight) weightMap - totalWeight = sum weightMap' - - normalWeight :: BlockId -> Prob - normalWeight bid - | totalWeight == 0 - = 1.0 / fromIntegral edgeCount - | Just w <- mapLookup bid weightMap' - = w/totalWeight - | otherwise = panic "impossible" - --- This is the fixpoint algorithm from --- "Static Branch Prediction and Program Profile Analysis" by Y Wu, JR Larus --- The adaption to Haskell is my own. -calcFreqs :: IM.IntMap (IM.IntMap Prob) -> [(Int,Int)] -> [(Int, [Int])] -> [Int] - -> (Array Int Double, IM.IntMap (IM.IntMap Prob)) -calcFreqs graph backEdges loops revPostOrder = runST $ do - visitedNodes <- newArray (0,nodeCount-1) False :: ST s (STUArray s Int Bool) - blockFreqs <- newArray (0,nodeCount-1) 0.0 :: ST s (STUArray s Int Double) - edgeProbs <- newSTRef graph - edgeBackProbs <- newSTRef graph - - -- let traceArray a = do - -- vs <- forM [0..nodeCount-1] $ \i -> readArray a i >>= (\v -> return (i,v)) - -- trace ("array: " ++ show vs) $ return () - - let -- See #1600, we need to inline or unboxing makes perf worse. - -- {-# INLINE getFreq #-} - {-# INLINE visited #-} - visited b = unsafeRead visitedNodes b - getFreq b = unsafeRead blockFreqs b - -- setFreq :: forall s. Int -> Double -> ST s () - setFreq b f = unsafeWrite blockFreqs b f - -- setVisited :: forall s. Node -> ST s () - setVisited b = unsafeWrite visitedNodes b True - -- Frequency/probability that edge is taken. - getProb' arr b1 b2 = readSTRef arr >>= - (\graph -> - return . - fromMaybe (error "getFreq 1") . - IM.lookup b2 . - fromMaybe (error "getFreq 2") $ - (IM.lookup b1 graph) - ) - setProb' arr b1 b2 prob = do - g <- readSTRef arr - let !m = fromMaybe (error "Foo") $ IM.lookup b1 g - !m' = IM.insert b2 prob m - writeSTRef arr $! (IM.insert b1 m' g) - - getEdgeFreq b1 b2 = getProb' edgeProbs b1 b2 - setEdgeFreq b1 b2 = setProb' edgeProbs b1 b2 - getProb b1 b2 = fromMaybe (error "getProb") $ do - m' <- IM.lookup b1 graph - IM.lookup b2 m' - - getBackProb b1 b2 = getProb' edgeBackProbs b1 b2 - setBackProb b1 b2 = setProb' edgeBackProbs b1 b2 - - - let -- calcOutFreqs :: Node -> ST s () - calcOutFreqs bhead block = do - !f <- getFreq block - forM (successors block) $ \bi -> do - let !prob = getProb block bi - let !succFreq = f * prob - setEdgeFreq block bi succFreq - -- traceM $ "SetOut: " ++ show (block, bi, f, prob, succFreq) - when (bi == bhead) $ setBackProb block bi succFreq - - - let propFreq block head = do - -- traceM ("prop:" ++ show (block,head)) - -- traceShowM block - - !v <- visited block - if v then - return () --Dont look at nodes twice - else if block == head then - setFreq block 1.0 -- Loop header frequency is always 1 - else do - let preds = IS.elems $ predecessors block - irreducible <- (fmap or) $ forM preds $ \bp -> do - !bp_visited <- visited bp - let bp_backedge = isBackEdge bp block - return (not bp_visited && not bp_backedge) - - if irreducible - then return () -- Rare we don't care - else do - setFreq block 0 - !cycleProb <- sum <$> (forM preds $ \pred -> do - if isBackEdge pred block - then - getBackProb pred block - else do - !f <- getFreq block - !prob <- getEdgeFreq pred block - setFreq block $! f + prob - return 0) - -- traceM $ "cycleProb:" ++ show cycleProb - let limit = 1 - 1/512 -- Paper uses 1 - epsilon, but this works. - -- determines how large likelyhoods in loops can grow. - !cycleProb <- return $ min cycleProb limit -- <- return $ if cycleProb > limit then limit else cycleProb - -- traceM $ "cycleProb:" ++ show cycleProb - - !f <- getFreq block - setFreq block (f / (1.0 - cycleProb)) - - setVisited block - calcOutFreqs head block - - -- Loops, by nesting, inner to outer - forM_ loops $ \(head, body) -> do - forM_ [0 .. nodeCount - 1] (\i -> unsafeWrite visitedNodes i True) -- Mark all nodes as visited. - forM_ body (\i -> unsafeWrite visitedNodes i False) -- Mark all blocks reachable from head as not visited - forM_ body $ \block -> propFreq block head - - -- After dealing with all loops, deal with non-looping parts of the CFG - forM_ [0 .. nodeCount - 1] (\i -> unsafeWrite visitedNodes i False) -- Everything in revPostOrder is reachable - forM_ revPostOrder $ \block -> propFreq block (head revPostOrder) - - -- trace ("Final freqs:") $ return () - -- let freqString = pprFreqs freqs - -- trace (unlines freqString) $ return () - -- trace (pprFre) $ return () - graph' <- readSTRef edgeProbs - freqs' <- unsafeFreeze blockFreqs - - return (freqs', graph') - where - -- How can these lookups fail? Consider the CFG [A -> B] - predecessors :: Int -> IS.IntSet - predecessors b = fromMaybe IS.empty $ IM.lookup b revGraph - successors :: Int -> [Int] - successors b = fromMaybe (lookupError "succ" b graph)$ IM.keys <$> IM.lookup b graph - lookupError s b g = pprPanic ("Lookup error " ++ s) $ - ( text "node" <+> ppr b $$ - text "graph" <+> - vcat (map (\(k,m) -> ppr (k,m :: IM.IntMap Double)) $ IM.toList g) - ) - - nodeCount = IM.foldl' (\count toMap -> IM.foldlWithKey' countTargets count toMap) (IM.size graph) graph - where - countTargets = (\count k _ -> countNode k + count ) - countNode n = if IM.member n graph then 0 else 1 - - isBackEdge from to = S.member (from,to) backEdgeSet - backEdgeSet = S.fromList backEdges - - revGraph :: IntMap IntSet - revGraph = IM.foldlWithKey' (\m from toMap -> addEdges m from toMap) IM.empty graph - where - addEdges m0 from toMap = IM.foldlWithKey' (\m k _ -> addEdge m from k) m0 toMap - addEdge m0 from to = IM.insertWith IS.union to (IS.singleton from) m0 diff --git a/compiler/nativeGen/CPrim.hs b/compiler/nativeGen/CPrim.hs deleted file mode 100644 index 344e62d53c..0000000000 --- a/compiler/nativeGen/CPrim.hs +++ /dev/null @@ -1,133 +0,0 @@ --- | Generating C symbol names emitted by the compiler. -module CPrim - ( atomicReadLabel - , atomicWriteLabel - , atomicRMWLabel - , cmpxchgLabel - , popCntLabel - , pdepLabel - , pextLabel - , bSwapLabel - , bRevLabel - , clzLabel - , ctzLabel - , word2FloatLabel - ) where - -import GhcPrelude - -import GHC.Cmm.Type -import GHC.Cmm.MachOp -import Outputable - -popCntLabel :: Width -> String -popCntLabel w = "hs_popcnt" ++ pprWidth w - where - pprWidth W8 = "8" - pprWidth W16 = "16" - pprWidth W32 = "32" - pprWidth W64 = "64" - pprWidth w = pprPanic "popCntLabel: Unsupported word width " (ppr w) - -pdepLabel :: Width -> String -pdepLabel w = "hs_pdep" ++ pprWidth w - where - pprWidth W8 = "8" - pprWidth W16 = "16" - pprWidth W32 = "32" - pprWidth W64 = "64" - pprWidth w = pprPanic "pdepLabel: Unsupported word width " (ppr w) - -pextLabel :: Width -> String -pextLabel w = "hs_pext" ++ pprWidth w - where - pprWidth W8 = "8" - pprWidth W16 = "16" - pprWidth W32 = "32" - pprWidth W64 = "64" - pprWidth w = pprPanic "pextLabel: Unsupported word width " (ppr w) - -bSwapLabel :: Width -> String -bSwapLabel w = "hs_bswap" ++ pprWidth w - where - pprWidth W16 = "16" - pprWidth W32 = "32" - pprWidth W64 = "64" - pprWidth w = pprPanic "bSwapLabel: Unsupported word width " (ppr w) - -bRevLabel :: Width -> String -bRevLabel w = "hs_bitrev" ++ pprWidth w - where - pprWidth W8 = "8" - pprWidth W16 = "16" - pprWidth W32 = "32" - pprWidth W64 = "64" - pprWidth w = pprPanic "bRevLabel: Unsupported word width " (ppr w) - -clzLabel :: Width -> String -clzLabel w = "hs_clz" ++ pprWidth w - where - pprWidth W8 = "8" - pprWidth W16 = "16" - pprWidth W32 = "32" - pprWidth W64 = "64" - pprWidth w = pprPanic "clzLabel: Unsupported word width " (ppr w) - -ctzLabel :: Width -> String -ctzLabel w = "hs_ctz" ++ pprWidth w - where - pprWidth W8 = "8" - pprWidth W16 = "16" - pprWidth W32 = "32" - pprWidth W64 = "64" - pprWidth w = pprPanic "ctzLabel: Unsupported word width " (ppr w) - -word2FloatLabel :: Width -> String -word2FloatLabel w = "hs_word2float" ++ pprWidth w - where - pprWidth W32 = "32" - pprWidth W64 = "64" - pprWidth w = pprPanic "word2FloatLabel: Unsupported word width " (ppr w) - -atomicRMWLabel :: Width -> AtomicMachOp -> String -atomicRMWLabel w amop = "hs_atomic_" ++ pprFunName amop ++ pprWidth w - where - pprWidth W8 = "8" - pprWidth W16 = "16" - pprWidth W32 = "32" - pprWidth W64 = "64" - pprWidth w = pprPanic "atomicRMWLabel: Unsupported word width " (ppr w) - - pprFunName AMO_Add = "add" - pprFunName AMO_Sub = "sub" - pprFunName AMO_And = "and" - pprFunName AMO_Nand = "nand" - pprFunName AMO_Or = "or" - pprFunName AMO_Xor = "xor" - -cmpxchgLabel :: Width -> String -cmpxchgLabel w = "hs_cmpxchg" ++ pprWidth w - where - pprWidth W8 = "8" - pprWidth W16 = "16" - pprWidth W32 = "32" - pprWidth W64 = "64" - pprWidth w = pprPanic "cmpxchgLabel: Unsupported word width " (ppr w) - -atomicReadLabel :: Width -> String -atomicReadLabel w = "hs_atomicread" ++ pprWidth w - where - pprWidth W8 = "8" - pprWidth W16 = "16" - pprWidth W32 = "32" - pprWidth W64 = "64" - pprWidth w = pprPanic "atomicReadLabel: Unsupported word width " (ppr w) - -atomicWriteLabel :: Width -> String -atomicWriteLabel w = "hs_atomicwrite" ++ pprWidth w - where - pprWidth W8 = "8" - pprWidth W16 = "16" - pprWidth W32 = "32" - pprWidth W64 = "64" - pprWidth w = pprPanic "atomicWriteLabel: Unsupported word width " (ppr w) diff --git a/compiler/nativeGen/Dwarf.hs b/compiler/nativeGen/Dwarf.hs deleted file mode 100644 index 5bd62a7234..0000000000 --- a/compiler/nativeGen/Dwarf.hs +++ /dev/null @@ -1,269 +0,0 @@ -module Dwarf ( - dwarfGen - ) where - -import GhcPrelude - -import GHC.Cmm.CLabel -import GHC.Cmm.Expr ( GlobalReg(..) ) -import Config ( cProjectName, cProjectVersion ) -import CoreSyn ( Tickish(..) ) -import GHC.Cmm.DebugBlock -import GHC.Driver.Session -import Module -import Outputable -import GHC.Platform -import Unique -import UniqSupply - -import Dwarf.Constants -import Dwarf.Types - -import Control.Arrow ( first ) -import Control.Monad ( mfilter ) -import Data.Maybe -import Data.List ( sortBy ) -import Data.Ord ( comparing ) -import qualified Data.Map as Map -import System.FilePath -import System.Directory ( getCurrentDirectory ) - -import qualified GHC.Cmm.Dataflow.Label as H -import qualified GHC.Cmm.Dataflow.Collections as H - --- | Generate DWARF/debug information -dwarfGen :: DynFlags -> ModLocation -> UniqSupply -> [DebugBlock] - -> IO (SDoc, UniqSupply) -dwarfGen _ _ us [] = return (empty, us) -dwarfGen df modLoc us blocks = do - - -- Convert debug data structures to DWARF info records - -- We strip out block information when running with -g0 or -g1. - let procs = debugSplitProcs blocks - stripBlocks dbg - | debugLevel df < 2 = dbg { dblBlocks = [] } - | otherwise = dbg - compPath <- getCurrentDirectory - let lowLabel = dblCLabel $ head procs - highLabel = mkAsmTempEndLabel $ dblCLabel $ last procs - dwarfUnit = DwarfCompileUnit - { dwChildren = map (procToDwarf df) (map stripBlocks procs) - , dwName = fromMaybe "" (ml_hs_file modLoc) - , dwCompDir = addTrailingPathSeparator compPath - , dwProducer = cProjectName ++ " " ++ cProjectVersion - , dwLowLabel = lowLabel - , dwHighLabel = highLabel - , dwLineLabel = dwarfLineLabel - } - - -- Check whether we have any source code information, so we do not - -- end up writing a pointer to an empty .debug_line section - -- (dsymutil on Mac Os gets confused by this). - let haveSrcIn blk = isJust (dblSourceTick blk) && isJust (dblPosition blk) - || any haveSrcIn (dblBlocks blk) - haveSrc = any haveSrcIn procs - - -- .debug_abbrev section: Declare the format we're using - let abbrevSct = pprAbbrevDecls haveSrc - - -- .debug_info section: Information records on procedures and blocks - let -- unique to identify start and end compilation unit .debug_inf - (unitU, us') = takeUniqFromSupply us - infoSct = vcat [ ptext dwarfInfoLabel <> colon - , dwarfInfoSection - , compileUnitHeader unitU - , pprDwarfInfo haveSrc dwarfUnit - , compileUnitFooter unitU - ] - - -- .debug_line section: Generated mainly by the assembler, but we - -- need to label it - let lineSct = dwarfLineSection $$ - ptext dwarfLineLabel <> colon - - -- .debug_frame section: Information about the layout of the GHC stack - let (framesU, us'') = takeUniqFromSupply us' - frameSct = dwarfFrameSection $$ - ptext dwarfFrameLabel <> colon $$ - pprDwarfFrame (debugFrame framesU procs) - - -- .aranges section: Information about the bounds of compilation units - let aranges' | gopt Opt_SplitSections df = map mkDwarfARange procs - | otherwise = [DwarfARange lowLabel highLabel] - let aranges = dwarfARangesSection $$ pprDwarfARanges aranges' unitU - - return (infoSct $$ abbrevSct $$ lineSct $$ frameSct $$ aranges, us'') - --- | Build an address range entry for one proc. --- With split sections, each proc needs its own entry, since they may get --- scattered in the final binary. Without split sections, we could make a --- single arange based on the first/last proc. -mkDwarfARange :: DebugBlock -> DwarfARange -mkDwarfARange proc = DwarfARange start end - where - start = dblCLabel proc - end = mkAsmTempEndLabel start - --- | Header for a compilation unit, establishing global format --- parameters -compileUnitHeader :: Unique -> SDoc -compileUnitHeader unitU = sdocWithPlatform $ \plat -> - let cuLabel = mkAsmTempLabel unitU -- sits right before initialLength field - length = ppr (mkAsmTempEndLabel cuLabel) <> char '-' <> ppr cuLabel - <> text "-4" -- length of initialLength field - in vcat [ ppr cuLabel <> colon - , text "\t.long " <> length -- compilation unit size - , pprHalf 3 -- DWARF version - , sectionOffset (ptext dwarfAbbrevLabel) (ptext dwarfAbbrevLabel) - -- abbrevs offset - , text "\t.byte " <> ppr (platformWordSizeInBytes plat) -- word size - ] - --- | Compilation unit footer, mainly establishing size of debug sections -compileUnitFooter :: Unique -> SDoc -compileUnitFooter unitU = - let cuEndLabel = mkAsmTempEndLabel $ mkAsmTempLabel unitU - in ppr cuEndLabel <> colon - --- | Splits the blocks by procedures. In the result all nested blocks --- will come from the same procedure as the top-level block. See --- Note [Splitting DebugBlocks] for details. -debugSplitProcs :: [DebugBlock] -> [DebugBlock] -debugSplitProcs b = concat $ H.mapElems $ mergeMaps $ map (split Nothing) b - where mergeMaps = foldr (H.mapUnionWithKey (const (++))) H.mapEmpty - split :: Maybe DebugBlock -> DebugBlock -> H.LabelMap [DebugBlock] - split parent blk = H.mapInsert prc [blk'] nested - where prc = dblProcedure blk - blk' = blk { dblBlocks = own_blks - , dblParent = parent - } - own_blks = fromMaybe [] $ H.mapLookup prc nested - nested = mergeMaps $ map (split parent') $ dblBlocks blk - -- Figure out who should be the parent of nested blocks. - -- If @blk@ is optimized out then it isn't a good choice - -- and we just use its parent. - parent' - | Nothing <- dblPosition blk = parent - | otherwise = Just blk - -{- -Note [Splitting DebugBlocks] - -DWARF requires that we break up the nested DebugBlocks produced from -the C-- AST. For instance, we begin with tick trees containing nested procs. -For example, - - proc A [tick1, tick2] - block B [tick3] - proc C [tick4] - -when producing DWARF we need to procs (which are represented in DWARF as -TAG_subprogram DIEs) to be top-level DIEs. debugSplitProcs is responsible for -this transform, pulling out the nested procs into top-level procs. - -However, in doing this we need to be careful to preserve the parentage of the -nested procs. This is the reason DebugBlocks carry the dblParent field, allowing -us to reorganize the above tree as, - - proc A [tick1, tick2] - block B [tick3] - proc C [tick4] parent=B - -Here we have annotated the new proc C with an attribute giving its original -parent, B. --} - --- | Generate DWARF info for a procedure debug block -procToDwarf :: DynFlags -> DebugBlock -> DwarfInfo -procToDwarf df prc - = DwarfSubprogram { dwChildren = map (blockToDwarf df) (dblBlocks prc) - , dwName = case dblSourceTick prc of - Just s@SourceNote{} -> sourceName s - _otherwise -> showSDocDump df $ ppr $ dblLabel prc - , dwLabel = dblCLabel prc - , dwParent = fmap mkAsmTempDieLabel - $ mfilter goodParent - $ fmap dblCLabel (dblParent prc) - } - where - goodParent a | a == dblCLabel prc = False - -- Omit parent if it would be self-referential - goodParent a | not (externallyVisibleCLabel a) - , debugLevel df < 2 = False - -- We strip block information when running -g0 or -g1, don't - -- refer to blocks in that case. Fixes #14894. - goodParent _ = True - --- | Generate DWARF info for a block -blockToDwarf :: DynFlags -> DebugBlock -> DwarfInfo -blockToDwarf df blk - = DwarfBlock { dwChildren = concatMap (tickToDwarf df) (dblTicks blk) - ++ map (blockToDwarf df) (dblBlocks blk) - , dwLabel = dblCLabel blk - , dwMarker = marker - } - where - marker - | Just _ <- dblPosition blk = Just $ mkAsmTempLabel $ dblLabel blk - | otherwise = Nothing -- block was optimized out - -tickToDwarf :: DynFlags -> Tickish () -> [DwarfInfo] -tickToDwarf _ (SourceNote ss _) = [DwarfSrcNote ss] -tickToDwarf _ _ = [] - --- | Generates the data for the debug frame section, which encodes the --- desired stack unwind behaviour for the debugger -debugFrame :: Unique -> [DebugBlock] -> DwarfFrame -debugFrame u procs - = DwarfFrame { dwCieLabel = mkAsmTempLabel u - , dwCieInit = initUws - , dwCieProcs = map (procToFrame initUws) procs - } - where - initUws :: UnwindTable - initUws = Map.fromList [(Sp, Just (UwReg Sp 0))] - --- | Generates unwind information for a procedure debug block -procToFrame :: UnwindTable -> DebugBlock -> DwarfFrameProc -procToFrame initUws blk - = DwarfFrameProc { dwFdeProc = dblCLabel blk - , dwFdeHasInfo = dblHasInfoTbl blk - , dwFdeBlocks = map (uncurry blockToFrame) - (setHasInfo blockUws) - } - where blockUws :: [(DebugBlock, [UnwindPoint])] - blockUws = map snd $ sortBy (comparing fst) $ flatten blk - - flatten :: DebugBlock - -> [(Int, (DebugBlock, [UnwindPoint]))] - flatten b@DebugBlock{ dblPosition=pos, dblUnwind=uws, dblBlocks=blocks } - | Just p <- pos = (p, (b, uws')):nested - | otherwise = nested -- block was optimized out - where uws' = addDefaultUnwindings initUws uws - nested = concatMap flatten blocks - - -- | If the current procedure has an info table, then we also say that - -- its first block has one to ensure that it gets the necessary -1 - -- offset applied to its start address. - -- See Note [Info Offset] in Dwarf.Types. - setHasInfo :: [(DebugBlock, [UnwindPoint])] - -> [(DebugBlock, [UnwindPoint])] - setHasInfo [] = [] - setHasInfo (c0:cs) = first setIt c0 : cs - where - setIt child = - child { dblHasInfoTbl = dblHasInfoTbl child - || dblHasInfoTbl blk } - -blockToFrame :: DebugBlock -> [UnwindPoint] -> DwarfFrameBlock -blockToFrame blk uws - = DwarfFrameBlock { dwFdeBlkHasInfo = dblHasInfoTbl blk - , dwFdeUnwind = uws - } - -addDefaultUnwindings :: UnwindTable -> [UnwindPoint] -> [UnwindPoint] -addDefaultUnwindings tbl pts = - [ UnwindPoint lbl (tbl' `mappend` tbl) - -- mappend is left-biased - | UnwindPoint lbl tbl' <- pts - ] diff --git a/compiler/nativeGen/Dwarf/Constants.hs b/compiler/nativeGen/Dwarf/Constants.hs deleted file mode 100644 index 01b85c47bc..0000000000 --- a/compiler/nativeGen/Dwarf/Constants.hs +++ /dev/null @@ -1,229 +0,0 @@ --- | Constants describing the DWARF format. Most of this simply --- mirrors /usr/include/dwarf.h. - -module Dwarf.Constants where - -import GhcPrelude - -import AsmUtils -import FastString -import GHC.Platform -import Outputable - -import Reg -import X86.Regs - -import Data.Word - --- | Language ID used for Haskell. -dW_LANG_Haskell :: Word -dW_LANG_Haskell = 0x18 - -- Thanks to Nathan Howell for getting us our very own language ID! - --- * Dwarf tags -dW_TAG_compile_unit, dW_TAG_subroutine_type, - dW_TAG_file_type, dW_TAG_subprogram, dW_TAG_lexical_block, - dW_TAG_base_type, dW_TAG_structure_type, dW_TAG_pointer_type, - dW_TAG_array_type, dW_TAG_subrange_type, dW_TAG_typedef, - dW_TAG_variable, dW_TAG_arg_variable, dW_TAG_auto_variable, - dW_TAG_ghc_src_note :: Word -dW_TAG_array_type = 1 -dW_TAG_lexical_block = 11 -dW_TAG_pointer_type = 15 -dW_TAG_compile_unit = 17 -dW_TAG_structure_type = 19 -dW_TAG_typedef = 22 -dW_TAG_subroutine_type = 32 -dW_TAG_subrange_type = 33 -dW_TAG_base_type = 36 -dW_TAG_file_type = 41 -dW_TAG_subprogram = 46 -dW_TAG_variable = 52 -dW_TAG_auto_variable = 256 -dW_TAG_arg_variable = 257 - -dW_TAG_ghc_src_note = 0x5b00 - --- * Dwarf attributes -dW_AT_name, dW_AT_stmt_list, dW_AT_low_pc, dW_AT_high_pc, dW_AT_language, - dW_AT_comp_dir, dW_AT_producer, dW_AT_external, dW_AT_frame_base, - dW_AT_use_UTF8, dW_AT_MIPS_linkage_name :: Word -dW_AT_name = 0x03 -dW_AT_stmt_list = 0x10 -dW_AT_low_pc = 0x11 -dW_AT_high_pc = 0x12 -dW_AT_language = 0x13 -dW_AT_comp_dir = 0x1b -dW_AT_producer = 0x25 -dW_AT_external = 0x3f -dW_AT_frame_base = 0x40 -dW_AT_use_UTF8 = 0x53 -dW_AT_MIPS_linkage_name = 0x2007 - --- * Custom DWARF attributes --- Chosen a more or less random section of the vendor-extensible region - --- ** Describing C-- blocks --- These appear in DW_TAG_lexical_scope DIEs corresponding to C-- blocks -dW_AT_ghc_tick_parent :: Word -dW_AT_ghc_tick_parent = 0x2b20 - --- ** Describing source notes --- These appear in DW_TAG_ghc_src_note DIEs -dW_AT_ghc_span_file, dW_AT_ghc_span_start_line, - dW_AT_ghc_span_start_col, dW_AT_ghc_span_end_line, - dW_AT_ghc_span_end_col :: Word -dW_AT_ghc_span_file = 0x2b00 -dW_AT_ghc_span_start_line = 0x2b01 -dW_AT_ghc_span_start_col = 0x2b02 -dW_AT_ghc_span_end_line = 0x2b03 -dW_AT_ghc_span_end_col = 0x2b04 - - --- * Abbrev declarations -dW_CHILDREN_no, dW_CHILDREN_yes :: Word8 -dW_CHILDREN_no = 0 -dW_CHILDREN_yes = 1 - -dW_FORM_addr, dW_FORM_data2, dW_FORM_data4, dW_FORM_string, dW_FORM_flag, - dW_FORM_block1, dW_FORM_ref4, dW_FORM_ref_addr, dW_FORM_flag_present :: Word -dW_FORM_addr = 0x01 -dW_FORM_data2 = 0x05 -dW_FORM_data4 = 0x06 -dW_FORM_string = 0x08 -dW_FORM_flag = 0x0c -dW_FORM_block1 = 0x0a -dW_FORM_ref_addr = 0x10 -dW_FORM_ref4 = 0x13 -dW_FORM_flag_present = 0x19 - --- * Dwarf native types -dW_ATE_address, dW_ATE_boolean, dW_ATE_float, dW_ATE_signed, - dW_ATE_signed_char, dW_ATE_unsigned, dW_ATE_unsigned_char :: Word -dW_ATE_address = 1 -dW_ATE_boolean = 2 -dW_ATE_float = 4 -dW_ATE_signed = 5 -dW_ATE_signed_char = 6 -dW_ATE_unsigned = 7 -dW_ATE_unsigned_char = 8 - --- * Call frame information -dW_CFA_set_loc, dW_CFA_undefined, dW_CFA_same_value, - dW_CFA_def_cfa, dW_CFA_def_cfa_offset, dW_CFA_def_cfa_expression, - dW_CFA_expression, dW_CFA_offset_extended_sf, dW_CFA_def_cfa_offset_sf, - dW_CFA_def_cfa_sf, dW_CFA_val_offset, dW_CFA_val_expression, - dW_CFA_offset :: Word8 -dW_CFA_set_loc = 0x01 -dW_CFA_undefined = 0x07 -dW_CFA_same_value = 0x08 -dW_CFA_def_cfa = 0x0c -dW_CFA_def_cfa_offset = 0x0e -dW_CFA_def_cfa_expression = 0x0f -dW_CFA_expression = 0x10 -dW_CFA_offset_extended_sf = 0x11 -dW_CFA_def_cfa_sf = 0x12 -dW_CFA_def_cfa_offset_sf = 0x13 -dW_CFA_val_offset = 0x14 -dW_CFA_val_expression = 0x16 -dW_CFA_offset = 0x80 - --- * Operations -dW_OP_addr, dW_OP_deref, dW_OP_consts, - dW_OP_minus, dW_OP_mul, dW_OP_plus, - dW_OP_lit0, dW_OP_breg0, dW_OP_call_frame_cfa :: Word8 -dW_OP_addr = 0x03 -dW_OP_deref = 0x06 -dW_OP_consts = 0x11 -dW_OP_minus = 0x1c -dW_OP_mul = 0x1e -dW_OP_plus = 0x22 -dW_OP_lit0 = 0x30 -dW_OP_breg0 = 0x70 -dW_OP_call_frame_cfa = 0x9c - --- * Dwarf section declarations -dwarfInfoSection, dwarfAbbrevSection, dwarfLineSection, - dwarfFrameSection, dwarfGhcSection, dwarfARangesSection :: SDoc -dwarfInfoSection = dwarfSection "info" -dwarfAbbrevSection = dwarfSection "abbrev" -dwarfLineSection = dwarfSection "line" -dwarfFrameSection = dwarfSection "frame" -dwarfGhcSection = dwarfSection "ghc" -dwarfARangesSection = dwarfSection "aranges" - -dwarfSection :: String -> SDoc -dwarfSection name = sdocWithPlatform $ \plat -> - case platformOS plat of - os | osElfTarget os - -> text "\t.section .debug_" <> text name <> text ",\"\"," - <> sectionType "progbits" - | osMachOTarget os - -> text "\t.section __DWARF,__debug_" <> text name <> text ",regular,debug" - | otherwise - -> text "\t.section .debug_" <> text name <> text ",\"dr\"" - --- * Dwarf section labels -dwarfInfoLabel, dwarfAbbrevLabel, dwarfLineLabel, dwarfFrameLabel :: PtrString -dwarfInfoLabel = sLit ".Lsection_info" -dwarfAbbrevLabel = sLit ".Lsection_abbrev" -dwarfLineLabel = sLit ".Lsection_line" -dwarfFrameLabel = sLit ".Lsection_frame" - --- | Mapping of registers to DWARF register numbers -dwarfRegNo :: Platform -> Reg -> Word8 -dwarfRegNo p r = case platformArch p of - ArchX86 - | r == eax -> 0 - | r == ecx -> 1 -- yes, no typo - | r == edx -> 2 - | r == ebx -> 3 - | r == esp -> 4 - | r == ebp -> 5 - | r == esi -> 6 - | r == edi -> 7 - ArchX86_64 - | r == rax -> 0 - | r == rdx -> 1 -- this neither. The order GCC allocates registers in? - | r == rcx -> 2 - | r == rbx -> 3 - | r == rsi -> 4 - | r == rdi -> 5 - | r == rbp -> 6 - | r == rsp -> 7 - | r == r8 -> 8 - | r == r9 -> 9 - | r == r10 -> 10 - | r == r11 -> 11 - | r == r12 -> 12 - | r == r13 -> 13 - | r == r14 -> 14 - | r == r15 -> 15 - | r == xmm0 -> 17 - | r == xmm1 -> 18 - | r == xmm2 -> 19 - | r == xmm3 -> 20 - | r == xmm4 -> 21 - | r == xmm5 -> 22 - | r == xmm6 -> 23 - | r == xmm7 -> 24 - | r == xmm8 -> 25 - | r == xmm9 -> 26 - | r == xmm10 -> 27 - | r == xmm11 -> 28 - | r == xmm12 -> 29 - | r == xmm13 -> 30 - | r == xmm14 -> 31 - | r == xmm15 -> 32 - _other -> error "dwarfRegNo: Unsupported platform or unknown register!" - --- | Virtual register number to use for return address. -dwarfReturnRegNo :: Platform -> Word8 -dwarfReturnRegNo p - -- We "overwrite" IP with our pseudo register - that makes sense, as - -- when using this mechanism gdb already knows the IP anyway. Clang - -- does this too, so it must be safe. - = case platformArch p of - ArchX86 -> 8 -- eip - ArchX86_64 -> 16 -- rip - _other -> error "dwarfReturnRegNo: Unsupported platform!" diff --git a/compiler/nativeGen/Dwarf/Types.hs b/compiler/nativeGen/Dwarf/Types.hs deleted file mode 100644 index c006081872..0000000000 --- a/compiler/nativeGen/Dwarf/Types.hs +++ /dev/null @@ -1,612 +0,0 @@ -module Dwarf.Types - ( -- * Dwarf information - DwarfInfo(..) - , pprDwarfInfo - , pprAbbrevDecls - -- * Dwarf address range table - , DwarfARange(..) - , pprDwarfARanges - -- * Dwarf frame - , DwarfFrame(..), DwarfFrameProc(..), DwarfFrameBlock(..) - , pprDwarfFrame - -- * Utilities - , pprByte - , pprHalf - , pprData4' - , pprDwWord - , pprWord - , pprLEBWord - , pprLEBInt - , wordAlign - , sectionOffset - ) - where - -import GhcPrelude - -import GHC.Cmm.DebugBlock -import GHC.Cmm.CLabel -import GHC.Cmm.Expr ( GlobalReg(..) ) -import Encoding -import FastString -import Outputable -import GHC.Platform -import Unique -import Reg -import SrcLoc -import Util - -import Dwarf.Constants - -import qualified Data.ByteString as BS -import qualified Control.Monad.Trans.State.Strict as S -import Control.Monad (zipWithM, join) -import Data.Bits -import qualified Data.Map as Map -import Data.Word -import Data.Char - -import GHC.Platform.Regs - --- | Individual dwarf records. Each one will be encoded as an entry in --- the @.debug_info@ section. -data DwarfInfo - = DwarfCompileUnit { dwChildren :: [DwarfInfo] - , dwName :: String - , dwProducer :: String - , dwCompDir :: String - , dwLowLabel :: CLabel - , dwHighLabel :: CLabel - , dwLineLabel :: PtrString } - | DwarfSubprogram { dwChildren :: [DwarfInfo] - , dwName :: String - , dwLabel :: CLabel - , dwParent :: Maybe CLabel - -- ^ label of DIE belonging to the parent tick - } - | DwarfBlock { dwChildren :: [DwarfInfo] - , dwLabel :: CLabel - , dwMarker :: Maybe CLabel - } - | DwarfSrcNote { dwSrcSpan :: RealSrcSpan - } - --- | Abbreviation codes used for encoding above records in the --- @.debug_info@ section. -data DwarfAbbrev - = DwAbbrNull -- ^ Pseudo, used for marking the end of lists - | DwAbbrCompileUnit - | DwAbbrSubprogram - | DwAbbrSubprogramWithParent - | DwAbbrBlockWithoutCode - | DwAbbrBlock - | DwAbbrGhcSrcNote - deriving (Eq, Enum) - --- | Generate assembly for the given abbreviation code -pprAbbrev :: DwarfAbbrev -> SDoc -pprAbbrev = pprLEBWord . fromIntegral . fromEnum - --- | Abbreviation declaration. This explains the binary encoding we --- use for representing 'DwarfInfo'. Be aware that this must be updated --- along with 'pprDwarfInfo'. -pprAbbrevDecls :: Bool -> SDoc -pprAbbrevDecls haveDebugLine = - let mkAbbrev abbr tag chld flds = - let fld (tag, form) = pprLEBWord tag $$ pprLEBWord form - in pprAbbrev abbr $$ pprLEBWord tag $$ pprByte chld $$ - vcat (map fld flds) $$ pprByte 0 $$ pprByte 0 - -- These are shared between DwAbbrSubprogram and - -- DwAbbrSubprogramWithParent - subprogramAttrs = - [ (dW_AT_name, dW_FORM_string) - , (dW_AT_MIPS_linkage_name, dW_FORM_string) - , (dW_AT_external, dW_FORM_flag) - , (dW_AT_low_pc, dW_FORM_addr) - , (dW_AT_high_pc, dW_FORM_addr) - , (dW_AT_frame_base, dW_FORM_block1) - ] - in dwarfAbbrevSection $$ - ptext dwarfAbbrevLabel <> colon $$ - mkAbbrev DwAbbrCompileUnit dW_TAG_compile_unit dW_CHILDREN_yes - ([(dW_AT_name, dW_FORM_string) - , (dW_AT_producer, dW_FORM_string) - , (dW_AT_language, dW_FORM_data4) - , (dW_AT_comp_dir, dW_FORM_string) - , (dW_AT_use_UTF8, dW_FORM_flag_present) -- not represented in body - , (dW_AT_low_pc, dW_FORM_addr) - , (dW_AT_high_pc, dW_FORM_addr) - ] ++ - (if haveDebugLine - then [ (dW_AT_stmt_list, dW_FORM_data4) ] - else [])) $$ - mkAbbrev DwAbbrSubprogram dW_TAG_subprogram dW_CHILDREN_yes - subprogramAttrs $$ - mkAbbrev DwAbbrSubprogramWithParent dW_TAG_subprogram dW_CHILDREN_yes - (subprogramAttrs ++ [(dW_AT_ghc_tick_parent, dW_FORM_ref_addr)]) $$ - mkAbbrev DwAbbrBlockWithoutCode dW_TAG_lexical_block dW_CHILDREN_yes - [ (dW_AT_name, dW_FORM_string) - ] $$ - mkAbbrev DwAbbrBlock dW_TAG_lexical_block dW_CHILDREN_yes - [ (dW_AT_name, dW_FORM_string) - , (dW_AT_low_pc, dW_FORM_addr) - , (dW_AT_high_pc, dW_FORM_addr) - ] $$ - mkAbbrev DwAbbrGhcSrcNote dW_TAG_ghc_src_note dW_CHILDREN_no - [ (dW_AT_ghc_span_file, dW_FORM_string) - , (dW_AT_ghc_span_start_line, dW_FORM_data4) - , (dW_AT_ghc_span_start_col, dW_FORM_data2) - , (dW_AT_ghc_span_end_line, dW_FORM_data4) - , (dW_AT_ghc_span_end_col, dW_FORM_data2) - ] $$ - pprByte 0 - --- | Generate assembly for DWARF data -pprDwarfInfo :: Bool -> DwarfInfo -> SDoc -pprDwarfInfo haveSrc d - = case d of - DwarfCompileUnit {} -> hasChildren - DwarfSubprogram {} -> hasChildren - DwarfBlock {} -> hasChildren - DwarfSrcNote {} -> noChildren - where - hasChildren = - pprDwarfInfoOpen haveSrc d $$ - vcat (map (pprDwarfInfo haveSrc) (dwChildren d)) $$ - pprDwarfInfoClose - noChildren = pprDwarfInfoOpen haveSrc d - --- | Prints assembler data corresponding to DWARF info records. Note --- that the binary format of this is parameterized in @abbrevDecls@ and --- has to be kept in synch. -pprDwarfInfoOpen :: Bool -> DwarfInfo -> SDoc -pprDwarfInfoOpen haveSrc (DwarfCompileUnit _ name producer compDir lowLabel - highLabel lineLbl) = - pprAbbrev DwAbbrCompileUnit - $$ pprString name - $$ pprString producer - $$ pprData4 dW_LANG_Haskell - $$ pprString compDir - $$ pprWord (ppr lowLabel) - $$ pprWord (ppr highLabel) - $$ if haveSrc - then sectionOffset (ptext lineLbl) (ptext dwarfLineLabel) - else empty -pprDwarfInfoOpen _ (DwarfSubprogram _ name label - parent) = sdocWithDynFlags $ \df -> - ppr (mkAsmTempDieLabel label) <> colon - $$ pprAbbrev abbrev - $$ pprString name - $$ pprString (renderWithStyle (initSDocContext df (mkCodeStyle CStyle)) (ppr label)) - $$ pprFlag (externallyVisibleCLabel label) - $$ pprWord (ppr label) - $$ pprWord (ppr $ mkAsmTempEndLabel label) - $$ pprByte 1 - $$ pprByte dW_OP_call_frame_cfa - $$ parentValue - where - abbrev = case parent of Nothing -> DwAbbrSubprogram - Just _ -> DwAbbrSubprogramWithParent - parentValue = maybe empty pprParentDie parent - pprParentDie sym = sectionOffset (ppr sym) (ptext dwarfInfoLabel) -pprDwarfInfoOpen _ (DwarfBlock _ label Nothing) = sdocWithDynFlags $ \df -> - ppr (mkAsmTempDieLabel label) <> colon - $$ pprAbbrev DwAbbrBlockWithoutCode - $$ pprString (renderWithStyle (initSDocContext df (mkCodeStyle CStyle)) (ppr label)) -pprDwarfInfoOpen _ (DwarfBlock _ label (Just marker)) = sdocWithDynFlags $ \df -> - ppr (mkAsmTempDieLabel label) <> colon - $$ pprAbbrev DwAbbrBlock - $$ pprString (renderWithStyle (initSDocContext df (mkCodeStyle CStyle)) (ppr label)) - $$ pprWord (ppr marker) - $$ pprWord (ppr $ mkAsmTempEndLabel marker) -pprDwarfInfoOpen _ (DwarfSrcNote ss) = - pprAbbrev DwAbbrGhcSrcNote - $$ pprString' (ftext $ srcSpanFile ss) - $$ pprData4 (fromIntegral $ srcSpanStartLine ss) - $$ pprHalf (fromIntegral $ srcSpanStartCol ss) - $$ pprData4 (fromIntegral $ srcSpanEndLine ss) - $$ pprHalf (fromIntegral $ srcSpanEndCol ss) - --- | Close a DWARF info record with children -pprDwarfInfoClose :: SDoc -pprDwarfInfoClose = pprAbbrev DwAbbrNull - --- | A DWARF address range. This is used by the debugger to quickly locate --- which compilation unit a given address belongs to. This type assumes --- a non-segmented address-space. -data DwarfARange - = DwarfARange - { dwArngStartLabel :: CLabel - , dwArngEndLabel :: CLabel - } - --- | Print assembler directives corresponding to a DWARF @.debug_aranges@ --- address table entry. -pprDwarfARanges :: [DwarfARange] -> Unique -> SDoc -pprDwarfARanges arngs unitU = sdocWithPlatform $ \plat -> - let wordSize = platformWordSizeInBytes plat - paddingSize = 4 :: Int - -- header is 12 bytes long. - -- entry is 8 bytes (32-bit platform) or 16 bytes (64-bit platform). - -- pad such that first entry begins at multiple of entry size. - pad n = vcat $ replicate n $ pprByte 0 - -- Fix for #17428 - initialLength = 8 + paddingSize + (1 + length arngs) * 2 * wordSize - in pprDwWord (ppr initialLength) - $$ pprHalf 2 - $$ sectionOffset (ppr $ mkAsmTempLabel $ unitU) - (ptext dwarfInfoLabel) - $$ pprByte (fromIntegral wordSize) - $$ pprByte 0 - $$ pad paddingSize - -- body - $$ vcat (map pprDwarfARange arngs) - -- terminus - $$ pprWord (char '0') - $$ pprWord (char '0') - -pprDwarfARange :: DwarfARange -> SDoc -pprDwarfARange arng = pprWord (ppr $ dwArngStartLabel arng) $$ pprWord length - where - length = ppr (dwArngEndLabel arng) - <> char '-' <> ppr (dwArngStartLabel arng) - --- | Information about unwind instructions for a procedure. This --- corresponds to a "Common Information Entry" (CIE) in DWARF. -data DwarfFrame - = DwarfFrame - { dwCieLabel :: CLabel - , dwCieInit :: UnwindTable - , dwCieProcs :: [DwarfFrameProc] - } - --- | Unwind instructions for an individual procedure. Corresponds to a --- "Frame Description Entry" (FDE) in DWARF. -data DwarfFrameProc - = DwarfFrameProc - { dwFdeProc :: CLabel - , dwFdeHasInfo :: Bool - , dwFdeBlocks :: [DwarfFrameBlock] - -- ^ List of blocks. Order must match asm! - } - --- | Unwind instructions for a block. Will become part of the --- containing FDE. -data DwarfFrameBlock - = DwarfFrameBlock - { dwFdeBlkHasInfo :: Bool - , dwFdeUnwind :: [UnwindPoint] - -- ^ these unwind points must occur in the same order as they occur - -- in the block - } - -instance Outputable DwarfFrameBlock where - ppr (DwarfFrameBlock hasInfo unwinds) = braces $ ppr hasInfo <+> ppr unwinds - --- | Header for the @.debug_frame@ section. Here we emit the "Common --- Information Entry" record that establishes general call frame --- parameters and the default stack layout. -pprDwarfFrame :: DwarfFrame -> SDoc -pprDwarfFrame DwarfFrame{dwCieLabel=cieLabel,dwCieInit=cieInit,dwCieProcs=procs} - = sdocWithPlatform $ \plat -> - let cieStartLabel= mkAsmTempDerivedLabel cieLabel (fsLit "_start") - cieEndLabel = mkAsmTempEndLabel cieLabel - length = ppr cieEndLabel <> char '-' <> ppr cieStartLabel - spReg = dwarfGlobalRegNo plat Sp - retReg = dwarfReturnRegNo plat - wordSize = platformWordSizeInBytes plat - pprInit :: (GlobalReg, Maybe UnwindExpr) -> SDoc - pprInit (g, uw) = pprSetUnwind plat g (Nothing, uw) - - -- Preserve C stack pointer: This necessary to override that default - -- unwinding behavior of setting $sp = CFA. - preserveSp = case platformArch plat of - ArchX86 -> pprByte dW_CFA_same_value $$ pprLEBWord 4 - ArchX86_64 -> pprByte dW_CFA_same_value $$ pprLEBWord 7 - _ -> empty - in vcat [ ppr cieLabel <> colon - , pprData4' length -- Length of CIE - , ppr cieStartLabel <> colon - , pprData4' (text "-1") - -- Common Information Entry marker (-1 = 0xf..f) - , pprByte 3 -- CIE version (we require DWARF 3) - , pprByte 0 -- Augmentation (none) - , pprByte 1 -- Code offset multiplicator - , pprByte (128-fromIntegral wordSize) - -- Data offset multiplicator - -- (stacks grow down => "-w" in signed LEB128) - , pprByte retReg -- virtual register holding return address - ] $$ - -- Initial unwind table - vcat (map pprInit $ Map.toList cieInit) $$ - vcat [ -- RET = *CFA - pprByte (dW_CFA_offset+retReg) - , pprByte 0 - - -- Preserve C stack pointer - , preserveSp - - -- Sp' = CFA - -- (we need to set this manually as our (STG) Sp register is - -- often not the architecture's default stack register) - , pprByte dW_CFA_val_offset - , pprLEBWord (fromIntegral spReg) - , pprLEBWord 0 - ] $$ - wordAlign $$ - ppr cieEndLabel <> colon $$ - -- Procedure unwind tables - vcat (map (pprFrameProc cieLabel cieInit) procs) - --- | Writes a "Frame Description Entry" for a procedure. This consists --- mainly of referencing the CIE and writing state machine --- instructions to describe how the frame base (CFA) changes. -pprFrameProc :: CLabel -> UnwindTable -> DwarfFrameProc -> SDoc -pprFrameProc frameLbl initUw (DwarfFrameProc procLbl hasInfo blocks) - = let fdeLabel = mkAsmTempDerivedLabel procLbl (fsLit "_fde") - fdeEndLabel = mkAsmTempDerivedLabel procLbl (fsLit "_fde_end") - procEnd = mkAsmTempEndLabel procLbl - ifInfo str = if hasInfo then text str else empty - -- see [Note: Info Offset] - in vcat [ whenPprDebug $ text "# Unwinding for" <+> ppr procLbl <> colon - , pprData4' (ppr fdeEndLabel <> char '-' <> ppr fdeLabel) - , ppr fdeLabel <> colon - , pprData4' (ppr frameLbl <> char '-' <> - ptext dwarfFrameLabel) -- Reference to CIE - , pprWord (ppr procLbl <> ifInfo "-1") -- Code pointer - , pprWord (ppr procEnd <> char '-' <> - ppr procLbl <> ifInfo "+1") -- Block byte length - ] $$ - vcat (S.evalState (mapM pprFrameBlock blocks) initUw) $$ - wordAlign $$ - ppr fdeEndLabel <> colon - --- | Generates unwind information for a block. We only generate --- instructions where unwind information actually changes. This small --- optimisations saves a lot of space, as subsequent blocks often have --- the same unwind information. -pprFrameBlock :: DwarfFrameBlock -> S.State UnwindTable SDoc -pprFrameBlock (DwarfFrameBlock hasInfo uws0) = - vcat <$> zipWithM pprFrameDecl (True : repeat False) uws0 - where - pprFrameDecl :: Bool -> UnwindPoint -> S.State UnwindTable SDoc - pprFrameDecl firstDecl (UnwindPoint lbl uws) = S.state $ \oldUws -> - let -- Did a register's unwind expression change? - isChanged :: GlobalReg -> Maybe UnwindExpr - -> Maybe (Maybe UnwindExpr, Maybe UnwindExpr) - isChanged g new - -- the value didn't change - | Just new == old = Nothing - -- the value was and still is undefined - | Nothing <- old - , Nothing <- new = Nothing - -- the value changed - | otherwise = Just (join old, new) - where - old = Map.lookup g oldUws - - changed = Map.toList $ Map.mapMaybeWithKey isChanged uws - - in if oldUws == uws - then (empty, oldUws) - else let -- see [Note: Info Offset] - needsOffset = firstDecl && hasInfo - lblDoc = ppr lbl <> - if needsOffset then text "-1" else empty - doc = sdocWithPlatform $ \plat -> - pprByte dW_CFA_set_loc $$ pprWord lblDoc $$ - vcat (map (uncurry $ pprSetUnwind plat) changed) - in (doc, uws) - --- Note [Info Offset] --- --- GDB was pretty much written with C-like programs in mind, and as a --- result they assume that once you have a return address, it is a --- good idea to look at (PC-1) to unwind further - as that's where the --- "call" instruction is supposed to be. --- --- Now on one hand, code generated by GHC looks nothing like what GDB --- expects, and in fact going up from a return pointer is guaranteed --- to land us inside an info table! On the other hand, that actually --- gives us some wiggle room, as we expect IP to never *actually* end --- up inside the info table, so we can "cheat" by putting whatever GDB --- expects to see there. This is probably pretty safe, as GDB cannot --- assume (PC-1) to be a valid code pointer in the first place - and I --- have seen no code trying to correct this. --- --- Note that this will not prevent GDB from failing to look-up the --- correct function name for the frame, as that uses the symbol table, --- which we can not manipulate as easily. --- --- There's a GDB patch to address this at [1]. At the moment of writing --- it's not merged, so I recommend building GDB with the patch if you --- care about unwinding. The hack above doesn't cover every case. --- --- [1] https://sourceware.org/ml/gdb-patches/2018-02/msg00055.html - --- | Get DWARF register ID for a given GlobalReg -dwarfGlobalRegNo :: Platform -> GlobalReg -> Word8 -dwarfGlobalRegNo p UnwindReturnReg = dwarfReturnRegNo p -dwarfGlobalRegNo p reg = maybe 0 (dwarfRegNo p . RegReal) $ globalRegMaybe p reg - --- | Generate code for setting the unwind information for a register, --- optimized using its known old value in the table. Note that "Sp" is --- special: We see it as synonym for the CFA. -pprSetUnwind :: Platform - -> GlobalReg - -- ^ the register to produce an unwinding table entry for - -> (Maybe UnwindExpr, Maybe UnwindExpr) - -- ^ the old and new values of the register - -> SDoc -pprSetUnwind plat g (_, Nothing) - = pprUndefUnwind plat g -pprSetUnwind _ Sp (Just (UwReg s _), Just (UwReg s' o')) | s == s' - = if o' >= 0 - then pprByte dW_CFA_def_cfa_offset $$ pprLEBWord (fromIntegral o') - else pprByte dW_CFA_def_cfa_offset_sf $$ pprLEBInt o' -pprSetUnwind plat Sp (_, Just (UwReg s' o')) - = if o' >= 0 - then pprByte dW_CFA_def_cfa $$ - pprLEBRegNo plat s' $$ - pprLEBWord (fromIntegral o') - else pprByte dW_CFA_def_cfa_sf $$ - pprLEBRegNo plat s' $$ - pprLEBInt o' -pprSetUnwind _ Sp (_, Just uw) - = pprByte dW_CFA_def_cfa_expression $$ pprUnwindExpr False uw -pprSetUnwind plat g (_, Just (UwDeref (UwReg Sp o))) - | o < 0 && ((-o) `mod` platformWordSizeInBytes plat) == 0 -- expected case - = pprByte (dW_CFA_offset + dwarfGlobalRegNo plat g) $$ - pprLEBWord (fromIntegral ((-o) `div` platformWordSizeInBytes plat)) - | otherwise - = pprByte dW_CFA_offset_extended_sf $$ - pprLEBRegNo plat g $$ - pprLEBInt o -pprSetUnwind plat g (_, Just (UwDeref uw)) - = pprByte dW_CFA_expression $$ - pprLEBRegNo plat g $$ - pprUnwindExpr True uw -pprSetUnwind plat g (_, Just (UwReg g' 0)) - | g == g' - = pprByte dW_CFA_same_value $$ - pprLEBRegNo plat g -pprSetUnwind plat g (_, Just uw) - = pprByte dW_CFA_val_expression $$ - pprLEBRegNo plat g $$ - pprUnwindExpr True uw - --- | Print the register number of the given 'GlobalReg' as an unsigned LEB128 --- encoded number. -pprLEBRegNo :: Platform -> GlobalReg -> SDoc -pprLEBRegNo plat = pprLEBWord . fromIntegral . dwarfGlobalRegNo plat - --- | Generates a DWARF expression for the given unwind expression. If --- @spIsCFA@ is true, we see @Sp@ as the frame base CFA where it gets --- mentioned. -pprUnwindExpr :: Bool -> UnwindExpr -> SDoc -pprUnwindExpr spIsCFA expr - = sdocWithPlatform $ \plat -> - let pprE (UwConst i) - | i >= 0 && i < 32 = pprByte (dW_OP_lit0 + fromIntegral i) - | otherwise = pprByte dW_OP_consts $$ pprLEBInt i -- lazy... - pprE (UwReg Sp i) | spIsCFA - = if i == 0 - then pprByte dW_OP_call_frame_cfa - else pprE (UwPlus (UwReg Sp 0) (UwConst i)) - pprE (UwReg g i) = pprByte (dW_OP_breg0+dwarfGlobalRegNo plat g) $$ - pprLEBInt i - pprE (UwDeref u) = pprE u $$ pprByte dW_OP_deref - pprE (UwLabel l) = pprByte dW_OP_addr $$ pprWord (ppr l) - pprE (UwPlus u1 u2) = pprE u1 $$ pprE u2 $$ pprByte dW_OP_plus - pprE (UwMinus u1 u2) = pprE u1 $$ pprE u2 $$ pprByte dW_OP_minus - pprE (UwTimes u1 u2) = pprE u1 $$ pprE u2 $$ pprByte dW_OP_mul - in text "\t.uleb128 2f-1f" $$ -- DW_FORM_block length - -- computed as the difference of the following local labels 2: and 1: - text "1:" $$ - pprE expr $$ - text "2:" - --- | Generate code for re-setting the unwind information for a --- register to @undefined@ -pprUndefUnwind :: Platform -> GlobalReg -> SDoc -pprUndefUnwind plat g = pprByte dW_CFA_undefined $$ - pprLEBRegNo plat g - - --- | Align assembly at (machine) word boundary -wordAlign :: SDoc -wordAlign = sdocWithPlatform $ \plat -> - text "\t.align " <> case platformOS plat of - OSDarwin -> case platformWordSize plat of - PW8 -> char '3' - PW4 -> char '2' - _other -> ppr (platformWordSizeInBytes plat) - --- | Assembly for a single byte of constant DWARF data -pprByte :: Word8 -> SDoc -pprByte x = text "\t.byte " <> ppr (fromIntegral x :: Word) - --- | Assembly for a two-byte constant integer -pprHalf :: Word16 -> SDoc -pprHalf x = text "\t.short" <+> ppr (fromIntegral x :: Word) - --- | Assembly for a constant DWARF flag -pprFlag :: Bool -> SDoc -pprFlag f = pprByte (if f then 0xff else 0x00) - --- | Assembly for 4 bytes of dynamic DWARF data -pprData4' :: SDoc -> SDoc -pprData4' x = text "\t.long " <> x - --- | Assembly for 4 bytes of constant DWARF data -pprData4 :: Word -> SDoc -pprData4 = pprData4' . ppr - --- | Assembly for a DWARF word of dynamic data. This means 32 bit, as --- we are generating 32 bit DWARF. -pprDwWord :: SDoc -> SDoc -pprDwWord = pprData4' - --- | Assembly for a machine word of dynamic data. Depends on the --- architecture we are currently generating code for. -pprWord :: SDoc -> SDoc -pprWord s = (<> s) . sdocWithPlatform $ \plat -> - case platformWordSize plat of - PW4 -> text "\t.long " - PW8 -> text "\t.quad " - --- | Prints a number in "little endian base 128" format. The idea is --- to optimize for small numbers by stopping once all further bytes --- would be 0. The highest bit in every byte signals whether there --- are further bytes to read. -pprLEBWord :: Word -> SDoc -pprLEBWord x | x < 128 = pprByte (fromIntegral x) - | otherwise = pprByte (fromIntegral $ 128 .|. (x .&. 127)) $$ - pprLEBWord (x `shiftR` 7) - --- | Same as @pprLEBWord@, but for a signed number -pprLEBInt :: Int -> SDoc -pprLEBInt x | x >= -64 && x < 64 - = pprByte (fromIntegral (x .&. 127)) - | otherwise = pprByte (fromIntegral $ 128 .|. (x .&. 127)) $$ - pprLEBInt (x `shiftR` 7) - --- | Generates a dynamic null-terminated string. If required the --- caller needs to make sure that the string is escaped properly. -pprString' :: SDoc -> SDoc -pprString' str = text "\t.asciz \"" <> str <> char '"' - --- | Generate a string constant. We take care to escape the string. -pprString :: String -> SDoc -pprString str - = pprString' $ hcat $ map escapeChar $ - if str `lengthIs` utf8EncodedLength str - then str - else map (chr . fromIntegral) $ BS.unpack $ bytesFS $ mkFastString str - --- | Escape a single non-unicode character -escapeChar :: Char -> SDoc -escapeChar '\\' = text "\\\\" -escapeChar '\"' = text "\\\"" -escapeChar '\n' = text "\\n" -escapeChar c - | isAscii c && isPrint c && c /= '?' -- prevents trigraph warnings - = char c - | otherwise - = char '\\' <> char (intToDigit (ch `div` 64)) <> - char (intToDigit ((ch `div` 8) `mod` 8)) <> - char (intToDigit (ch `mod` 8)) - where ch = ord c - --- | Generate an offset into another section. This is tricky because --- this is handled differently depending on platform: Mac Os expects --- us to calculate the offset using assembler arithmetic. Linux expects --- us to just reference the target directly, and will figure out on --- their own that we actually need an offset. Finally, Windows has --- a special directive to refer to relative offsets. Fun. -sectionOffset :: SDoc -> SDoc -> SDoc -sectionOffset target section = sdocWithPlatform $ \plat -> - case platformOS plat of - OSDarwin -> pprDwWord (target <> char '-' <> section) - OSMinGW32 -> text "\t.secrel32 " <> target - _other -> pprDwWord target diff --git a/compiler/nativeGen/Format.hs b/compiler/nativeGen/Format.hs deleted file mode 100644 index d7b6f6b868..0000000000 --- a/compiler/nativeGen/Format.hs +++ /dev/null @@ -1,105 +0,0 @@ --- | Formats on this architecture --- A Format is a combination of width and class --- --- TODO: Signed vs unsigned? --- --- TODO: This module is currently shared by all architectures because --- NCGMonad need to know about it to make a VReg. It would be better --- to have architecture specific formats, and do the overloading --- properly. eg SPARC doesn't care about FF80. --- -module Format ( - Format(..), - intFormat, - floatFormat, - isFloatFormat, - cmmTypeFormat, - formatToWidth, - formatInBytes -) - -where - -import GhcPrelude - -import GHC.Cmm -import Outputable - --- It looks very like the old MachRep, but it's now of purely local --- significance, here in the native code generator. You can change it --- without global consequences. --- --- A major use is as an opcode qualifier; thus the opcode --- mov.l a b --- might be encoded --- MOV II32 a b --- where the Format field encodes the ".l" part. - --- ToDo: it's not clear to me that we need separate signed-vs-unsigned formats --- here. I've removed them from the x86 version, we'll see what happens --SDM - --- ToDo: quite a few occurrences of Format could usefully be replaced by Width - -data Format - = II8 - | II16 - | II32 - | II64 - | FF32 - | FF64 - deriving (Show, Eq) - - --- | Get the integer format of this width. -intFormat :: Width -> Format -intFormat width - = case width of - W8 -> II8 - W16 -> II16 - W32 -> II32 - W64 -> II64 - other -> sorry $ "The native code generator cannot " ++ - "produce code for Format.intFormat " ++ show other - ++ "\n\tConsider using the llvm backend with -fllvm" - - --- | Get the float format of this width. -floatFormat :: Width -> Format -floatFormat width - = case width of - W32 -> FF32 - W64 -> FF64 - - other -> pprPanic "Format.floatFormat" (ppr other) - - --- | Check if a format represents a floating point value. -isFloatFormat :: Format -> Bool -isFloatFormat format - = case format of - FF32 -> True - FF64 -> True - _ -> False - - --- | Convert a Cmm type to a Format. -cmmTypeFormat :: CmmType -> Format -cmmTypeFormat ty - | isFloatType ty = floatFormat (typeWidth ty) - | otherwise = intFormat (typeWidth ty) - - --- | Get the Width of a Format. -formatToWidth :: Format -> Width -formatToWidth format - = case format of - II8 -> W8 - II16 -> W16 - II32 -> W32 - II64 -> W64 - FF32 -> W32 - FF64 -> W64 - - -formatInBytes :: Format -> Int -formatInBytes = widthInBytes . formatToWidth diff --git a/compiler/nativeGen/Instruction.hs b/compiler/nativeGen/Instruction.hs deleted file mode 100644 index 23c5ced1d8..0000000000 --- a/compiler/nativeGen/Instruction.hs +++ /dev/null @@ -1,202 +0,0 @@ - -module Instruction ( - RegUsage(..), - noUsage, - GenBasicBlock(..), blockId, - ListGraph(..), - NatCmm, - NatCmmDecl, - NatBasicBlock, - topInfoTable, - entryBlocks, - Instruction(..) -) - -where - -import GhcPrelude - -import Reg - -import GHC.Cmm.BlockId -import GHC.Cmm.Dataflow.Collections -import GHC.Cmm.Dataflow.Label -import GHC.Driver.Session -import GHC.Cmm hiding (topInfoTable) -import GHC.Platform - --- | Holds a list of source and destination registers used by a --- particular instruction. --- --- Machine registers that are pre-allocated to stgRegs are filtered --- out, because they are uninteresting from a register allocation --- standpoint. (We wouldn't want them to end up on the free list!) --- --- As far as we are concerned, the fixed registers simply don't exist --- (for allocation purposes, anyway). --- -data RegUsage - = RU [Reg] [Reg] - --- | No regs read or written to. -noUsage :: RegUsage -noUsage = RU [] [] - --- Our flavours of the Cmm types --- Type synonyms for Cmm populated with native code -type NatCmm instr - = GenCmmGroup - RawCmmStatics - (LabelMap RawCmmStatics) - (ListGraph instr) - -type NatCmmDecl statics instr - = GenCmmDecl - statics - (LabelMap RawCmmStatics) - (ListGraph instr) - - -type NatBasicBlock instr - = GenBasicBlock instr - - --- | Returns the info table associated with the CmmDecl's entry point, --- if any. -topInfoTable :: GenCmmDecl a (LabelMap i) (ListGraph b) -> Maybe i -topInfoTable (CmmProc infos _ _ (ListGraph (b:_))) - = mapLookup (blockId b) infos -topInfoTable _ - = Nothing - --- | Return the list of BlockIds in a CmmDecl that are entry points --- for this proc (i.e. they may be jumped to from outside this proc). -entryBlocks :: GenCmmDecl a (LabelMap i) (ListGraph b) -> [BlockId] -entryBlocks (CmmProc info _ _ (ListGraph code)) = entries - where - infos = mapKeys info - entries = case code of - [] -> infos - BasicBlock entry _ : _ -- first block is the entry point - | entry `elem` infos -> infos - | otherwise -> entry : infos -entryBlocks _ = [] - --- | Common things that we can do with instructions, on all architectures. --- These are used by the shared parts of the native code generator, --- specifically the register allocators. --- -class Instruction instr where - - -- | Get the registers that are being used by this instruction. - -- regUsage doesn't need to do any trickery for jumps and such. - -- Just state precisely the regs read and written by that insn. - -- The consequences of control flow transfers, as far as register - -- allocation goes, are taken care of by the register allocator. - -- - regUsageOfInstr - :: Platform - -> instr - -> RegUsage - - - -- | Apply a given mapping to all the register references in this - -- instruction. - patchRegsOfInstr - :: instr - -> (Reg -> Reg) - -> instr - - - -- | Checks whether this instruction is a jump/branch instruction. - -- One that can change the flow of control in a way that the - -- register allocator needs to worry about. - isJumpishInstr - :: instr -> Bool - - - -- | Give the possible destinations of this jump instruction. - -- Must be defined for all jumpish instructions. - jumpDestsOfInstr - :: instr -> [BlockId] - - - -- | Change the destination of this jump instruction. - -- Used in the linear allocator when adding fixup blocks for join - -- points. - patchJumpInstr - :: instr - -> (BlockId -> BlockId) - -> instr - - - -- | An instruction to spill a register into a spill slot. - mkSpillInstr - :: DynFlags - -> Reg -- ^ the reg to spill - -> Int -- ^ the current stack delta - -> Int -- ^ spill slot to use - -> instr - - - -- | An instruction to reload a register from a spill slot. - mkLoadInstr - :: DynFlags - -> Reg -- ^ the reg to reload. - -> Int -- ^ the current stack delta - -> Int -- ^ the spill slot to use - -> instr - - -- | See if this instruction is telling us the current C stack delta - takeDeltaInstr - :: instr - -> Maybe Int - - -- | Check whether this instruction is some meta thing inserted into - -- the instruction stream for other purposes. - -- - -- Not something that has to be treated as a real machine instruction - -- and have its registers allocated. - -- - -- eg, comments, delta, ldata, etc. - isMetaInstr - :: instr - -> Bool - - - - -- | Copy the value in a register to another one. - -- Must work for all register classes. - mkRegRegMoveInstr - :: Platform - -> Reg -- ^ source register - -> Reg -- ^ destination register - -> instr - - -- | Take the source and destination from this reg -> reg move instruction - -- or Nothing if it's not one - takeRegRegMoveInstr - :: instr - -> Maybe (Reg, Reg) - - -- | Make an unconditional jump instruction. - -- For architectures with branch delay slots, its ok to put - -- a NOP after the jump. Don't fill the delay slot with an - -- instruction that references regs or you'll confuse the - -- linear allocator. - mkJumpInstr - :: BlockId - -> [instr] - - - -- Subtract an amount from the C stack pointer - mkStackAllocInstr - :: Platform - -> Int - -> [instr] - - -- Add an amount to the C stack pointer - mkStackDeallocInstr - :: Platform - -> Int - -> [instr] diff --git a/compiler/nativeGen/NCGMonad.hs b/compiler/nativeGen/NCGMonad.hs deleted file mode 100644 index 5f2af49d4c..0000000000 --- a/compiler/nativeGen/NCGMonad.hs +++ /dev/null @@ -1,294 +0,0 @@ -{-# LANGUAGE CPP #-} -{-# LANGUAGE DeriveFunctor #-} -{-# LANGUAGE BangPatterns #-} - --- ----------------------------------------------------------------------------- --- --- (c) The University of Glasgow 1993-2004 --- --- The native code generator's monad. --- --- ----------------------------------------------------------------------------- - -module NCGMonad ( - NcgImpl(..), - NatM_State(..), mkNatM_State, - - NatM, -- instance Monad - initNat, - addImportNat, - addNodeBetweenNat, - addImmediateSuccessorNat, - updateCfgNat, - getUniqueNat, - mapAccumLNat, - setDeltaNat, - getDeltaNat, - getThisModuleNat, - getBlockIdNat, - getNewLabelNat, - getNewRegNat, - getNewRegPairNat, - getPicBaseMaybeNat, - getPicBaseNat, - getDynFlags, - getModLoc, - getFileId, - getDebugBlock, - - DwarfFiles -) - -where - -#include "HsVersions.h" - -import GhcPrelude - -import Reg -import Format -import TargetReg - -import GHC.Cmm.BlockId -import GHC.Cmm.Dataflow.Collections -import GHC.Cmm.Dataflow.Label -import GHC.Cmm.CLabel ( CLabel ) -import GHC.Cmm.DebugBlock -import FastString ( FastString ) -import UniqFM -import UniqSupply -import Unique ( Unique ) -import GHC.Driver.Session -import Module - -import Control.Monad ( ap ) - -import Instruction -import Outputable (SDoc, pprPanic, ppr) -import GHC.Cmm (RawCmmDecl, RawCmmStatics) -import CFG - -data NcgImpl statics instr jumpDest = NcgImpl { - cmmTopCodeGen :: RawCmmDecl -> NatM [NatCmmDecl statics instr], - generateJumpTableForInstr :: instr -> Maybe (NatCmmDecl statics instr), - getJumpDestBlockId :: jumpDest -> Maybe BlockId, - canShortcut :: instr -> Maybe jumpDest, - shortcutStatics :: (BlockId -> Maybe jumpDest) -> statics -> statics, - shortcutJump :: (BlockId -> Maybe jumpDest) -> instr -> instr, - pprNatCmmDecl :: NatCmmDecl statics instr -> SDoc, - maxSpillSlots :: Int, - allocatableRegs :: [RealReg], - ncgExpandTop :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr], - ncgAllocMoreStack :: Int -> NatCmmDecl statics instr - -> UniqSM (NatCmmDecl statics instr, [(BlockId,BlockId)]), - -- ^ The list of block ids records the redirected jumps to allow us to update - -- the CFG. - ncgMakeFarBranches :: LabelMap RawCmmStatics -> [NatBasicBlock instr] -> [NatBasicBlock instr], - extractUnwindPoints :: [instr] -> [UnwindPoint], - -- ^ given the instruction sequence of a block, produce a list of - -- the block's 'UnwindPoint's - -- See Note [What is this unwinding business?] in Debug - -- and Note [Unwinding information in the NCG] in this module. - invertCondBranches :: Maybe CFG -> LabelMap RawCmmStatics -> [NatBasicBlock instr] - -> [NatBasicBlock instr] - -- ^ Turn the sequence of `jcc l1; jmp l2` into `jncc l2; <block_l1>` - -- when possible. - } - -data NatM_State - = NatM_State { - natm_us :: UniqSupply, - natm_delta :: Int, - natm_imports :: [(CLabel)], - natm_pic :: Maybe Reg, - natm_dflags :: DynFlags, - natm_this_module :: Module, - natm_modloc :: ModLocation, - natm_fileid :: DwarfFiles, - natm_debug_map :: LabelMap DebugBlock, - natm_cfg :: CFG - -- ^ Having a CFG with additional information is essential for some - -- operations. However we can't reconstruct all information once we - -- generated instructions. So instead we update the CFG as we go. - } - -type DwarfFiles = UniqFM (FastString, Int) - -newtype NatM result = NatM (NatM_State -> (result, NatM_State)) - deriving (Functor) - -unNat :: NatM a -> NatM_State -> (a, NatM_State) -unNat (NatM a) = a - -mkNatM_State :: UniqSupply -> Int -> DynFlags -> Module -> ModLocation -> - DwarfFiles -> LabelMap DebugBlock -> CFG -> NatM_State -mkNatM_State us delta dflags this_mod - = \loc dwf dbg cfg -> - NatM_State - { natm_us = us - , natm_delta = delta - , natm_imports = [] - , natm_pic = Nothing - , natm_dflags = dflags - , natm_this_module = this_mod - , natm_modloc = loc - , natm_fileid = dwf - , natm_debug_map = dbg - , natm_cfg = cfg - } - -initNat :: NatM_State -> NatM a -> (a, NatM_State) -initNat init_st m - = case unNat m init_st of { (r,st) -> (r,st) } - -instance Applicative NatM where - pure = returnNat - (<*>) = ap - -instance Monad NatM where - (>>=) = thenNat - -instance MonadUnique NatM where - getUniqueSupplyM = NatM $ \st -> - case splitUniqSupply (natm_us st) of - (us1, us2) -> (us1, st {natm_us = us2}) - - getUniqueM = NatM $ \st -> - case takeUniqFromSupply (natm_us st) of - (uniq, us') -> (uniq, st {natm_us = us'}) - -thenNat :: NatM a -> (a -> NatM b) -> NatM b -thenNat expr cont - = NatM $ \st -> case unNat expr st of - (result, st') -> unNat (cont result) st' - -returnNat :: a -> NatM a -returnNat result - = NatM $ \st -> (result, st) - -mapAccumLNat :: (acc -> x -> NatM (acc, y)) - -> acc - -> [x] - -> NatM (acc, [y]) - -mapAccumLNat _ b [] - = return (b, []) -mapAccumLNat f b (x:xs) - = do (b__2, x__2) <- f b x - (b__3, xs__2) <- mapAccumLNat f b__2 xs - return (b__3, x__2:xs__2) - -getUniqueNat :: NatM Unique -getUniqueNat = NatM $ \ st -> - case takeUniqFromSupply $ natm_us st of - (uniq, us') -> (uniq, st {natm_us = us'}) - -instance HasDynFlags NatM where - getDynFlags = NatM $ \ st -> (natm_dflags st, st) - - -getDeltaNat :: NatM Int -getDeltaNat = NatM $ \ st -> (natm_delta st, st) - - -setDeltaNat :: Int -> NatM () -setDeltaNat delta = NatM $ \ st -> ((), st {natm_delta = delta}) - - -getThisModuleNat :: NatM Module -getThisModuleNat = NatM $ \ st -> (natm_this_module st, st) - - -addImportNat :: CLabel -> NatM () -addImportNat imp - = NatM $ \ st -> ((), st {natm_imports = imp : natm_imports st}) - -updateCfgNat :: (CFG -> CFG) -> NatM () -updateCfgNat f - = NatM $ \ st -> let !cfg' = f (natm_cfg st) - in ((), st { natm_cfg = cfg'}) - --- | Record that we added a block between `from` and `old`. -addNodeBetweenNat :: BlockId -> BlockId -> BlockId -> NatM () -addNodeBetweenNat from between to - = do df <- getDynFlags - let jmpWeight = fromIntegral . uncondWeight . - cfgWeightInfo $ df - updateCfgNat (updateCfg jmpWeight from between to) - where - -- When transforming A -> B to A -> A' -> B - -- A -> A' keeps the old edge info while - -- A' -> B gets the info for an unconditional - -- jump. - updateCfg weight from between old m - | Just info <- getEdgeInfo from old m - = addEdge from between info . - addWeightEdge between old weight . - delEdge from old $ m - | otherwise - = pprPanic "Failed to update cfg: Untracked edge" (ppr (from,to)) - - --- | Place `succ` after `block` and change any edges --- block -> X to `succ` -> X -addImmediateSuccessorNat :: BlockId -> BlockId -> NatM () -addImmediateSuccessorNat block succ - = updateCfgNat (addImmediateSuccessor block succ) - -getBlockIdNat :: NatM BlockId -getBlockIdNat - = do u <- getUniqueNat - return (mkBlockId u) - - -getNewLabelNat :: NatM CLabel -getNewLabelNat - = blockLbl <$> getBlockIdNat - - -getNewRegNat :: Format -> NatM Reg -getNewRegNat rep - = do u <- getUniqueNat - dflags <- getDynFlags - return (RegVirtual $ targetMkVirtualReg (targetPlatform dflags) u rep) - - -getNewRegPairNat :: Format -> NatM (Reg,Reg) -getNewRegPairNat rep - = do u <- getUniqueNat - dflags <- getDynFlags - let vLo = targetMkVirtualReg (targetPlatform dflags) u rep - let lo = RegVirtual $ targetMkVirtualReg (targetPlatform dflags) u rep - let hi = RegVirtual $ getHiVirtualRegFromLo vLo - return (lo, hi) - - -getPicBaseMaybeNat :: NatM (Maybe Reg) -getPicBaseMaybeNat - = NatM (\state -> (natm_pic state, state)) - - -getPicBaseNat :: Format -> NatM Reg -getPicBaseNat rep - = do mbPicBase <- getPicBaseMaybeNat - case mbPicBase of - Just picBase -> return picBase - Nothing - -> do - reg <- getNewRegNat rep - NatM (\state -> (reg, state { natm_pic = Just reg })) - -getModLoc :: NatM ModLocation -getModLoc - = NatM $ \ st -> (natm_modloc st, st) - -getFileId :: FastString -> NatM Int -getFileId f = NatM $ \st -> - case lookupUFM (natm_fileid st) f of - Just (_,n) -> (n, st) - Nothing -> let n = 1 + sizeUFM (natm_fileid st) - fids = addToUFM (natm_fileid st) f (f,n) - in n `seq` fids `seq` (n, st { natm_fileid = fids }) - -getDebugBlock :: Label -> NatM (Maybe DebugBlock) -getDebugBlock l = NatM $ \st -> (mapLookup l (natm_debug_map st), st) diff --git a/compiler/nativeGen/NOTES b/compiler/nativeGen/NOTES deleted file mode 100644 index 9068a7fc2c..0000000000 --- a/compiler/nativeGen/NOTES +++ /dev/null @@ -1,41 +0,0 @@ -TODO in new NCG -~~~~~~~~~~~~~~~ - -- Are we being careful enough about narrowing those out-of-range CmmInts? - -- Register allocator: - - fixup code - - keep track of free stack slots - - Optimisations: - - - picking the assignment on entry to a block: better to defer this - until we know all the assignments. In a loop, we should pick - the assignment from the looping jump (fixpointing?), so that any - fixup code ends up *outside* the loop. Otherwise, we should - pick the assignment that results in the least fixup code. - -- splitting? - --- ----------------------------------------------------------------------------- --- x86 ToDos - -- x86 genCCall needs to tack on the @size for stdcalls (might not be in the - foreignlabel). - -- x86: should really clean up that IMUL64 stuff, and tell the code gen about - Intel imul instructions. - -- x86: we're not careful enough about making sure that we only use - byte-addressable registers in byte instructions. Should we do it this - way, or stick to using 32-bit registers everywhere? - -- Use SSE for floating point, optionally. - ------------------------------------------------------------------------------- --- Further optimisations: - -- We might be able to extend the scope of the inlining phase so it can - skip over more statements that don't affect the value of the inlined - expr. - diff --git a/compiler/nativeGen/PIC.hs b/compiler/nativeGen/PIC.hs deleted file mode 100644 index 5c217f2fe6..0000000000 --- a/compiler/nativeGen/PIC.hs +++ /dev/null @@ -1,838 +0,0 @@ -{- - This module handles generation of position independent code and - dynamic-linking related issues for the native code generator. - - This depends both the architecture and OS, so we define it here - instead of in one of the architecture specific modules. - - Things outside this module which are related to this: - - + module CLabel - - PIC base label (pretty printed as local label 1) - - DynamicLinkerLabels - several kinds: - CodeStub, SymbolPtr, GotSymbolPtr, GotSymbolOffset - - labelDynamic predicate - + module Cmm - - The GlobalReg datatype has a PicBaseReg constructor - - The CmmLit datatype has a CmmLabelDiffOff constructor - + codeGen & RTS - - When tablesNextToCode, no absolute addresses are stored in info tables - any more. Instead, offsets from the info label are used. - - For Win32 only, SRTs might contain addresses of __imp_ symbol pointers - because Win32 doesn't support external references in data sections. - TODO: make sure this still works, it might be bitrotted - + NCG - - The cmmToCmm pass in AsmCodeGen calls cmmMakeDynamicReference for all - labels. - - nativeCodeGen calls pprImportedSymbol and pprGotDeclaration to output - all the necessary stuff for imported symbols. - - The NCG monad keeps track of a list of imported symbols. - - MachCodeGen invokes initializePicBase to generate code to initialize - the PIC base register when needed. - - MachCodeGen calls cmmMakeDynamicReference whenever it uses a CLabel - that wasn't in the original Cmm code (e.g. floating point literals). --} - -module PIC ( - cmmMakeDynamicReference, - CmmMakeDynamicReferenceM(..), - ReferenceKind(..), - needImportedSymbols, - pprImportedSymbol, - pprGotDeclaration, - - initializePicBase_ppc, - initializePicBase_x86 -) - -where - -import GhcPrelude - -import qualified PPC.Instr as PPC -import qualified PPC.Regs as PPC - -import qualified X86.Instr as X86 - -import GHC.Platform -import Instruction -import Reg -import NCGMonad - - -import GHC.Cmm.Dataflow.Collections -import GHC.Cmm -import GHC.Cmm.CLabel ( CLabel, ForeignLabelSource(..), pprCLabel, - mkDynamicLinkerLabel, DynamicLinkerLabelInfo(..), - dynamicLinkerLabelInfo, mkPicBaseLabel, - labelDynamic, externallyVisibleCLabel ) - -import GHC.Cmm.CLabel ( mkForeignLabel ) - - -import BasicTypes -import Module - -import Outputable - -import GHC.Driver.Session -import FastString - - - --------------------------------------------------------------------------------- --- It gets called by the cmmToCmm pass for every CmmLabel in the Cmm --- code. It does The Right Thing(tm) to convert the CmmLabel into a --- position-independent, dynamic-linking-aware reference to the thing --- in question. --- Note that this also has to be called from MachCodeGen in order to --- access static data like floating point literals (labels that were --- created after the cmmToCmm pass). --- The function must run in a monad that can keep track of imported symbols --- A function for recording an imported symbol must be passed in: --- - addImportCmmOpt for the CmmOptM monad --- - addImportNat for the NatM monad. - -data ReferenceKind - = DataReference - | CallReference - | JumpReference - deriving(Eq) - -class Monad m => CmmMakeDynamicReferenceM m where - addImport :: CLabel -> m () - getThisModule :: m Module - -instance CmmMakeDynamicReferenceM NatM where - addImport = addImportNat - getThisModule = getThisModuleNat - -cmmMakeDynamicReference - :: CmmMakeDynamicReferenceM m - => DynFlags - -> ReferenceKind -- whether this is the target of a jump - -> CLabel -- the label - -> m CmmExpr - -cmmMakeDynamicReference dflags referenceKind lbl - | Just _ <- dynamicLinkerLabelInfo lbl - = return $ CmmLit $ CmmLabel lbl -- already processed it, pass through - - | otherwise - = do this_mod <- getThisModule - case howToAccessLabel - dflags - (platformArch $ targetPlatform dflags) - (platformOS $ targetPlatform dflags) - this_mod - referenceKind lbl of - - AccessViaStub -> do - let stub = mkDynamicLinkerLabel CodeStub lbl - addImport stub - return $ CmmLit $ CmmLabel stub - - AccessViaSymbolPtr -> do - let symbolPtr = mkDynamicLinkerLabel SymbolPtr lbl - addImport symbolPtr - return $ CmmLoad (cmmMakePicReference dflags symbolPtr) (bWord dflags) - - AccessDirectly -> case referenceKind of - -- for data, we might have to make some calculations: - DataReference -> return $ cmmMakePicReference dflags lbl - -- all currently supported processors support - -- PC-relative branch and call instructions, - -- so just jump there if it's a call or a jump - _ -> return $ CmmLit $ CmmLabel lbl - - --- ----------------------------------------------------------------------------- --- Create a position independent reference to a label. --- (but do not bother with dynamic linking). --- We calculate the label's address by adding some (platform-dependent) --- offset to our base register; this offset is calculated by --- the function picRelative in the platform-dependent part below. - -cmmMakePicReference :: DynFlags -> CLabel -> CmmExpr -cmmMakePicReference dflags lbl - - -- Windows doesn't need PIC, - -- everything gets relocated at runtime - | OSMinGW32 <- platformOS $ targetPlatform dflags - = CmmLit $ CmmLabel lbl - - | OSAIX <- platformOS $ targetPlatform dflags - = CmmMachOp (MO_Add W32) - [ CmmReg (CmmGlobal PicBaseReg) - , CmmLit $ picRelative dflags - (platformArch $ targetPlatform dflags) - (platformOS $ targetPlatform dflags) - lbl ] - - -- both ABI versions default to medium code model - | ArchPPC_64 _ <- platformArch $ targetPlatform dflags - = CmmMachOp (MO_Add W32) -- code model medium - [ CmmReg (CmmGlobal PicBaseReg) - , CmmLit $ picRelative dflags - (platformArch $ targetPlatform dflags) - (platformOS $ targetPlatform dflags) - lbl ] - - | (positionIndependent dflags || gopt Opt_ExternalDynamicRefs dflags) - && absoluteLabel lbl - = CmmMachOp (MO_Add (wordWidth dflags)) - [ CmmReg (CmmGlobal PicBaseReg) - , CmmLit $ picRelative dflags - (platformArch $ targetPlatform dflags) - (platformOS $ targetPlatform dflags) - lbl ] - - | otherwise - = CmmLit $ CmmLabel lbl - - -absoluteLabel :: CLabel -> Bool -absoluteLabel lbl - = case dynamicLinkerLabelInfo lbl of - Just (GotSymbolPtr, _) -> False - Just (GotSymbolOffset, _) -> False - _ -> True - - --------------------------------------------------------------------------------- --- Knowledge about how special dynamic linker labels like symbol --- pointers, code stubs and GOT offsets look like is located in the --- module CLabel. - --- We have to decide which labels need to be accessed --- indirectly or via a piece of stub code. -data LabelAccessStyle - = AccessViaStub - | AccessViaSymbolPtr - | AccessDirectly - -howToAccessLabel - :: DynFlags -> Arch -> OS -> Module -> ReferenceKind -> CLabel -> LabelAccessStyle - - --- Windows --- In Windows speak, a "module" is a set of objects linked into the --- same Portable Executable (PE) file. (both .exe and .dll files are PEs). --- --- If we're compiling a multi-module program then symbols from other modules --- are accessed by a symbol pointer named __imp_SYMBOL. At runtime we have the --- following. --- --- (in the local module) --- __imp_SYMBOL: addr of SYMBOL --- --- (in the other module) --- SYMBOL: the real function / data. --- --- To access the function at SYMBOL from our local module, we just need to --- dereference the local __imp_SYMBOL. --- --- If not compiling with -dynamic we assume that all our code will be linked --- into the same .exe file. In this case we always access symbols directly, --- and never use __imp_SYMBOL. --- -howToAccessLabel dflags _ OSMinGW32 this_mod _ lbl - - -- Assume all symbols will be in the same PE, so just access them directly. - | not (gopt Opt_ExternalDynamicRefs dflags) - = AccessDirectly - - -- If the target symbol is in another PE we need to access it via the - -- appropriate __imp_SYMBOL pointer. - | labelDynamic dflags this_mod lbl - = AccessViaSymbolPtr - - -- Target symbol is in the same PE as the caller, so just access it directly. - | otherwise - = AccessDirectly - - --- Mach-O (Darwin, Mac OS X) --- --- Indirect access is required in the following cases: --- * things imported from a dynamic library --- * (not on x86_64) data from a different module, if we're generating PIC code --- It is always possible to access something indirectly, --- even when it's not necessary. --- -howToAccessLabel dflags arch OSDarwin this_mod DataReference lbl - -- data access to a dynamic library goes via a symbol pointer - | labelDynamic dflags this_mod lbl - = AccessViaSymbolPtr - - -- when generating PIC code, all cross-module data references must - -- must go via a symbol pointer, too, because the assembler - -- cannot generate code for a label difference where one - -- label is undefined. Doesn't apply t x86_64. - -- Unfortunately, we don't know whether it's cross-module, - -- so we do it for all externally visible labels. - -- This is a slight waste of time and space, but otherwise - -- we'd need to pass the current Module all the way in to - -- this function. - | arch /= ArchX86_64 - , positionIndependent dflags && externallyVisibleCLabel lbl - = AccessViaSymbolPtr - - | otherwise - = AccessDirectly - -howToAccessLabel dflags arch OSDarwin this_mod JumpReference lbl - -- dyld code stubs don't work for tailcalls because the - -- stack alignment is only right for regular calls. - -- Therefore, we have to go via a symbol pointer: - | arch == ArchX86 || arch == ArchX86_64 - , labelDynamic dflags this_mod lbl - = AccessViaSymbolPtr - - -howToAccessLabel dflags arch OSDarwin this_mod _ lbl - -- Code stubs are the usual method of choice for imported code; - -- not needed on x86_64 because Apple's new linker, ld64, generates - -- them automatically. - | arch /= ArchX86_64 - , labelDynamic dflags this_mod lbl - = AccessViaStub - - | otherwise - = AccessDirectly - - ----------------------------------------------------------------------------- --- AIX - --- quite simple (for now) -howToAccessLabel _dflags _arch OSAIX _this_mod kind _lbl - = case kind of - DataReference -> AccessViaSymbolPtr - CallReference -> AccessDirectly - JumpReference -> AccessDirectly - --- ELF (Linux) --- --- ELF tries to pretend to the main application code that dynamic linking does --- not exist. While this may sound convenient, it tends to mess things up in --- very bad ways, so we have to be careful when we generate code for a non-PIE --- main program (-dynamic but no -fPIC). --- --- Indirect access is required for references to imported symbols --- from position independent code. It is also required from the main program --- when dynamic libraries containing Haskell code are used. - -howToAccessLabel _ (ArchPPC_64 _) os _ kind _ - | osElfTarget os - = case kind of - -- ELF PPC64 (powerpc64-linux), AIX, MacOS 9, BeOS/PPC - DataReference -> AccessViaSymbolPtr - -- RTLD does not generate stubs for function descriptors - -- in tail calls. Create a symbol pointer and generate - -- the code to load the function descriptor at the call site. - JumpReference -> AccessViaSymbolPtr - -- regular calls are handled by the runtime linker - _ -> AccessDirectly - -howToAccessLabel dflags _ os _ _ _ - -- no PIC -> the dynamic linker does everything for us; - -- if we don't dynamically link to Haskell code, - -- it actually manages to do so without messing things up. - | osElfTarget os - , not (positionIndependent dflags) && - not (gopt Opt_ExternalDynamicRefs dflags) - = AccessDirectly - -howToAccessLabel dflags arch os this_mod DataReference lbl - | osElfTarget os - = case () of - -- A dynamic label needs to be accessed via a symbol pointer. - _ | labelDynamic dflags this_mod lbl - -> AccessViaSymbolPtr - - -- For PowerPC32 -fPIC, we have to access even static data - -- via a symbol pointer (see below for an explanation why - -- PowerPC32 Linux is especially broken). - | arch == ArchPPC - , positionIndependent dflags - -> AccessViaSymbolPtr - - | otherwise - -> AccessDirectly - - - -- In most cases, we have to avoid symbol stubs on ELF, for the following reasons: - -- on i386, the position-independent symbol stubs in the Procedure Linkage Table - -- require the address of the GOT to be loaded into register %ebx on entry. - -- The linker will take any reference to the symbol stub as a hint that - -- the label in question is a code label. When linking executables, this - -- will cause the linker to replace even data references to the label with - -- references to the symbol stub. - - -- This leaves calling a (foreign) function from non-PIC code - -- (AccessDirectly, because we get an implicit symbol stub) - -- and calling functions from PIC code on non-i386 platforms (via a symbol stub) - -howToAccessLabel dflags arch os this_mod CallReference lbl - | osElfTarget os - , labelDynamic dflags this_mod lbl && not (positionIndependent dflags) - = AccessDirectly - - | osElfTarget os - , arch /= ArchX86 - , labelDynamic dflags this_mod lbl - , positionIndependent dflags - = AccessViaStub - -howToAccessLabel dflags _ os this_mod _ lbl - | osElfTarget os - = if labelDynamic dflags this_mod lbl - then AccessViaSymbolPtr - else AccessDirectly - --- all other platforms -howToAccessLabel dflags _ _ _ _ _ - | not (positionIndependent dflags) - = AccessDirectly - - | otherwise - = panic "howToAccessLabel: PIC not defined for this platform" - - - --- ------------------------------------------------------------------- --- | Says what we have to add to our 'PIC base register' in order to --- get the address of a label. - -picRelative :: DynFlags -> Arch -> OS -> CLabel -> CmmLit - --- Darwin, but not x86_64: --- The PIC base register points to the PIC base label at the beginning --- of the current CmmDecl. We just have to use a label difference to --- get the offset. --- We have already made sure that all labels that are not from the current --- module are accessed indirectly ('as' can't calculate differences between --- undefined labels). -picRelative dflags arch OSDarwin lbl - | arch /= ArchX86_64 - = CmmLabelDiffOff lbl mkPicBaseLabel 0 (wordWidth dflags) - --- On AIX we use an indirect local TOC anchored by 'gotLabel'. --- This way we use up only one global TOC entry per compilation-unit --- (this is quite similar to GCC's @-mminimal-toc@ compilation mode) -picRelative dflags _ OSAIX lbl - = CmmLabelDiffOff lbl gotLabel 0 (wordWidth dflags) - --- PowerPC Linux: --- The PIC base register points to our fake GOT. Use a label difference --- to get the offset. --- We have made sure that *everything* is accessed indirectly, so this --- is only used for offsets from the GOT to symbol pointers inside the --- GOT. -picRelative dflags ArchPPC os lbl - | osElfTarget os - = CmmLabelDiffOff lbl gotLabel 0 (wordWidth dflags) - - --- Most Linux versions: --- The PIC base register points to the GOT. Use foo@got for symbol --- pointers, and foo@gotoff for everything else. --- Linux and Darwin on x86_64: --- The PIC base register is %rip, we use foo@gotpcrel for symbol pointers, --- and a GotSymbolOffset label for other things. --- For reasons of tradition, the symbol offset label is written as a plain label. -picRelative _ arch os lbl - | osElfTarget os || (os == OSDarwin && arch == ArchX86_64) - = let result - | Just (SymbolPtr, lbl') <- dynamicLinkerLabelInfo lbl - = CmmLabel $ mkDynamicLinkerLabel GotSymbolPtr lbl' - - | otherwise - = CmmLabel $ mkDynamicLinkerLabel GotSymbolOffset lbl - - in result - -picRelative _ _ _ _ - = panic "PositionIndependentCode.picRelative undefined for this platform" - - - --------------------------------------------------------------------------------- - -needImportedSymbols :: DynFlags -> Arch -> OS -> Bool -needImportedSymbols dflags arch os - | os == OSDarwin - , arch /= ArchX86_64 - = True - - | os == OSAIX - = True - - -- PowerPC Linux: -fPIC or -dynamic - | osElfTarget os - , arch == ArchPPC - = positionIndependent dflags || gopt Opt_ExternalDynamicRefs dflags - - -- PowerPC 64 Linux: always - | osElfTarget os - , arch == ArchPPC_64 ELF_V1 || arch == ArchPPC_64 ELF_V2 - = True - - -- i386 (and others?): -dynamic but not -fPIC - | osElfTarget os - , arch /= ArchPPC_64 ELF_V1 && arch /= ArchPPC_64 ELF_V2 - = gopt Opt_ExternalDynamicRefs dflags && - not (positionIndependent dflags) - - | otherwise - = False - --- gotLabel --- The label used to refer to our "fake GOT" from --- position-independent code. -gotLabel :: CLabel -gotLabel - -- HACK: this label isn't really foreign - = mkForeignLabel - (fsLit ".LCTOC1") - Nothing ForeignLabelInThisPackage IsData - - - --------------------------------------------------------------------------------- --- We don't need to declare any offset tables. --- However, for PIC on x86, we need a small helper function. -pprGotDeclaration :: DynFlags -> Arch -> OS -> SDoc -pprGotDeclaration dflags ArchX86 OSDarwin - | positionIndependent dflags - = vcat [ - text ".section __TEXT,__textcoal_nt,coalesced,no_toc", - text ".weak_definition ___i686.get_pc_thunk.ax", - text ".private_extern ___i686.get_pc_thunk.ax", - text "___i686.get_pc_thunk.ax:", - text "\tmovl (%esp), %eax", - text "\tret" ] - -pprGotDeclaration _ _ OSDarwin - = empty - --- Emit XCOFF TOC section -pprGotDeclaration _ _ OSAIX - = vcat $ [ text ".toc" - , text ".tc ghc_toc_table[TC],.LCTOC1" - , text ".csect ghc_toc_table[RW]" - -- See Note [.LCTOC1 in PPC PIC code] - , text ".set .LCTOC1,$+0x8000" - ] - - --- PPC 64 ELF v1 needs a Table Of Contents (TOC) -pprGotDeclaration _ (ArchPPC_64 ELF_V1) _ - = text ".section \".toc\",\"aw\"" --- In ELF v2 we also need to tell the assembler that we want ABI --- version 2. This would normally be done at the top of the file --- right after a file directive, but I could not figure out how --- to do that. -pprGotDeclaration _ (ArchPPC_64 ELF_V2) _ - = vcat [ text ".abiversion 2", - text ".section \".toc\",\"aw\"" - ] - --- Emit GOT declaration --- Output whatever needs to be output once per .s file. -pprGotDeclaration dflags arch os - | osElfTarget os - , arch /= ArchPPC_64 ELF_V1 && arch /= ArchPPC_64 ELF_V2 - , not (positionIndependent dflags) - = empty - - | osElfTarget os - , arch /= ArchPPC_64 ELF_V1 && arch /= ArchPPC_64 ELF_V2 - = vcat [ - -- See Note [.LCTOC1 in PPC PIC code] - text ".section \".got2\",\"aw\"", - text ".LCTOC1 = .+32768" ] - -pprGotDeclaration _ _ _ - = panic "pprGotDeclaration: no match" - - --------------------------------------------------------------------------------- --- On Darwin, we have to generate our own stub code for lazy binding.. --- For each processor architecture, there are two versions, one for PIC --- and one for non-PIC. --- - -pprImportedSymbol :: DynFlags -> Platform -> CLabel -> SDoc -pprImportedSymbol dflags (Platform { platformMini = PlatformMini { platformMini_arch = ArchX86, platformMini_os = OSDarwin } }) importedLbl - | Just (CodeStub, lbl) <- dynamicLinkerLabelInfo importedLbl - = case positionIndependent dflags of - False -> - vcat [ - text ".symbol_stub", - text "L" <> pprCLabel dflags lbl <> ptext (sLit "$stub:"), - text "\t.indirect_symbol" <+> pprCLabel dflags lbl, - text "\tjmp *L" <> pprCLabel dflags lbl - <> text "$lazy_ptr", - text "L" <> pprCLabel dflags lbl - <> text "$stub_binder:", - text "\tpushl $L" <> pprCLabel dflags lbl - <> text "$lazy_ptr", - text "\tjmp dyld_stub_binding_helper" - ] - True -> - vcat [ - text ".section __TEXT,__picsymbolstub2," - <> text "symbol_stubs,pure_instructions,25", - text "L" <> pprCLabel dflags lbl <> ptext (sLit "$stub:"), - text "\t.indirect_symbol" <+> pprCLabel dflags lbl, - text "\tcall ___i686.get_pc_thunk.ax", - text "1:", - text "\tmovl L" <> pprCLabel dflags lbl - <> text "$lazy_ptr-1b(%eax),%edx", - text "\tjmp *%edx", - text "L" <> pprCLabel dflags lbl - <> text "$stub_binder:", - text "\tlea L" <> pprCLabel dflags lbl - <> text "$lazy_ptr-1b(%eax),%eax", - text "\tpushl %eax", - text "\tjmp dyld_stub_binding_helper" - ] - $+$ vcat [ text ".section __DATA, __la_sym_ptr" - <> (if positionIndependent dflags then int 2 else int 3) - <> text ",lazy_symbol_pointers", - text "L" <> pprCLabel dflags lbl <> ptext (sLit "$lazy_ptr:"), - text "\t.indirect_symbol" <+> pprCLabel dflags lbl, - text "\t.long L" <> pprCLabel dflags lbl - <> text "$stub_binder"] - - | Just (SymbolPtr, lbl) <- dynamicLinkerLabelInfo importedLbl - = vcat [ - text ".non_lazy_symbol_pointer", - char 'L' <> pprCLabel dflags lbl <> text "$non_lazy_ptr:", - text "\t.indirect_symbol" <+> pprCLabel dflags lbl, - text "\t.long\t0"] - - | otherwise - = empty - - -pprImportedSymbol _ (Platform { platformMini = PlatformMini { platformMini_os = OSDarwin } }) _ - = empty - --- XCOFF / AIX --- --- Similar to PPC64 ELF v1, there's dedicated TOC register (r2). To --- workaround the limitation of a global TOC we use an indirect TOC --- with the label `ghc_toc_table`. --- --- See also GCC's `-mminimal-toc` compilation mode or --- http://www.ibm.com/developerworks/rational/library/overview-toc-aix/ --- --- NB: No DSO-support yet - -pprImportedSymbol dflags (Platform { platformMini = PlatformMini { platformMini_os = OSAIX } }) importedLbl - = case dynamicLinkerLabelInfo importedLbl of - Just (SymbolPtr, lbl) - -> vcat [ - text "LC.." <> pprCLabel dflags lbl <> char ':', - text "\t.long" <+> pprCLabel dflags lbl ] - _ -> empty - --- ELF / Linux --- --- In theory, we don't need to generate any stubs or symbol pointers --- by hand for Linux. --- --- Reality differs from this in two areas. --- --- 1) If we just use a dynamically imported symbol directly in a read-only --- section of the main executable (as GCC does), ld generates R_*_COPY --- relocations, which are fundamentally incompatible with reversed info --- tables. Therefore, we need a table of imported addresses in a writable --- section. --- The "official" GOT mechanism (label@got) isn't intended to be used --- in position dependent code, so we have to create our own "fake GOT" --- when not Opt_PIC && WayDyn `elem` ways dflags. --- --- 2) PowerPC Linux is just plain broken. --- While it's theoretically possible to use GOT offsets larger --- than 16 bit, the standard crt*.o files don't, which leads to --- linker errors as soon as the GOT size exceeds 16 bit. --- Also, the assembler doesn't support @gotoff labels. --- In order to be able to use a larger GOT, we have to circumvent the --- entire GOT mechanism and do it ourselves (this is also what GCC does). - - --- When needImportedSymbols is defined, --- the NCG will keep track of all DynamicLinkerLabels it uses --- and output each of them using pprImportedSymbol. - -pprImportedSymbol dflags platform@(Platform { platformMini = PlatformMini { platformMini_arch = ArchPPC_64 _ } }) - importedLbl - | osElfTarget (platformOS platform) - = case dynamicLinkerLabelInfo importedLbl of - Just (SymbolPtr, lbl) - -> vcat [ - text ".section \".toc\", \"aw\"", - text ".LC_" <> pprCLabel dflags lbl <> char ':', - text "\t.quad" <+> pprCLabel dflags lbl ] - _ -> empty - -pprImportedSymbol dflags platform importedLbl - | osElfTarget (platformOS platform) - = case dynamicLinkerLabelInfo importedLbl of - Just (SymbolPtr, lbl) - -> let symbolSize = case wordWidth dflags of - W32 -> sLit "\t.long" - W64 -> sLit "\t.quad" - _ -> panic "Unknown wordRep in pprImportedSymbol" - - in vcat [ - text ".section \".got2\", \"aw\"", - text ".LC_" <> pprCLabel dflags lbl <> char ':', - ptext symbolSize <+> pprCLabel dflags lbl ] - - -- PLT code stubs are generated automatically by the dynamic linker. - _ -> empty - -pprImportedSymbol _ _ _ - = panic "PIC.pprImportedSymbol: no match" - --------------------------------------------------------------------------------- --- Generate code to calculate the address that should be put in the --- PIC base register. --- This is called by MachCodeGen for every CmmProc that accessed the --- PIC base register. It adds the appropriate instructions to the --- top of the CmmProc. - --- It is assumed that the first NatCmmDecl in the input list is a Proc --- and the rest are CmmDatas. - --- Darwin is simple: just fetch the address of a local label. --- The FETCHPC pseudo-instruction is expanded to multiple instructions --- during pretty-printing so that we don't have to deal with the --- local label: - --- PowerPC version: --- bcl 20,31,1f. --- 1: mflr picReg - --- i386 version: --- call 1f --- 1: popl %picReg - - - --- Get a pointer to our own fake GOT, which is defined on a per-module basis. --- This is exactly how GCC does it in linux. - -initializePicBase_ppc - :: Arch -> OS -> Reg - -> [NatCmmDecl RawCmmStatics PPC.Instr] - -> NatM [NatCmmDecl RawCmmStatics PPC.Instr] - -initializePicBase_ppc ArchPPC os picReg - (CmmProc info lab live (ListGraph blocks) : statics) - | osElfTarget os - = do - let - gotOffset = PPC.ImmConstantDiff - (PPC.ImmCLbl gotLabel) - (PPC.ImmCLbl mkPicBaseLabel) - - blocks' = case blocks of - [] -> [] - (b:bs) -> fetchPC b : map maybeFetchPC bs - - maybeFetchPC b@(BasicBlock bID _) - | bID `mapMember` info = fetchPC b - | otherwise = b - - -- GCC does PIC prologs thusly: - -- bcl 20,31,.L1 - -- .L1: - -- mflr 30 - -- addis 30,30,.LCTOC1-.L1@ha - -- addi 30,30,.LCTOC1-.L1@l - -- TODO: below we use it over temporary register, - -- it can and should be optimised by picking - -- correct PIC reg. - fetchPC (BasicBlock bID insns) = - BasicBlock bID (PPC.FETCHPC picReg - : PPC.ADDIS picReg picReg (PPC.HA gotOffset) - : PPC.ADD picReg picReg - (PPC.RIImm (PPC.LO gotOffset)) - : PPC.MR PPC.r30 picReg - : insns) - - return (CmmProc info lab live (ListGraph blocks') : statics) - -------------------------------------------------------------------------- --- Load TOC into register 2 --- PowerPC 64-bit ELF ABI 2.0 requires the address of the callee --- in register 12. --- We pass the label to FETCHTOC and create a .localentry too. --- TODO: Explain this better and refer to ABI spec! -{- -We would like to do approximately this, but spill slot allocation -might be added before the first BasicBlock. That violates the ABI. - -For now we will emit the prologue code in the pretty printer, -which is also what we do for ELF v1. -initializePicBase_ppc (ArchPPC_64 ELF_V2) OSLinux picReg - (CmmProc info lab live (ListGraph (entry:blocks)) : statics) - = do - bID <-getUniqueM - return (CmmProc info lab live (ListGraph (b':entry:blocks)) - : statics) - where BasicBlock entryID _ = entry - b' = BasicBlock bID [PPC.FETCHTOC picReg lab, - PPC.BCC PPC.ALWAYS entryID] --} - -initializePicBase_ppc _ _ _ _ - = panic "initializePicBase_ppc: not needed" - - --- We cheat a bit here by defining a pseudo-instruction named FETCHGOT --- which pretty-prints as: --- call 1f --- 1: popl %picReg --- addl __GLOBAL_OFFSET_TABLE__+.-1b, %picReg --- (See PprMach.hs) - -initializePicBase_x86 - :: Arch -> OS -> Reg - -> [NatCmmDecl (Alignment, RawCmmStatics) X86.Instr] - -> NatM [NatCmmDecl (Alignment, RawCmmStatics) X86.Instr] - -initializePicBase_x86 ArchX86 os picReg - (CmmProc info lab live (ListGraph blocks) : statics) - | osElfTarget os - = return (CmmProc info lab live (ListGraph blocks') : statics) - where blocks' = case blocks of - [] -> [] - (b:bs) -> fetchGOT b : map maybeFetchGOT bs - - -- we want to add a FETCHGOT instruction to the beginning of - -- every block that is an entry point, which corresponds to - -- the blocks that have entries in the info-table mapping. - maybeFetchGOT b@(BasicBlock bID _) - | bID `mapMember` info = fetchGOT b - | otherwise = b - - fetchGOT (BasicBlock bID insns) = - BasicBlock bID (X86.FETCHGOT picReg : insns) - -initializePicBase_x86 ArchX86 OSDarwin picReg - (CmmProc info lab live (ListGraph (entry:blocks)) : statics) - = return (CmmProc info lab live (ListGraph (block':blocks)) : statics) - - where BasicBlock bID insns = entry - block' = BasicBlock bID (X86.FETCHPC picReg : insns) - -initializePicBase_x86 _ _ _ _ - = panic "initializePicBase_x86: not needed" - diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs deleted file mode 100644 index ad47501981..0000000000 --- a/compiler/nativeGen/PPC/CodeGen.hs +++ /dev/null @@ -1,2453 +0,0 @@ -{-# LANGUAGE CPP, GADTs #-} - ------------------------------------------------------------------------------ --- --- Generating machine code (instruction selection) --- --- (c) The University of Glasgow 1996-2004 --- ------------------------------------------------------------------------------ - --- This is a big module, but, if you pay attention to --- (a) the sectioning, and (b) the type signatures, --- the structure should not be too overwhelming. - -module PPC.CodeGen ( - cmmTopCodeGen, - generateJumpTableForInstr, - InstrBlock -) - -where - -#include "HsVersions.h" - --- NCG stuff: -import GhcPrelude - -import GHC.Platform.Regs -import PPC.Instr -import PPC.Cond -import PPC.Regs -import CPrim -import NCGMonad ( NatM, getNewRegNat, getNewLabelNat - , getBlockIdNat, getPicBaseNat, getNewRegPairNat - , getPicBaseMaybeNat ) -import Instruction -import PIC -import Format -import RegClass -import Reg -import TargetReg -import GHC.Platform - --- Our intermediate code: -import GHC.Cmm.BlockId -import GHC.Cmm.Ppr ( pprExpr ) -import GHC.Cmm -import GHC.Cmm.Utils -import GHC.Cmm.Switch -import GHC.Cmm.CLabel -import GHC.Cmm.Dataflow.Block -import GHC.Cmm.Dataflow.Graph - --- The rest: -import OrdList -import Outputable -import GHC.Driver.Session - -import Control.Monad ( mapAndUnzipM, when ) -import Data.Bits -import Data.Word - -import BasicTypes -import FastString -import Util - --- ----------------------------------------------------------------------------- --- Top-level of the instruction selector - --- | 'InstrBlock's are the insn sequences generated by the insn selectors. --- They are really trees of insns to facilitate fast appending, where a --- left-to-right traversal (pre-order?) yields the insns in the correct --- order. - -cmmTopCodeGen - :: RawCmmDecl - -> NatM [NatCmmDecl RawCmmStatics Instr] - -cmmTopCodeGen (CmmProc info lab live graph) = do - let blocks = toBlockListEntryFirst graph - (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks - dflags <- getDynFlags - let proc = CmmProc info lab live (ListGraph $ concat nat_blocks) - tops = proc : concat statics - os = platformOS $ targetPlatform dflags - arch = platformArch $ targetPlatform dflags - case arch of - ArchPPC | os == OSAIX -> return tops - | otherwise -> do - picBaseMb <- getPicBaseMaybeNat - case picBaseMb of - Just picBase -> initializePicBase_ppc arch os picBase tops - Nothing -> return tops - ArchPPC_64 ELF_V1 -> fixup_entry tops - -- generating function descriptor is handled in - -- pretty printer - ArchPPC_64 ELF_V2 -> fixup_entry tops - -- generating function prologue is handled in - -- pretty printer - _ -> panic "PPC.cmmTopCodeGen: unknown arch" - where - fixup_entry (CmmProc info lab live (ListGraph (entry:blocks)) : statics) - = do - let BasicBlock bID insns = entry - bID' <- if lab == (blockLbl bID) - then newBlockId - else return bID - let b' = BasicBlock bID' insns - return (CmmProc info lab live (ListGraph (b':blocks)) : statics) - fixup_entry _ = panic "cmmTopCodegen: Broken CmmProc" - -cmmTopCodeGen (CmmData sec dat) = do - return [CmmData sec dat] -- no translation, we just use CmmStatic - -basicBlockCodeGen - :: Block CmmNode C C - -> NatM ( [NatBasicBlock Instr] - , [NatCmmDecl RawCmmStatics Instr]) - -basicBlockCodeGen block = do - let (_, nodes, tail) = blockSplit block - id = entryLabel block - stmts = blockToList nodes - mid_instrs <- stmtsToInstrs stmts - tail_instrs <- stmtToInstrs tail - let instrs = mid_instrs `appOL` tail_instrs - -- code generation may introduce new basic block boundaries, which - -- are indicated by the NEWBLOCK instruction. We must split up the - -- instruction stream into basic blocks again. Also, we extract - -- LDATAs here too. - let - (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs - - mkBlocks (NEWBLOCK id) (instrs,blocks,statics) - = ([], BasicBlock id instrs : blocks, statics) - mkBlocks (LDATA sec dat) (instrs,blocks,statics) - = (instrs, blocks, CmmData sec dat:statics) - mkBlocks instr (instrs,blocks,statics) - = (instr:instrs, blocks, statics) - return (BasicBlock id top : other_blocks, statics) - -stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock -stmtsToInstrs stmts - = do instrss <- mapM stmtToInstrs stmts - return (concatOL instrss) - -stmtToInstrs :: CmmNode e x -> NatM InstrBlock -stmtToInstrs stmt = do - dflags <- getDynFlags - case stmt of - CmmComment s -> return (unitOL (COMMENT s)) - CmmTick {} -> return nilOL - CmmUnwind {} -> return nilOL - - CmmAssign reg src - | isFloatType ty -> assignReg_FltCode format reg src - | target32Bit (targetPlatform dflags) && - isWord64 ty -> assignReg_I64Code reg src - | otherwise -> assignReg_IntCode format reg src - where ty = cmmRegType dflags reg - format = cmmTypeFormat ty - - CmmStore addr src - | isFloatType ty -> assignMem_FltCode format addr src - | target32Bit (targetPlatform dflags) && - isWord64 ty -> assignMem_I64Code addr src - | otherwise -> assignMem_IntCode format addr src - where ty = cmmExprType dflags src - format = cmmTypeFormat ty - - CmmUnsafeForeignCall target result_regs args - -> genCCall target result_regs args - - CmmBranch id -> genBranch id - CmmCondBranch arg true false prediction -> do - b1 <- genCondJump true arg prediction - b2 <- genBranch false - return (b1 `appOL` b2) - CmmSwitch arg ids -> do dflags <- getDynFlags - genSwitch dflags arg ids - CmmCall { cml_target = arg - , cml_args_regs = gregs } -> do - dflags <- getDynFlags - genJump arg (jumpRegs dflags gregs) - _ -> - panic "stmtToInstrs: statement should have been cps'd away" - -jumpRegs :: DynFlags -> [GlobalReg] -> [Reg] -jumpRegs dflags gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ] - where platform = targetPlatform dflags - --------------------------------------------------------------------------------- --- | 'InstrBlock's are the insn sequences generated by the insn selectors. --- They are really trees of insns to facilitate fast appending, where a --- left-to-right traversal yields the insns in the correct order. --- -type InstrBlock - = OrdList Instr - - --- | Register's passed up the tree. If the stix code forces the register --- to live in a pre-decided machine register, it comes out as @Fixed@; --- otherwise, it comes out as @Any@, and the parent can decide which --- register to put it in. --- -data Register - = Fixed Format Reg InstrBlock - | Any Format (Reg -> InstrBlock) - - -swizzleRegisterRep :: Register -> Format -> Register -swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code -swizzleRegisterRep (Any _ codefn) format = Any format codefn - - --- | Grab the Reg for a CmmReg -getRegisterReg :: Platform -> CmmReg -> Reg - -getRegisterReg _ (CmmLocal (LocalReg u pk)) - = RegVirtual $ mkVirtualReg u (cmmTypeFormat pk) - -getRegisterReg platform (CmmGlobal mid) - = case globalRegMaybe platform mid of - Just reg -> RegReal reg - Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid) - -- By this stage, the only MagicIds remaining should be the - -- ones which map to a real machine register on this - -- platform. Hence ... - --- | Convert a BlockId to some CmmStatic data -jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic -jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags)) -jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel) - where blockLabel = blockLbl blockid - - - --- ----------------------------------------------------------------------------- --- General things for putting together code sequences - --- Expand CmmRegOff. ToDo: should we do it this way around, or convert --- CmmExprs into CmmRegOff? -mangleIndexTree :: DynFlags -> CmmExpr -> CmmExpr -mangleIndexTree dflags (CmmRegOff reg off) - = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)] - where width = typeWidth (cmmRegType dflags reg) - -mangleIndexTree _ _ - = panic "PPC.CodeGen.mangleIndexTree: no match" - --- ----------------------------------------------------------------------------- --- Code gen for 64-bit arithmetic on 32-bit platforms - -{- -Simple support for generating 64-bit code (ie, 64 bit values and 64 -bit assignments) on 32-bit platforms. Unlike the main code generator -we merely shoot for generating working code as simply as possible, and -pay little attention to code quality. Specifically, there is no -attempt to deal cleverly with the fixed-vs-floating register -distinction; all values are generated into (pairs of) floating -registers, even if this would mean some redundant reg-reg moves as a -result. Only one of the VRegUniques is returned, since it will be -of the VRegUniqueLo form, and the upper-half VReg can be determined -by applying getHiVRegFromLo to it. --} - -data ChildCode64 -- a.k.a "Register64" - = ChildCode64 - InstrBlock -- code - Reg -- the lower 32-bit temporary which contains the - -- result; use getHiVRegFromLo to find the other - -- VRegUnique. Rules of this simplified insn - -- selection game are therefore that the returned - -- Reg may be modified - - --- | Compute an expression into a register, but --- we don't mind which one it is. -getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock) -getSomeReg expr = do - r <- getRegister expr - case r of - Any rep code -> do - tmp <- getNewRegNat rep - return (tmp, code tmp) - Fixed _ reg code -> - return (reg, code) - -getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock) -getI64Amodes addrTree = do - Amode hi_addr addr_code <- getAmode D addrTree - case addrOffset hi_addr 4 of - Just lo_addr -> return (hi_addr, lo_addr, addr_code) - Nothing -> do (hi_ptr, code) <- getSomeReg addrTree - return (AddrRegImm hi_ptr (ImmInt 0), - AddrRegImm hi_ptr (ImmInt 4), - code) - - -assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock -assignMem_I64Code addrTree valueTree = do - (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree - ChildCode64 vcode rlo <- iselExpr64 valueTree - let - rhi = getHiVRegFromLo rlo - - -- Big-endian store - mov_hi = ST II32 rhi hi_addr - mov_lo = ST II32 rlo lo_addr - return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi) - - -assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock -assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do - ChildCode64 vcode r_src_lo <- iselExpr64 valueTree - let - r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32 - r_dst_hi = getHiVRegFromLo r_dst_lo - r_src_hi = getHiVRegFromLo r_src_lo - mov_lo = MR r_dst_lo r_src_lo - mov_hi = MR r_dst_hi r_src_hi - return ( - vcode `snocOL` mov_lo `snocOL` mov_hi - ) - -assignReg_I64Code _ _ - = panic "assignReg_I64Code(powerpc): invalid lvalue" - - -iselExpr64 :: CmmExpr -> NatM ChildCode64 -iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do - (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree - (rlo, rhi) <- getNewRegPairNat II32 - let mov_hi = LD II32 rhi hi_addr - mov_lo = LD II32 rlo lo_addr - return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) - rlo - -iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty - = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32)) - -iselExpr64 (CmmLit (CmmInt i _)) = do - (rlo,rhi) <- getNewRegPairNat II32 - let - half0 = fromIntegral (fromIntegral i :: Word16) - half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16) - half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16) - half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16) - - code = toOL [ - LIS rlo (ImmInt half1), - OR rlo rlo (RIImm $ ImmInt half0), - LIS rhi (ImmInt half3), - OR rhi rhi (RIImm $ ImmInt half2) - ] - return (ChildCode64 code rlo) - -iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do - ChildCode64 code1 r1lo <- iselExpr64 e1 - ChildCode64 code2 r2lo <- iselExpr64 e2 - (rlo,rhi) <- getNewRegPairNat II32 - let - r1hi = getHiVRegFromLo r1lo - r2hi = getHiVRegFromLo r2lo - code = code1 `appOL` - code2 `appOL` - toOL [ ADDC rlo r1lo r2lo, - ADDE rhi r1hi r2hi ] - return (ChildCode64 code rlo) - -iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do - ChildCode64 code1 r1lo <- iselExpr64 e1 - ChildCode64 code2 r2lo <- iselExpr64 e2 - (rlo,rhi) <- getNewRegPairNat II32 - let - r1hi = getHiVRegFromLo r1lo - r2hi = getHiVRegFromLo r2lo - code = code1 `appOL` - code2 `appOL` - toOL [ SUBFC rlo r2lo (RIReg r1lo), - SUBFE rhi r2hi r1hi ] - return (ChildCode64 code rlo) - -iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do - (expr_reg,expr_code) <- getSomeReg expr - (rlo, rhi) <- getNewRegPairNat II32 - let mov_hi = LI rhi (ImmInt 0) - mov_lo = MR rlo expr_reg - return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi) - rlo - -iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do - (expr_reg,expr_code) <- getSomeReg expr - (rlo, rhi) <- getNewRegPairNat II32 - let mov_hi = SRA II32 rhi expr_reg (RIImm (ImmInt 31)) - mov_lo = MR rlo expr_reg - return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi) - rlo -iselExpr64 expr - = pprPanic "iselExpr64(powerpc)" (pprExpr expr) - - - -getRegister :: CmmExpr -> NatM Register -getRegister e = do dflags <- getDynFlags - getRegister' dflags e - -getRegister' :: DynFlags -> CmmExpr -> NatM Register - -getRegister' dflags (CmmReg (CmmGlobal PicBaseReg)) - | OSAIX <- platformOS (targetPlatform dflags) = do - let code dst = toOL [ LD II32 dst tocAddr ] - tocAddr = AddrRegImm toc (ImmLit (text "ghc_toc_table[TC]")) - return (Any II32 code) - | target32Bit (targetPlatform dflags) = do - reg <- getPicBaseNat $ archWordFormat (target32Bit (targetPlatform dflags)) - return (Fixed (archWordFormat (target32Bit (targetPlatform dflags))) - reg nilOL) - | otherwise = return (Fixed II64 toc nilOL) - -getRegister' dflags (CmmReg reg) - = return (Fixed (cmmTypeFormat (cmmRegType dflags reg)) - (getRegisterReg (targetPlatform dflags) reg) nilOL) - -getRegister' dflags tree@(CmmRegOff _ _) - = getRegister' dflags (mangleIndexTree dflags tree) - - -- for 32-bit architectures, support some 64 -> 32 bit conversions: - -- TO_W_(x), TO_W_(x >> 32) - -getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32) - [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) - | target32Bit (targetPlatform dflags) = do - ChildCode64 code rlo <- iselExpr64 x - return $ Fixed II32 (getHiVRegFromLo rlo) code - -getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32) - [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) - | target32Bit (targetPlatform dflags) = do - ChildCode64 code rlo <- iselExpr64 x - return $ Fixed II32 (getHiVRegFromLo rlo) code - -getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32) [x]) - | target32Bit (targetPlatform dflags) = do - ChildCode64 code rlo <- iselExpr64 x - return $ Fixed II32 rlo code - -getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32) [x]) - | target32Bit (targetPlatform dflags) = do - ChildCode64 code rlo <- iselExpr64 x - return $ Fixed II32 rlo code - -getRegister' dflags (CmmLoad mem pk) - | not (isWord64 pk) = do - let platform = targetPlatform dflags - Amode addr addr_code <- getAmode D mem - let code dst = ASSERT((targetClassOfReg platform dst == RcDouble) == isFloatType pk) - addr_code `snocOL` LD format dst addr - return (Any format code) - | not (target32Bit (targetPlatform dflags)) = do - Amode addr addr_code <- getAmode DS mem - let code dst = addr_code `snocOL` LD II64 dst addr - return (Any II64 code) - - where format = cmmTypeFormat pk - --- catch simple cases of zero- or sign-extended load -getRegister' _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do - Amode addr addr_code <- getAmode D mem - return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr)) - -getRegister' _ (CmmMachOp (MO_XX_Conv W8 W32) [CmmLoad mem _]) = do - Amode addr addr_code <- getAmode D mem - return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr)) - -getRegister' _ (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad mem _]) = do - Amode addr addr_code <- getAmode D mem - return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr)) - -getRegister' _ (CmmMachOp (MO_XX_Conv W8 W64) [CmmLoad mem _]) = do - Amode addr addr_code <- getAmode D mem - return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr)) - --- Note: there is no Load Byte Arithmetic instruction, so no signed case here - -getRegister' _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do - Amode addr addr_code <- getAmode D mem - return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr)) - -getRegister' _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do - Amode addr addr_code <- getAmode D mem - return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr)) - -getRegister' _ (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad mem _]) = do - Amode addr addr_code <- getAmode D mem - return (Any II64 (\dst -> addr_code `snocOL` LD II16 dst addr)) - -getRegister' _ (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad mem _]) = do - Amode addr addr_code <- getAmode D mem - return (Any II64 (\dst -> addr_code `snocOL` LA II16 dst addr)) - -getRegister' _ (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad mem _]) = do - Amode addr addr_code <- getAmode D mem - return (Any II64 (\dst -> addr_code `snocOL` LD II32 dst addr)) - -getRegister' _ (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad mem _]) = do - -- lwa is DS-form. See Note [Power instruction format] - Amode addr addr_code <- getAmode DS mem - return (Any II64 (\dst -> addr_code `snocOL` LA II32 dst addr)) - -getRegister' dflags (CmmMachOp mop [x]) -- unary MachOps - = case mop of - MO_Not rep -> triv_ucode_int rep NOT - - MO_F_Neg w -> triv_ucode_float w FNEG - MO_S_Neg w -> triv_ucode_int w NEG - - MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x - MO_FF_Conv W32 W64 -> conversionNop FF64 x - - MO_FS_Conv from to -> coerceFP2Int from to x - MO_SF_Conv from to -> coerceInt2FP from to x - - MO_SS_Conv from to - | from >= to -> conversionNop (intFormat to) x - | otherwise -> triv_ucode_int to (EXTS (intFormat from)) - - MO_UU_Conv from to - | from >= to -> conversionNop (intFormat to) x - | otherwise -> clearLeft from to - - MO_XX_Conv _ to -> conversionNop (intFormat to) x - - _ -> panic "PPC.CodeGen.getRegister: no match" - - where - triv_ucode_int width instr = trivialUCode (intFormat width) instr x - triv_ucode_float width instr = trivialUCode (floatFormat width) instr x - - conversionNop new_format expr - = do e_code <- getRegister' dflags expr - return (swizzleRegisterRep e_code new_format) - - clearLeft from to - = do (src1, code1) <- getSomeReg x - let arch_fmt = intFormat (wordWidth dflags) - arch_bits = widthInBits (wordWidth dflags) - size = widthInBits from - code dst = code1 `snocOL` - CLRLI arch_fmt dst src1 (arch_bits - size) - return (Any (intFormat to) code) - -getRegister' _ (CmmMachOp mop [x, y]) -- dyadic PrimOps - = case mop of - MO_F_Eq _ -> condFltReg EQQ x y - MO_F_Ne _ -> condFltReg NE x y - MO_F_Gt _ -> condFltReg GTT x y - MO_F_Ge _ -> condFltReg GE x y - MO_F_Lt _ -> condFltReg LTT x y - MO_F_Le _ -> condFltReg LE x y - - MO_Eq rep -> condIntReg EQQ rep x y - MO_Ne rep -> condIntReg NE rep x y - - MO_S_Gt rep -> condIntReg GTT rep x y - MO_S_Ge rep -> condIntReg GE rep x y - MO_S_Lt rep -> condIntReg LTT rep x y - MO_S_Le rep -> condIntReg LE rep x y - - MO_U_Gt rep -> condIntReg GU rep x y - MO_U_Ge rep -> condIntReg GEU rep x y - MO_U_Lt rep -> condIntReg LU rep x y - MO_U_Le rep -> condIntReg LEU rep x y - - MO_F_Add w -> triv_float w FADD - MO_F_Sub w -> triv_float w FSUB - MO_F_Mul w -> triv_float w FMUL - MO_F_Quot w -> triv_float w FDIV - - -- optimize addition with 32-bit immediate - -- (needed for PIC) - MO_Add W32 -> - case y of - CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True imm - -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep) - CmmLit lit - -> do - (src, srcCode) <- getSomeReg x - let imm = litToImm lit - code dst = srcCode `appOL` toOL [ - ADDIS dst src (HA imm), - ADD dst dst (RIImm (LO imm)) - ] - return (Any II32 code) - _ -> trivialCode W32 True ADD x y - - MO_Add rep -> trivialCode rep True ADD x y - MO_Sub rep -> - case y of - CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm) - -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep) - _ -> case x of - CmmLit (CmmInt imm _) - | Just _ <- makeImmediate rep True imm - -- subfi ('subtract from' with immediate) doesn't exist - -> trivialCode rep True SUBFC y x - _ -> trivialCodeNoImm' (intFormat rep) SUBF y x - - MO_Mul rep -> shiftMulCode rep True MULL x y - MO_S_MulMayOflo rep -> do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - let - format = intFormat rep - code dst = code1 `appOL` code2 - `appOL` toOL [ MULLO format dst src1 src2 - , MFOV format dst - ] - return (Any format code) - - MO_S_Quot rep -> divCode rep True x y - MO_U_Quot rep -> divCode rep False x y - - MO_S_Rem rep -> remainder rep True x y - MO_U_Rem rep -> remainder rep False x y - - MO_And rep -> case y of - (CmmLit (CmmInt imm _)) | imm == -8 || imm == -4 - -> do - (src, srcCode) <- getSomeReg x - let clear_mask = if imm == -4 then 2 else 3 - fmt = intFormat rep - code dst = srcCode - `appOL` unitOL (CLRRI fmt dst src clear_mask) - return (Any fmt code) - _ -> trivialCode rep False AND x y - MO_Or rep -> trivialCode rep False OR x y - MO_Xor rep -> trivialCode rep False XOR x y - - MO_Shl rep -> shiftMulCode rep False SL x y - MO_S_Shr rep -> srCode rep True SRA x y - MO_U_Shr rep -> srCode rep False SR x y - _ -> panic "PPC.CodeGen.getRegister: no match" - - where - triv_float :: Width -> (Format -> Reg -> Reg -> Reg -> Instr) -> NatM Register - triv_float width instr = trivialCodeNoImm (floatFormat width) instr x y - - remainder :: Width -> Bool -> CmmExpr -> CmmExpr -> NatM Register - remainder rep sgn x y = do - let fmt = intFormat rep - tmp <- getNewRegNat fmt - code <- remainderCode rep sgn tmp x y - return (Any fmt code) - - -getRegister' _ (CmmLit (CmmInt i rep)) - | Just imm <- makeImmediate rep True i - = let - code dst = unitOL (LI dst imm) - in - return (Any (intFormat rep) code) - -getRegister' _ (CmmLit (CmmFloat f frep)) = do - lbl <- getNewLabelNat - dflags <- getDynFlags - dynRef <- cmmMakeDynamicReference dflags DataReference lbl - Amode addr addr_code <- getAmode D dynRef - let format = floatFormat frep - code dst = - LDATA (Section ReadOnlyData lbl) - (RawCmmStatics lbl [CmmStaticLit (CmmFloat f frep)]) - `consOL` (addr_code `snocOL` LD format dst addr) - return (Any format code) - -getRegister' dflags (CmmLit lit) - | target32Bit (targetPlatform dflags) - = let rep = cmmLitType dflags lit - imm = litToImm lit - code dst = toOL [ - LIS dst (HA imm), - ADD dst dst (RIImm (LO imm)) - ] - in return (Any (cmmTypeFormat rep) code) - | otherwise - = do lbl <- getNewLabelNat - dflags <- getDynFlags - dynRef <- cmmMakeDynamicReference dflags DataReference lbl - Amode addr addr_code <- getAmode D dynRef - let rep = cmmLitType dflags lit - format = cmmTypeFormat rep - code dst = - LDATA (Section ReadOnlyData lbl) (RawCmmStatics lbl [CmmStaticLit lit]) - `consOL` (addr_code `snocOL` LD format dst addr) - return (Any format code) - -getRegister' _ other = pprPanic "getRegister(ppc)" (pprExpr other) - - -- extend?Rep: wrap integer expression of type `from` - -- in a conversion to `to` -extendSExpr :: Width -> Width -> CmmExpr -> CmmExpr -extendSExpr from to x = CmmMachOp (MO_SS_Conv from to) [x] - -extendUExpr :: Width -> Width -> CmmExpr -> CmmExpr -extendUExpr from to x = CmmMachOp (MO_UU_Conv from to) [x] - --- ----------------------------------------------------------------------------- --- The 'Amode' type: Memory addressing modes passed up the tree. - -data Amode - = Amode AddrMode InstrBlock - -{- -Now, given a tree (the argument to a CmmLoad) that references memory, -produce a suitable addressing mode. - -A Rule of the Game (tm) for Amodes: use of the addr bit must -immediately follow use of the code part, since the code part puts -values in registers which the addr then refers to. So you can't put -anything in between, lest it overwrite some of those registers. If -you need to do some other computation between the code part and use of -the addr bit, first store the effective address from the amode in a -temporary, then do the other computation, and then use the temporary: - - code - LEA amode, tmp - ... other computation ... - ... (tmp) ... --} - -{- Note [Power instruction format] -In some instructions the 16 bit offset must be a multiple of 4, i.e. -the two least significant bits must be zero. The "Power ISA" specification -calls these instruction formats "DS-FORM" and the instructions with -arbitrary 16 bit offsets are "D-FORM". - -The Power ISA specification document can be obtained from www.power.org. --} -data InstrForm = D | DS - -getAmode :: InstrForm -> CmmExpr -> NatM Amode -getAmode inf tree@(CmmRegOff _ _) - = do dflags <- getDynFlags - getAmode inf (mangleIndexTree dflags tree) - -getAmode _ (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)]) - | Just off <- makeImmediate W32 True (-i) - = do - (reg, code) <- getSomeReg x - return (Amode (AddrRegImm reg off) code) - - -getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)]) - | Just off <- makeImmediate W32 True i - = do - (reg, code) <- getSomeReg x - return (Amode (AddrRegImm reg off) code) - -getAmode D (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)]) - | Just off <- makeImmediate W64 True (-i) - = do - (reg, code) <- getSomeReg x - return (Amode (AddrRegImm reg off) code) - - -getAmode D (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)]) - | Just off <- makeImmediate W64 True i - = do - (reg, code) <- getSomeReg x - return (Amode (AddrRegImm reg off) code) - -getAmode DS (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)]) - | Just off <- makeImmediate W64 True (-i) - = do - (reg, code) <- getSomeReg x - (reg', off', code') <- - if i `mod` 4 == 0 - then do return (reg, off, code) - else do - tmp <- getNewRegNat II64 - return (tmp, ImmInt 0, - code `snocOL` ADD tmp reg (RIImm off)) - return (Amode (AddrRegImm reg' off') code') - -getAmode DS (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)]) - | Just off <- makeImmediate W64 True i - = do - (reg, code) <- getSomeReg x - (reg', off', code') <- - if i `mod` 4 == 0 - then do return (reg, off, code) - else do - tmp <- getNewRegNat II64 - return (tmp, ImmInt 0, - code `snocOL` ADD tmp reg (RIImm off)) - return (Amode (AddrRegImm reg' off') code') - - -- optimize addition with 32-bit immediate - -- (needed for PIC) -getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit lit]) - = do - dflags <- getDynFlags - (src, srcCode) <- getSomeReg x - let imm = litToImm lit - case () of - _ | OSAIX <- platformOS (targetPlatform dflags) - , isCmmLabelType lit -> - -- HA16/LO16 relocations on labels not supported on AIX - return (Amode (AddrRegImm src imm) srcCode) - | otherwise -> do - tmp <- getNewRegNat II32 - let code = srcCode `snocOL` ADDIS tmp src (HA imm) - return (Amode (AddrRegImm tmp (LO imm)) code) - where - isCmmLabelType (CmmLabel {}) = True - isCmmLabelType (CmmLabelOff {}) = True - isCmmLabelType (CmmLabelDiffOff {}) = True - isCmmLabelType _ = False - -getAmode _ (CmmLit lit) - = do - dflags <- getDynFlags - case platformArch $ targetPlatform dflags of - ArchPPC -> do - tmp <- getNewRegNat II32 - let imm = litToImm lit - code = unitOL (LIS tmp (HA imm)) - return (Amode (AddrRegImm tmp (LO imm)) code) - _ -> do -- TODO: Load from TOC, - -- see getRegister' _ (CmmLit lit) - tmp <- getNewRegNat II64 - let imm = litToImm lit - code = toOL [ - LIS tmp (HIGHESTA imm), - OR tmp tmp (RIImm (HIGHERA imm)), - SL II64 tmp tmp (RIImm (ImmInt 32)), - ORIS tmp tmp (HA imm) - ] - return (Amode (AddrRegImm tmp (LO imm)) code) - -getAmode _ (CmmMachOp (MO_Add W32) [x, y]) - = do - (regX, codeX) <- getSomeReg x - (regY, codeY) <- getSomeReg y - return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY)) - -getAmode _ (CmmMachOp (MO_Add W64) [x, y]) - = do - (regX, codeX) <- getSomeReg x - (regY, codeY) <- getSomeReg y - return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY)) - -getAmode _ other - = do - (reg, code) <- getSomeReg other - let - off = ImmInt 0 - return (Amode (AddrRegImm reg off) code) - - --- The 'CondCode' type: Condition codes passed up the tree. -data CondCode - = CondCode Bool Cond InstrBlock - --- Set up a condition code for a conditional branch. - -getCondCode :: CmmExpr -> NatM CondCode - --- almost the same as everywhere else - but we need to --- extend small integers to 32 bit or 64 bit first - -getCondCode (CmmMachOp mop [x, y]) - = do - case mop of - MO_F_Eq W32 -> condFltCode EQQ x y - MO_F_Ne W32 -> condFltCode NE x y - MO_F_Gt W32 -> condFltCode GTT x y - MO_F_Ge W32 -> condFltCode GE x y - MO_F_Lt W32 -> condFltCode LTT x y - MO_F_Le W32 -> condFltCode LE x y - - MO_F_Eq W64 -> condFltCode EQQ x y - MO_F_Ne W64 -> condFltCode NE x y - MO_F_Gt W64 -> condFltCode GTT x y - MO_F_Ge W64 -> condFltCode GE x y - MO_F_Lt W64 -> condFltCode LTT x y - MO_F_Le W64 -> condFltCode LE x y - - MO_Eq rep -> condIntCode EQQ rep x y - MO_Ne rep -> condIntCode NE rep x y - - MO_S_Gt rep -> condIntCode GTT rep x y - MO_S_Ge rep -> condIntCode GE rep x y - MO_S_Lt rep -> condIntCode LTT rep x y - MO_S_Le rep -> condIntCode LE rep x y - - MO_U_Gt rep -> condIntCode GU rep x y - MO_U_Ge rep -> condIntCode GEU rep x y - MO_U_Lt rep -> condIntCode LU rep x y - MO_U_Le rep -> condIntCode LEU rep x y - - _ -> pprPanic "getCondCode(powerpc)" (pprMachOp mop) - -getCondCode _ = panic "getCondCode(2)(powerpc)" - - --- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be --- passed back up the tree. - -condIntCode :: Cond -> Width -> CmmExpr -> CmmExpr -> NatM CondCode -condIntCode cond width x y = do - dflags <- getDynFlags - condIntCode' (target32Bit (targetPlatform dflags)) cond width x y - -condIntCode' :: Bool -> Cond -> Width -> CmmExpr -> CmmExpr -> NatM CondCode - --- simple code for 64-bit on 32-bit platforms -condIntCode' True cond W64 x y - | condUnsigned cond - = do - ChildCode64 code_x x_lo <- iselExpr64 x - ChildCode64 code_y y_lo <- iselExpr64 y - let x_hi = getHiVRegFromLo x_lo - y_hi = getHiVRegFromLo y_lo - end_lbl <- getBlockIdNat - let code = code_x `appOL` code_y `appOL` toOL - [ CMPL II32 x_hi (RIReg y_hi) - , BCC NE end_lbl Nothing - , CMPL II32 x_lo (RIReg y_lo) - , BCC ALWAYS end_lbl Nothing - - , NEWBLOCK end_lbl - ] - return (CondCode False cond code) - | otherwise - = do - ChildCode64 code_x x_lo <- iselExpr64 x - ChildCode64 code_y y_lo <- iselExpr64 y - let x_hi = getHiVRegFromLo x_lo - y_hi = getHiVRegFromLo y_lo - end_lbl <- getBlockIdNat - cmp_lo <- getBlockIdNat - let code = code_x `appOL` code_y `appOL` toOL - [ CMP II32 x_hi (RIReg y_hi) - , BCC NE end_lbl Nothing - , CMP II32 x_hi (RIImm (ImmInt 0)) - , BCC LE cmp_lo Nothing - , CMPL II32 x_lo (RIReg y_lo) - , BCC ALWAYS end_lbl Nothing - , NEWBLOCK cmp_lo - , CMPL II32 y_lo (RIReg x_lo) - , BCC ALWAYS end_lbl Nothing - - , NEWBLOCK end_lbl - ] - return (CondCode False cond code) - --- optimize pointer tag checks. Operation andi. sets condition register --- so cmpi ..., 0 is redundant. -condIntCode' _ cond _ (CmmMachOp (MO_And _) [x, CmmLit (CmmInt imm rep)]) - (CmmLit (CmmInt 0 _)) - | not $ condUnsigned cond, - Just src2 <- makeImmediate rep False imm - = do - (src1, code) <- getSomeReg x - let code' = code `snocOL` AND r0 src1 (RIImm src2) - return (CondCode False cond code') - -condIntCode' _ cond width x (CmmLit (CmmInt y rep)) - | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y - = do - let op_len = max W32 width - let extend = extendSExpr width op_len - (src1, code) <- getSomeReg (extend x) - let format = intFormat op_len - code' = code `snocOL` - (if condUnsigned cond then CMPL else CMP) format src1 (RIImm src2) - return (CondCode False cond code') - -condIntCode' _ cond width x y = do - let op_len = max W32 width - let extend = if condUnsigned cond then extendUExpr width op_len - else extendSExpr width op_len - (src1, code1) <- getSomeReg (extend x) - (src2, code2) <- getSomeReg (extend y) - let format = intFormat op_len - code' = code1 `appOL` code2 `snocOL` - (if condUnsigned cond then CMPL else CMP) format src1 (RIReg src2) - return (CondCode False cond code') - -condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode -condFltCode cond x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - let - code' = code1 `appOL` code2 `snocOL` FCMP src1 src2 - code'' = case cond of -- twiddle CR to handle unordered case - GE -> code' `snocOL` CRNOR ltbit eqbit gtbit - LE -> code' `snocOL` CRNOR gtbit eqbit ltbit - _ -> code' - where - ltbit = 0 ; eqbit = 2 ; gtbit = 1 - return (CondCode True cond code'') - - - --- ----------------------------------------------------------------------------- --- Generating assignments - --- Assignments are really at the heart of the whole code generation --- business. Almost all top-level nodes of any real importance are --- assignments, which correspond to loads, stores, or register --- transfers. If we're really lucky, some of the register transfers --- will go away, because we can use the destination register to --- complete the code generation for the right hand side. This only --- fails when the right hand side is forced into a fixed register --- (e.g. the result of a call). - -assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock -assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock - -assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock -assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock - -assignMem_IntCode pk addr src = do - (srcReg, code) <- getSomeReg src - Amode dstAddr addr_code <- case pk of - II64 -> getAmode DS addr - _ -> getAmode D addr - return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr - --- dst is a reg, but src could be anything -assignReg_IntCode _ reg src - = do - dflags <- getDynFlags - let dst = getRegisterReg (targetPlatform dflags) reg - r <- getRegister src - return $ case r of - Any _ code -> code dst - Fixed _ freg fcode -> fcode `snocOL` MR dst freg - - - --- Easy, isn't it? -assignMem_FltCode = assignMem_IntCode -assignReg_FltCode = assignReg_IntCode - - - -genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock - -genJump (CmmLit (CmmLabel lbl)) regs - = return (unitOL $ JMP lbl regs) - -genJump tree gregs - = do - dflags <- getDynFlags - genJump' tree (platformToGCP (targetPlatform dflags)) gregs - -genJump' :: CmmExpr -> GenCCallPlatform -> [Reg] -> NatM InstrBlock - -genJump' tree (GCP64ELF 1) regs - = do - (target,code) <- getSomeReg tree - return (code - `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 0)) - `snocOL` LD II64 toc (AddrRegImm target (ImmInt 8)) - `snocOL` MTCTR r11 - `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 16)) - `snocOL` BCTR [] Nothing regs) - -genJump' tree (GCP64ELF 2) regs - = do - (target,code) <- getSomeReg tree - return (code - `snocOL` MR r12 target - `snocOL` MTCTR r12 - `snocOL` BCTR [] Nothing regs) - -genJump' tree _ regs - = do - (target,code) <- getSomeReg tree - return (code `snocOL` MTCTR target `snocOL` BCTR [] Nothing regs) - --- ----------------------------------------------------------------------------- --- Unconditional branches -genBranch :: BlockId -> NatM InstrBlock -genBranch = return . toOL . mkJumpInstr - - --- ----------------------------------------------------------------------------- --- Conditional jumps - -{- -Conditional jumps are always to local labels, so we can use branch -instructions. We peek at the arguments to decide what kind of -comparison to do. --} - - -genCondJump - :: BlockId -- the branch target - -> CmmExpr -- the condition on which to branch - -> Maybe Bool - -> NatM InstrBlock - -genCondJump id bool prediction = do - CondCode _ cond code <- getCondCode bool - return (code `snocOL` BCC cond id prediction) - - - --- ----------------------------------------------------------------------------- --- Generating C calls - --- Now the biggest nightmare---calls. Most of the nastiness is buried in --- @get_arg@, which moves the arguments to the correct registers/stack --- locations. Apart from that, the code is easy. - -genCCall :: ForeignTarget -- function to call - -> [CmmFormal] -- where to put the result - -> [CmmActual] -- arguments (of mixed type) - -> NatM InstrBlock -genCCall (PrimTarget MO_ReadBarrier) _ _ - = return $ unitOL LWSYNC -genCCall (PrimTarget MO_WriteBarrier) _ _ - = return $ unitOL LWSYNC - -genCCall (PrimTarget MO_Touch) _ _ - = return $ nilOL - -genCCall (PrimTarget (MO_Prefetch_Data _)) _ _ - = return $ nilOL - -genCCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] - = do dflags <- getDynFlags - let platform = targetPlatform dflags - fmt = intFormat width - reg_dst = getRegisterReg platform (CmmLocal dst) - (instr, n_code) <- case amop of - AMO_Add -> getSomeRegOrImm ADD True reg_dst - AMO_Sub -> case n of - CmmLit (CmmInt i _) - | Just imm <- makeImmediate width True (-i) - -> return (ADD reg_dst reg_dst (RIImm imm), nilOL) - _ - -> do - (n_reg, n_code) <- getSomeReg n - return (SUBF reg_dst n_reg reg_dst, n_code) - AMO_And -> getSomeRegOrImm AND False reg_dst - AMO_Nand -> do (n_reg, n_code) <- getSomeReg n - return (NAND reg_dst reg_dst n_reg, n_code) - AMO_Or -> getSomeRegOrImm OR False reg_dst - AMO_Xor -> getSomeRegOrImm XOR False reg_dst - Amode addr_reg addr_code <- getAmodeIndex addr - lbl_retry <- getBlockIdNat - return $ n_code `appOL` addr_code - `appOL` toOL [ HWSYNC - , BCC ALWAYS lbl_retry Nothing - - , NEWBLOCK lbl_retry - , LDR fmt reg_dst addr_reg - , instr - , STC fmt reg_dst addr_reg - , BCC NE lbl_retry (Just False) - , ISYNC - ] - where - getAmodeIndex (CmmMachOp (MO_Add _) [x, y]) - = do - (regX, codeX) <- getSomeReg x - (regY, codeY) <- getSomeReg y - return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY)) - getAmodeIndex other - = do - (reg, code) <- getSomeReg other - return (Amode (AddrRegReg r0 reg) code) -- NB: r0 is 0 here! - getSomeRegOrImm op sign dst - = case n of - CmmLit (CmmInt i _) | Just imm <- makeImmediate width sign i - -> return (op dst dst (RIImm imm), nilOL) - _ - -> do - (n_reg, n_code) <- getSomeReg n - return (op dst dst (RIReg n_reg), n_code) - -genCCall (PrimTarget (MO_AtomicRead width)) [dst] [addr] - = do dflags <- getDynFlags - let platform = targetPlatform dflags - fmt = intFormat width - reg_dst = getRegisterReg platform (CmmLocal dst) - form = if widthInBits width == 64 then DS else D - Amode addr_reg addr_code <- getAmode form addr - lbl_end <- getBlockIdNat - return $ addr_code `appOL` toOL [ HWSYNC - , LD fmt reg_dst addr_reg - , CMP fmt reg_dst (RIReg reg_dst) - , BCC NE lbl_end (Just False) - , BCC ALWAYS lbl_end Nothing - -- See Note [Seemingly useless cmp and bne] - , NEWBLOCK lbl_end - , ISYNC - ] - --- Note [Seemingly useless cmp and bne] --- In Power ISA, Book II, Section 4.4.1, Instruction Synchronize Instruction --- the second paragraph says that isync may complete before storage accesses --- "associated" with a preceding instruction have been performed. The cmp --- operation and the following bne introduce a data and control dependency --- on the load instruction (See also Power ISA, Book II, Appendix B.2.3, Safe --- Fetch). --- This is also what gcc does. - - -genCCall (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do - code <- assignMem_IntCode (intFormat width) addr val - return $ unitOL(HWSYNC) `appOL` code - -genCCall (PrimTarget (MO_Clz width)) [dst] [src] - = do dflags <- getDynFlags - let platform = targetPlatform dflags - reg_dst = getRegisterReg platform (CmmLocal dst) - if target32Bit platform && width == W64 - then do - ChildCode64 code vr_lo <- iselExpr64 src - lbl1 <- getBlockIdNat - lbl2 <- getBlockIdNat - lbl3 <- getBlockIdNat - let vr_hi = getHiVRegFromLo vr_lo - cntlz = toOL [ CMPL II32 vr_hi (RIImm (ImmInt 0)) - , BCC NE lbl2 Nothing - , BCC ALWAYS lbl1 Nothing - - , NEWBLOCK lbl1 - , CNTLZ II32 reg_dst vr_lo - , ADD reg_dst reg_dst (RIImm (ImmInt 32)) - , BCC ALWAYS lbl3 Nothing - - , NEWBLOCK lbl2 - , CNTLZ II32 reg_dst vr_hi - , BCC ALWAYS lbl3 Nothing - - , NEWBLOCK lbl3 - ] - return $ code `appOL` cntlz - else do - let format = if width == W64 then II64 else II32 - (s_reg, s_code) <- getSomeReg src - (pre, reg , post) <- - case width of - W64 -> return (nilOL, s_reg, nilOL) - W32 -> return (nilOL, s_reg, nilOL) - W16 -> do - reg_tmp <- getNewRegNat format - return - ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 65535)) - , reg_tmp - , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-16))) - ) - W8 -> do - reg_tmp <- getNewRegNat format - return - ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 255)) - , reg_tmp - , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-24))) - ) - _ -> panic "genCall: Clz wrong format" - let cntlz = unitOL (CNTLZ format reg_dst reg) - return $ s_code `appOL` pre `appOL` cntlz `appOL` post - -genCCall (PrimTarget (MO_Ctz width)) [dst] [src] - = do dflags <- getDynFlags - let platform = targetPlatform dflags - reg_dst = getRegisterReg platform (CmmLocal dst) - if target32Bit platform && width == W64 - then do - let format = II32 - ChildCode64 code vr_lo <- iselExpr64 src - lbl1 <- getBlockIdNat - lbl2 <- getBlockIdNat - lbl3 <- getBlockIdNat - x' <- getNewRegNat format - x'' <- getNewRegNat format - r' <- getNewRegNat format - cnttzlo <- cnttz format reg_dst vr_lo - let vr_hi = getHiVRegFromLo vr_lo - cnttz64 = toOL [ CMPL format vr_lo (RIImm (ImmInt 0)) - , BCC NE lbl2 Nothing - , BCC ALWAYS lbl1 Nothing - - , NEWBLOCK lbl1 - , ADD x' vr_hi (RIImm (ImmInt (-1))) - , ANDC x'' x' vr_hi - , CNTLZ format r' x'' - -- 32 + (32 - clz(x'')) - , SUBFC reg_dst r' (RIImm (ImmInt 64)) - , BCC ALWAYS lbl3 Nothing - - , NEWBLOCK lbl2 - ] - `appOL` cnttzlo `appOL` - toOL [ BCC ALWAYS lbl3 Nothing - - , NEWBLOCK lbl3 - ] - return $ code `appOL` cnttz64 - else do - let format = if width == W64 then II64 else II32 - (s_reg, s_code) <- getSomeReg src - (reg_ctz, pre_code) <- - case width of - W64 -> return (s_reg, nilOL) - W32 -> return (s_reg, nilOL) - W16 -> do - reg_tmp <- getNewRegNat format - return (reg_tmp, unitOL $ ORIS reg_tmp s_reg (ImmInt 1)) - W8 -> do - reg_tmp <- getNewRegNat format - return (reg_tmp, unitOL $ OR reg_tmp s_reg (RIImm (ImmInt 256))) - _ -> panic "genCall: Ctz wrong format" - ctz_code <- cnttz format reg_dst reg_ctz - return $ s_code `appOL` pre_code `appOL` ctz_code - where - -- cnttz(x) = sizeof(x) - cntlz(~x & (x - 1)) - -- see Henry S. Warren, Hacker's Delight, p 107 - cnttz format dst src = do - let format_bits = 8 * formatInBytes format - x' <- getNewRegNat format - x'' <- getNewRegNat format - r' <- getNewRegNat format - return $ toOL [ ADD x' src (RIImm (ImmInt (-1))) - , ANDC x'' x' src - , CNTLZ format r' x'' - , SUBFC dst r' (RIImm (ImmInt (format_bits))) - ] - -genCCall target dest_regs argsAndHints - = do dflags <- getDynFlags - let platform = targetPlatform dflags - case target of - PrimTarget (MO_S_QuotRem width) -> divOp1 platform True width - dest_regs argsAndHints - PrimTarget (MO_U_QuotRem width) -> divOp1 platform False width - dest_regs argsAndHints - PrimTarget (MO_U_QuotRem2 width) -> divOp2 platform width dest_regs - argsAndHints - PrimTarget (MO_U_Mul2 width) -> multOp2 platform width dest_regs - argsAndHints - PrimTarget (MO_Add2 _) -> add2Op platform dest_regs argsAndHints - PrimTarget (MO_AddWordC _) -> addcOp platform dest_regs argsAndHints - PrimTarget (MO_SubWordC _) -> subcOp platform dest_regs argsAndHints - PrimTarget (MO_AddIntC width) -> addSubCOp ADDO platform width - dest_regs argsAndHints - PrimTarget (MO_SubIntC width) -> addSubCOp SUBFO platform width - dest_regs argsAndHints - PrimTarget MO_F64_Fabs -> fabs platform dest_regs argsAndHints - PrimTarget MO_F32_Fabs -> fabs platform dest_regs argsAndHints - _ -> genCCall' dflags (platformToGCP platform) - target dest_regs argsAndHints - where divOp1 platform signed width [res_q, res_r] [arg_x, arg_y] - = do let reg_q = getRegisterReg platform (CmmLocal res_q) - reg_r = getRegisterReg platform (CmmLocal res_r) - remainderCode width signed reg_q arg_x arg_y - <*> pure reg_r - - divOp1 _ _ _ _ _ - = panic "genCCall: Wrong number of arguments for divOp1" - divOp2 platform width [res_q, res_r] - [arg_x_high, arg_x_low, arg_y] - = do let reg_q = getRegisterReg platform (CmmLocal res_q) - reg_r = getRegisterReg platform (CmmLocal res_r) - fmt = intFormat width - half = 4 * (formatInBytes fmt) - (xh_reg, xh_code) <- getSomeReg arg_x_high - (xl_reg, xl_code) <- getSomeReg arg_x_low - (y_reg, y_code) <- getSomeReg arg_y - s <- getNewRegNat fmt - b <- getNewRegNat fmt - v <- getNewRegNat fmt - vn1 <- getNewRegNat fmt - vn0 <- getNewRegNat fmt - un32 <- getNewRegNat fmt - tmp <- getNewRegNat fmt - un10 <- getNewRegNat fmt - un1 <- getNewRegNat fmt - un0 <- getNewRegNat fmt - q1 <- getNewRegNat fmt - rhat <- getNewRegNat fmt - tmp1 <- getNewRegNat fmt - q0 <- getNewRegNat fmt - un21 <- getNewRegNat fmt - again1 <- getBlockIdNat - no1 <- getBlockIdNat - then1 <- getBlockIdNat - endif1 <- getBlockIdNat - again2 <- getBlockIdNat - no2 <- getBlockIdNat - then2 <- getBlockIdNat - endif2 <- getBlockIdNat - return $ y_code `appOL` xl_code `appOL` xh_code `appOL` - -- see Hacker's Delight p 196 Figure 9-3 - toOL [ -- b = 2 ^ (bits_in_word / 2) - LI b (ImmInt 1) - , SL fmt b b (RIImm (ImmInt half)) - -- s = clz(y) - , CNTLZ fmt s y_reg - -- v = y << s - , SL fmt v y_reg (RIReg s) - -- vn1 = upper half of v - , SR fmt vn1 v (RIImm (ImmInt half)) - -- vn0 = lower half of v - , CLRLI fmt vn0 v half - -- un32 = (u1 << s) - -- | (u0 >> (bits_in_word - s)) - , SL fmt un32 xh_reg (RIReg s) - , SUBFC tmp s - (RIImm (ImmInt (8 * formatInBytes fmt))) - , SR fmt tmp xl_reg (RIReg tmp) - , OR un32 un32 (RIReg tmp) - -- un10 = u0 << s - , SL fmt un10 xl_reg (RIReg s) - -- un1 = upper half of un10 - , SR fmt un1 un10 (RIImm (ImmInt half)) - -- un0 = lower half of un10 - , CLRLI fmt un0 un10 half - -- q1 = un32/vn1 - , DIV fmt False q1 un32 vn1 - -- rhat = un32 - q1*vn1 - , MULL fmt tmp q1 (RIReg vn1) - , SUBF rhat tmp un32 - , BCC ALWAYS again1 Nothing - - , NEWBLOCK again1 - -- if (q1 >= b || q1*vn0 > b*rhat + un1) - , CMPL fmt q1 (RIReg b) - , BCC GEU then1 Nothing - , BCC ALWAYS no1 Nothing - - , NEWBLOCK no1 - , MULL fmt tmp q1 (RIReg vn0) - , SL fmt tmp1 rhat (RIImm (ImmInt half)) - , ADD tmp1 tmp1 (RIReg un1) - , CMPL fmt tmp (RIReg tmp1) - , BCC LEU endif1 Nothing - , BCC ALWAYS then1 Nothing - - , NEWBLOCK then1 - -- q1 = q1 - 1 - , ADD q1 q1 (RIImm (ImmInt (-1))) - -- rhat = rhat + vn1 - , ADD rhat rhat (RIReg vn1) - -- if (rhat < b) goto again1 - , CMPL fmt rhat (RIReg b) - , BCC LTT again1 Nothing - , BCC ALWAYS endif1 Nothing - - , NEWBLOCK endif1 - -- un21 = un32*b + un1 - q1*v - , SL fmt un21 un32 (RIImm (ImmInt half)) - , ADD un21 un21 (RIReg un1) - , MULL fmt tmp q1 (RIReg v) - , SUBF un21 tmp un21 - -- compute second quotient digit - -- q0 = un21/vn1 - , DIV fmt False q0 un21 vn1 - -- rhat = un21- q0*vn1 - , MULL fmt tmp q0 (RIReg vn1) - , SUBF rhat tmp un21 - , BCC ALWAYS again2 Nothing - - , NEWBLOCK again2 - -- if (q0>b || q0*vn0 > b*rhat + un0) - , CMPL fmt q0 (RIReg b) - , BCC GEU then2 Nothing - , BCC ALWAYS no2 Nothing - - , NEWBLOCK no2 - , MULL fmt tmp q0 (RIReg vn0) - , SL fmt tmp1 rhat (RIImm (ImmInt half)) - , ADD tmp1 tmp1 (RIReg un0) - , CMPL fmt tmp (RIReg tmp1) - , BCC LEU endif2 Nothing - , BCC ALWAYS then2 Nothing - - , NEWBLOCK then2 - -- q0 = q0 - 1 - , ADD q0 q0 (RIImm (ImmInt (-1))) - -- rhat = rhat + vn1 - , ADD rhat rhat (RIReg vn1) - -- if (rhat<b) goto again2 - , CMPL fmt rhat (RIReg b) - , BCC LTT again2 Nothing - , BCC ALWAYS endif2 Nothing - - , NEWBLOCK endif2 - -- compute remainder - -- r = (un21*b + un0 - q0*v) >> s - , SL fmt reg_r un21 (RIImm (ImmInt half)) - , ADD reg_r reg_r (RIReg un0) - , MULL fmt tmp q0 (RIReg v) - , SUBF reg_r tmp reg_r - , SR fmt reg_r reg_r (RIReg s) - -- compute quotient - -- q = q1*b + q0 - , SL fmt reg_q q1 (RIImm (ImmInt half)) - , ADD reg_q reg_q (RIReg q0) - ] - divOp2 _ _ _ _ - = panic "genCCall: Wrong number of arguments for divOp2" - multOp2 platform width [res_h, res_l] [arg_x, arg_y] - = do let reg_h = getRegisterReg platform (CmmLocal res_h) - reg_l = getRegisterReg platform (CmmLocal res_l) - fmt = intFormat width - (x_reg, x_code) <- getSomeReg arg_x - (y_reg, y_code) <- getSomeReg arg_y - return $ y_code `appOL` x_code - `appOL` toOL [ MULL fmt reg_l x_reg (RIReg y_reg) - , MULHU fmt reg_h x_reg y_reg - ] - multOp2 _ _ _ _ - = panic "genCall: Wrong number of arguments for multOp2" - add2Op platform [res_h, res_l] [arg_x, arg_y] - = do let reg_h = getRegisterReg platform (CmmLocal res_h) - reg_l = getRegisterReg platform (CmmLocal res_l) - (x_reg, x_code) <- getSomeReg arg_x - (y_reg, y_code) <- getSomeReg arg_y - return $ y_code `appOL` x_code - `appOL` toOL [ LI reg_h (ImmInt 0) - , ADDC reg_l x_reg y_reg - , ADDZE reg_h reg_h - ] - add2Op _ _ _ - = panic "genCCall: Wrong number of arguments/results for add2" - - addcOp platform [res_r, res_c] [arg_x, arg_y] - = add2Op platform [res_c {-hi-}, res_r {-lo-}] [arg_x, arg_y] - addcOp _ _ _ - = panic "genCCall: Wrong number of arguments/results for addc" - - -- PowerPC subfc sets the carry for rT = ~(rA) + rB + 1, - -- which is 0 for borrow and 1 otherwise. We need 1 and 0 - -- so xor with 1. - subcOp platform [res_r, res_c] [arg_x, arg_y] - = do let reg_r = getRegisterReg platform (CmmLocal res_r) - reg_c = getRegisterReg platform (CmmLocal res_c) - (x_reg, x_code) <- getSomeReg arg_x - (y_reg, y_code) <- getSomeReg arg_y - return $ y_code `appOL` x_code - `appOL` toOL [ LI reg_c (ImmInt 0) - , SUBFC reg_r y_reg (RIReg x_reg) - , ADDZE reg_c reg_c - , XOR reg_c reg_c (RIImm (ImmInt 1)) - ] - subcOp _ _ _ - = panic "genCCall: Wrong number of arguments/results for subc" - addSubCOp instr platform width [res_r, res_c] [arg_x, arg_y] - = do let reg_r = getRegisterReg platform (CmmLocal res_r) - reg_c = getRegisterReg platform (CmmLocal res_c) - (x_reg, x_code) <- getSomeReg arg_x - (y_reg, y_code) <- getSomeReg arg_y - return $ y_code `appOL` x_code - `appOL` toOL [ instr reg_r y_reg x_reg, - -- SUBFO argument order reversed! - MFOV (intFormat width) reg_c - ] - addSubCOp _ _ _ _ _ - = panic "genCall: Wrong number of arguments/results for addC" - fabs platform [res] [arg] - = do let res_r = getRegisterReg platform (CmmLocal res) - (arg_reg, arg_code) <- getSomeReg arg - return $ arg_code `snocOL` FABS res_r arg_reg - fabs _ _ _ - = panic "genCall: Wrong number of arguments/results for fabs" - --- TODO: replace 'Int' by an enum such as 'PPC_64ABI' -data GenCCallPlatform = GCP32ELF | GCP64ELF !Int | GCPAIX - -platformToGCP :: Platform -> GenCCallPlatform -platformToGCP platform - = case platformOS platform of - OSAIX -> GCPAIX - _ -> case platformArch platform of - ArchPPC -> GCP32ELF - ArchPPC_64 ELF_V1 -> GCP64ELF 1 - ArchPPC_64 ELF_V2 -> GCP64ELF 2 - _ -> panic "platformToGCP: Not PowerPC" - - -genCCall' - :: DynFlags - -> GenCCallPlatform - -> ForeignTarget -- function to call - -> [CmmFormal] -- where to put the result - -> [CmmActual] -- arguments (of mixed type) - -> NatM InstrBlock - -{- - PowerPC Linux uses the System V Release 4 Calling Convention - for PowerPC. It is described in the - "System V Application Binary Interface PowerPC Processor Supplement". - - PowerPC 64 Linux uses the System V Release 4 Calling Convention for - 64-bit PowerPC. It is specified in - "64-bit PowerPC ELF Application Binary Interface Supplement 1.9" - (PPC64 ELF v1.9). - - PowerPC 64 Linux in little endian mode uses the "Power Architecture 64-Bit - ELF V2 ABI Specification -- OpenPOWER ABI for Linux Supplement" - (PPC64 ELF v2). - - AIX follows the "PowerOpen ABI: Application Binary Interface Big-Endian - 32-Bit Hardware Implementation" - - All four conventions are similar: - Parameters may be passed in general-purpose registers starting at r3, in - floating point registers starting at f1, or on the stack. - - But there are substantial differences: - * The number of registers used for parameter passing and the exact set of - nonvolatile registers differs (see MachRegs.hs). - * On AIX and 64-bit ELF, stack space is always reserved for parameters, - even if they are passed in registers. The called routine may choose to - save parameters from registers to the corresponding space on the stack. - * On AIX and 64-bit ELF, a corresponding amount of GPRs is skipped when - a floating point parameter is passed in an FPR. - * SysV insists on either passing I64 arguments on the stack, or in two GPRs, - starting with an odd-numbered GPR. It may skip a GPR to achieve this. - AIX just treats an I64 likt two separate I32s (high word first). - * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only - 4-byte aligned like everything else on AIX. - * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on - PowerPC Linux does not agree, so neither do we. - - According to all conventions, the parameter area should be part of the - caller's stack frame, allocated in the caller's prologue code (large enough - to hold the parameter lists for all called routines). The NCG already - uses the stack for register spilling, leaving 64 bytes free at the top. - If we need a larger parameter area than that, we increase the size - of the stack frame just before ccalling. --} - - -genCCall' dflags gcp target dest_regs args - = do - (finalStack,passArgumentsCode,usedRegs) <- passArguments - (zip3 args argReps argHints) - allArgRegs - (allFPArgRegs platform) - initialStackOffset - nilOL [] - - (labelOrExpr, reduceToFF32) <- case target of - ForeignTarget (CmmLit (CmmLabel lbl)) _ -> do - uses_pic_base_implicitly - return (Left lbl, False) - ForeignTarget expr _ -> do - uses_pic_base_implicitly - return (Right expr, False) - PrimTarget mop -> outOfLineMachOp mop - - let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode - codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32 - - case labelOrExpr of - Left lbl -> do -- the linker does all the work for us - return ( codeBefore - `snocOL` BL lbl usedRegs - `appOL` maybeNOP -- some ABI require a NOP after BL - `appOL` codeAfter) - Right dyn -> do -- implement call through function pointer - (dynReg, dynCode) <- getSomeReg dyn - case gcp of - GCP64ELF 1 -> return ( dynCode - `appOL` codeBefore - `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 40)) - `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 0)) - `snocOL` LD II64 toc (AddrRegImm dynReg (ImmInt 8)) - `snocOL` MTCTR r11 - `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 16)) - `snocOL` BCTRL usedRegs - `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 40)) - `appOL` codeAfter) - GCP64ELF 2 -> return ( dynCode - `appOL` codeBefore - `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 24)) - `snocOL` MR r12 dynReg - `snocOL` MTCTR r12 - `snocOL` BCTRL usedRegs - `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 24)) - `appOL` codeAfter) - GCPAIX -> return ( dynCode - -- AIX/XCOFF follows the PowerOPEN ABI - -- which is quite similar to LinuxPPC64/ELFv1 - `appOL` codeBefore - `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 20)) - `snocOL` LD II32 r11 (AddrRegImm dynReg (ImmInt 0)) - `snocOL` LD II32 toc (AddrRegImm dynReg (ImmInt 4)) - `snocOL` MTCTR r11 - `snocOL` LD II32 r11 (AddrRegImm dynReg (ImmInt 8)) - `snocOL` BCTRL usedRegs - `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 20)) - `appOL` codeAfter) - _ -> return ( dynCode - `snocOL` MTCTR dynReg - `appOL` codeBefore - `snocOL` BCTRL usedRegs - `appOL` codeAfter) - where - platform = targetPlatform dflags - - uses_pic_base_implicitly = do - -- See Note [implicit register in PPC PIC code] - -- on why we claim to use PIC register here - when (positionIndependent dflags && target32Bit platform) $ do - _ <- getPicBaseNat $ archWordFormat True - return () - - initialStackOffset = case gcp of - GCPAIX -> 24 - GCP32ELF -> 8 - GCP64ELF 1 -> 48 - GCP64ELF 2 -> 32 - _ -> panic "genCall': unknown calling convention" - -- size of linkage area + size of arguments, in bytes - stackDelta finalStack = case gcp of - GCPAIX -> - roundTo 16 $ (24 +) $ max 32 $ sum $ - map (widthInBytes . typeWidth) argReps - GCP32ELF -> roundTo 16 finalStack - GCP64ELF 1 -> - roundTo 16 $ (48 +) $ max 64 $ sum $ - map (roundTo 8 . widthInBytes . typeWidth) - argReps - GCP64ELF 2 -> - roundTo 16 $ (32 +) $ max 64 $ sum $ - map (roundTo 8 . widthInBytes . typeWidth) - argReps - _ -> panic "genCall': unknown calling conv." - - argReps = map (cmmExprType dflags) args - (argHints, _) = foreignTargetHints target - - roundTo a x | x `mod` a == 0 = x - | otherwise = x + a - (x `mod` a) - - spFormat = if target32Bit platform then II32 else II64 - - -- TODO: Do not create a new stack frame if delta is too large. - move_sp_down finalStack - | delta > stackFrameHeaderSize dflags = - toOL [STU spFormat sp (AddrRegImm sp (ImmInt (-delta))), - DELTA (-delta)] - | otherwise = nilOL - where delta = stackDelta finalStack - move_sp_up finalStack - | delta > stackFrameHeaderSize dflags = - toOL [ADD sp sp (RIImm (ImmInt delta)), - DELTA 0] - | otherwise = nilOL - where delta = stackDelta finalStack - - -- A NOP instruction is required after a call (bl instruction) - -- on AIX and 64-Bit Linux. - -- If the call is to a function with a different TOC (r2) the - -- link editor replaces the NOP instruction with a load of the TOC - -- from the stack to restore the TOC. - maybeNOP = case gcp of - GCP32ELF -> nilOL - -- See Section 3.9.4 of OpenPower ABI - GCPAIX -> unitOL NOP - -- See Section 3.5.11 of PPC64 ELF v1.9 - GCP64ELF 1 -> unitOL NOP - -- See Section 2.3.6 of PPC64 ELF v2 - GCP64ELF 2 -> unitOL NOP - _ -> panic "maybeNOP: Unknown PowerPC 64-bit ABI" - - passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed) - passArguments ((arg,arg_ty,_):args) gprs fprs stackOffset - accumCode accumUsed | isWord64 arg_ty - && target32Bit (targetPlatform dflags) = - do - ChildCode64 code vr_lo <- iselExpr64 arg - let vr_hi = getHiVRegFromLo vr_lo - - case gcp of - GCPAIX -> - do let storeWord vr (gpr:_) _ = MR gpr vr - storeWord vr [] offset - = ST II32 vr (AddrRegImm sp (ImmInt offset)) - passArguments args - (drop 2 gprs) - fprs - (stackOffset+8) - (accumCode `appOL` code - `snocOL` storeWord vr_hi gprs stackOffset - `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4)) - ((take 2 gprs) ++ accumUsed) - GCP32ELF -> - do let stackOffset' = roundTo 8 stackOffset - stackCode = accumCode `appOL` code - `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset')) - `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4))) - regCode hireg loreg = - accumCode `appOL` code - `snocOL` MR hireg vr_hi - `snocOL` MR loreg vr_lo - - case gprs of - hireg : loreg : regs | even (length gprs) -> - passArguments args regs fprs stackOffset - (regCode hireg loreg) (hireg : loreg : accumUsed) - _skipped : hireg : loreg : regs -> - passArguments args regs fprs stackOffset - (regCode hireg loreg) (hireg : loreg : accumUsed) - _ -> -- only one or no regs left - passArguments args [] fprs (stackOffset'+8) - stackCode accumUsed - GCP64ELF _ -> panic "passArguments: 32 bit code" - - passArguments ((arg,rep,hint):args) gprs fprs stackOffset accumCode accumUsed - | reg : _ <- regs = do - register <- getRegister arg_pro - let code = case register of - Fixed _ freg fcode -> fcode `snocOL` MR reg freg - Any _ acode -> acode reg - stackOffsetRes = case gcp of - -- The PowerOpen ABI requires that we - -- reserve stack slots for register - -- parameters - GCPAIX -> stackOffset + stackBytes - -- ... the SysV ABI 32-bit doesn't. - GCP32ELF -> stackOffset - -- ... but SysV ABI 64-bit does. - GCP64ELF _ -> stackOffset + stackBytes - passArguments args - (drop nGprs gprs) - (drop nFprs fprs) - stackOffsetRes - (accumCode `appOL` code) - (reg : accumUsed) - | otherwise = do - (vr, code) <- getSomeReg arg_pro - passArguments args - (drop nGprs gprs) - (drop nFprs fprs) - (stackOffset' + stackBytes) - (accumCode `appOL` code - `snocOL` ST format_pro vr stackSlot) - accumUsed - where - arg_pro - | isBitsType rep = CmmMachOp (conv_op (typeWidth rep) (wordWidth dflags)) [arg] - | otherwise = arg - format_pro - | isBitsType rep = intFormat (wordWidth dflags) - | otherwise = cmmTypeFormat rep - conv_op = case hint of - SignedHint -> MO_SS_Conv - _ -> MO_UU_Conv - - stackOffset' = case gcp of - GCPAIX -> - -- The 32bit PowerOPEN ABI is happy with - -- 32bit-alignment ... - stackOffset - GCP32ELF - -- ... the SysV ABI requires 8-byte - -- alignment for doubles. - | isFloatType rep && typeWidth rep == W64 -> - roundTo 8 stackOffset - | otherwise -> - stackOffset - GCP64ELF _ -> - -- Everything on the stack is mapped to - -- 8-byte aligned doublewords - stackOffset - stackOffset'' - | isFloatType rep && typeWidth rep == W32 = - case gcp of - -- The ELF v1 ABI Section 3.2.3 requires: - -- "Single precision floating point values - -- are mapped to the second word in a single - -- doubleword" - GCP64ELF 1 -> stackOffset' + 4 - _ -> stackOffset' - | otherwise = stackOffset' - - stackSlot = AddrRegImm sp (ImmInt stackOffset'') - (nGprs, nFprs, stackBytes, regs) - = case gcp of - GCPAIX -> - case cmmTypeFormat rep of - II8 -> (1, 0, 4, gprs) - II16 -> (1, 0, 4, gprs) - II32 -> (1, 0, 4, gprs) - -- The PowerOpen ABI requires that we skip a - -- corresponding number of GPRs when we use - -- the FPRs. - -- - -- E.g. for a `double` two GPRs are skipped, - -- whereas for a `float` one GPR is skipped - -- when parameters are assigned to - -- registers. - -- - -- The PowerOpen ABI specification can be found at - -- ftp://www.sourceware.org/pub/binutils/ppc-docs/ppc-poweropen/ - FF32 -> (1, 1, 4, fprs) - FF64 -> (2, 1, 8, fprs) - II64 -> panic "genCCall' passArguments II64" - - GCP32ELF -> - case cmmTypeFormat rep of - II8 -> (1, 0, 4, gprs) - II16 -> (1, 0, 4, gprs) - II32 -> (1, 0, 4, gprs) - -- ... the SysV ABI doesn't. - FF32 -> (0, 1, 4, fprs) - FF64 -> (0, 1, 8, fprs) - II64 -> panic "genCCall' passArguments II64" - GCP64ELF _ -> - case cmmTypeFormat rep of - II8 -> (1, 0, 8, gprs) - II16 -> (1, 0, 8, gprs) - II32 -> (1, 0, 8, gprs) - II64 -> (1, 0, 8, gprs) - -- The ELFv1 ABI requires that we skip a - -- corresponding number of GPRs when we use - -- the FPRs. - FF32 -> (1, 1, 8, fprs) - FF64 -> (1, 1, 8, fprs) - - moveResult reduceToFF32 = - case dest_regs of - [] -> nilOL - [dest] - | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1) - | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1) - | isWord64 rep && target32Bit (targetPlatform dflags) - -> toOL [MR (getHiVRegFromLo r_dest) r3, - MR r_dest r4] - | otherwise -> unitOL (MR r_dest r3) - where rep = cmmRegType dflags (CmmLocal dest) - r_dest = getRegisterReg platform (CmmLocal dest) - _ -> panic "genCCall' moveResult: Bad dest_regs" - - outOfLineMachOp mop = - do - dflags <- getDynFlags - mopExpr <- cmmMakeDynamicReference dflags CallReference $ - mkForeignLabel functionName Nothing ForeignLabelInThisPackage IsFunction - let mopLabelOrExpr = case mopExpr of - CmmLit (CmmLabel lbl) -> Left lbl - _ -> Right mopExpr - return (mopLabelOrExpr, reduce) - where - (functionName, reduce) = case mop of - MO_F32_Exp -> (fsLit "exp", True) - MO_F32_ExpM1 -> (fsLit "expm1", True) - MO_F32_Log -> (fsLit "log", True) - MO_F32_Log1P -> (fsLit "log1p", True) - MO_F32_Sqrt -> (fsLit "sqrt", True) - MO_F32_Fabs -> unsupported - - MO_F32_Sin -> (fsLit "sin", True) - MO_F32_Cos -> (fsLit "cos", True) - MO_F32_Tan -> (fsLit "tan", True) - - MO_F32_Asin -> (fsLit "asin", True) - MO_F32_Acos -> (fsLit "acos", True) - MO_F32_Atan -> (fsLit "atan", True) - - MO_F32_Sinh -> (fsLit "sinh", True) - MO_F32_Cosh -> (fsLit "cosh", True) - MO_F32_Tanh -> (fsLit "tanh", True) - MO_F32_Pwr -> (fsLit "pow", True) - - MO_F32_Asinh -> (fsLit "asinh", True) - MO_F32_Acosh -> (fsLit "acosh", True) - MO_F32_Atanh -> (fsLit "atanh", True) - - MO_F64_Exp -> (fsLit "exp", False) - MO_F64_ExpM1 -> (fsLit "expm1", False) - MO_F64_Log -> (fsLit "log", False) - MO_F64_Log1P -> (fsLit "log1p", False) - MO_F64_Sqrt -> (fsLit "sqrt", False) - MO_F64_Fabs -> unsupported - - MO_F64_Sin -> (fsLit "sin", False) - MO_F64_Cos -> (fsLit "cos", False) - MO_F64_Tan -> (fsLit "tan", False) - - MO_F64_Asin -> (fsLit "asin", False) - MO_F64_Acos -> (fsLit "acos", False) - MO_F64_Atan -> (fsLit "atan", False) - - MO_F64_Sinh -> (fsLit "sinh", False) - MO_F64_Cosh -> (fsLit "cosh", False) - MO_F64_Tanh -> (fsLit "tanh", False) - MO_F64_Pwr -> (fsLit "pow", False) - - MO_F64_Asinh -> (fsLit "asinh", False) - MO_F64_Acosh -> (fsLit "acosh", False) - MO_F64_Atanh -> (fsLit "atanh", False) - - MO_UF_Conv w -> (fsLit $ word2FloatLabel w, False) - - MO_Memcpy _ -> (fsLit "memcpy", False) - MO_Memset _ -> (fsLit "memset", False) - MO_Memmove _ -> (fsLit "memmove", False) - MO_Memcmp _ -> (fsLit "memcmp", False) - - MO_BSwap w -> (fsLit $ bSwapLabel w, False) - MO_BRev w -> (fsLit $ bRevLabel w, False) - MO_PopCnt w -> (fsLit $ popCntLabel w, False) - MO_Pdep w -> (fsLit $ pdepLabel w, False) - MO_Pext w -> (fsLit $ pextLabel w, False) - MO_Clz _ -> unsupported - MO_Ctz _ -> unsupported - MO_AtomicRMW {} -> unsupported - MO_Cmpxchg w -> (fsLit $ cmpxchgLabel w, False) - MO_AtomicRead _ -> unsupported - MO_AtomicWrite _ -> unsupported - - MO_S_Mul2 {} -> unsupported - MO_S_QuotRem {} -> unsupported - MO_U_QuotRem {} -> unsupported - MO_U_QuotRem2 {} -> unsupported - MO_Add2 {} -> unsupported - MO_AddWordC {} -> unsupported - MO_SubWordC {} -> unsupported - MO_AddIntC {} -> unsupported - MO_SubIntC {} -> unsupported - MO_U_Mul2 {} -> unsupported - MO_ReadBarrier -> unsupported - MO_WriteBarrier -> unsupported - MO_Touch -> unsupported - MO_Prefetch_Data _ -> unsupported - unsupported = panic ("outOfLineCmmOp: " ++ show mop - ++ " not supported") - --- ----------------------------------------------------------------------------- --- Generating a table-branch - -genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock -genSwitch dflags expr targets - | OSAIX <- platformOS (targetPlatform dflags) - = do - (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset) - let fmt = archWordFormat $ target32Bit $ targetPlatform dflags - sha = if target32Bit $ targetPlatform dflags then 2 else 3 - tmp <- getNewRegNat fmt - lbl <- getNewLabelNat - dynRef <- cmmMakeDynamicReference dflags DataReference lbl - (tableReg,t_code) <- getSomeReg $ dynRef - let code = e_code `appOL` t_code `appOL` toOL [ - SL fmt tmp reg (RIImm (ImmInt sha)), - LD fmt tmp (AddrRegReg tableReg tmp), - MTCTR tmp, - BCTR ids (Just lbl) [] - ] - return code - - | (positionIndependent dflags) || (not $ target32Bit $ targetPlatform dflags) - = do - (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset) - let fmt = archWordFormat $ target32Bit $ targetPlatform dflags - sha = if target32Bit $ targetPlatform dflags then 2 else 3 - tmp <- getNewRegNat fmt - lbl <- getNewLabelNat - dynRef <- cmmMakeDynamicReference dflags DataReference lbl - (tableReg,t_code) <- getSomeReg $ dynRef - let code = e_code `appOL` t_code `appOL` toOL [ - SL fmt tmp reg (RIImm (ImmInt sha)), - LD fmt tmp (AddrRegReg tableReg tmp), - ADD tmp tmp (RIReg tableReg), - MTCTR tmp, - BCTR ids (Just lbl) [] - ] - return code - | otherwise - = do - (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset) - let fmt = archWordFormat $ target32Bit $ targetPlatform dflags - sha = if target32Bit $ targetPlatform dflags then 2 else 3 - tmp <- getNewRegNat fmt - lbl <- getNewLabelNat - let code = e_code `appOL` toOL [ - SL fmt tmp reg (RIImm (ImmInt sha)), - ADDIS tmp tmp (HA (ImmCLbl lbl)), - LD fmt tmp (AddrRegImm tmp (LO (ImmCLbl lbl))), - MTCTR tmp, - BCTR ids (Just lbl) [] - ] - return code - where (offset, ids) = switchTargetsToTable targets - -generateJumpTableForInstr :: DynFlags -> Instr - -> Maybe (NatCmmDecl RawCmmStatics Instr) -generateJumpTableForInstr dflags (BCTR ids (Just lbl) _) = - let jumpTable - | (positionIndependent dflags) - || (not $ target32Bit $ targetPlatform dflags) - = map jumpTableEntryRel ids - | otherwise = map (jumpTableEntry dflags) ids - where jumpTableEntryRel Nothing - = CmmStaticLit (CmmInt 0 (wordWidth dflags)) - jumpTableEntryRel (Just blockid) - = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0 - (wordWidth dflags)) - where blockLabel = blockLbl blockid - in Just (CmmData (Section ReadOnlyData lbl) (RawCmmStatics lbl jumpTable)) -generateJumpTableForInstr _ _ = Nothing - --- ----------------------------------------------------------------------------- --- 'condIntReg' and 'condFltReg': condition codes into registers - --- Turn those condition codes into integers now (when they appear on --- the right hand side of an assignment). - - - -condReg :: NatM CondCode -> NatM Register -condReg getCond = do - CondCode _ cond cond_code <- getCond - dflags <- getDynFlags - let - code dst = cond_code - `appOL` negate_code - `appOL` toOL [ - MFCR dst, - RLWINM dst dst (bit + 1) 31 31 - ] - - negate_code | do_negate = unitOL (CRNOR bit bit bit) - | otherwise = nilOL - - (bit, do_negate) = case cond of - LTT -> (0, False) - LE -> (1, True) - EQQ -> (2, False) - GE -> (0, True) - GTT -> (1, False) - - NE -> (2, True) - - LU -> (0, False) - LEU -> (1, True) - GEU -> (0, True) - GU -> (1, False) - _ -> panic "PPC.CodeGen.codeReg: no match" - - format = archWordFormat $ target32Bit $ targetPlatform dflags - return (Any format code) - -condIntReg :: Cond -> Width -> CmmExpr -> CmmExpr -> NatM Register -condIntReg cond width x y = condReg (condIntCode cond width x y) -condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register -condFltReg cond x y = condReg (condFltCode cond x y) - - - --- ----------------------------------------------------------------------------- --- 'trivial*Code': deal with trivial instructions - --- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode', --- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions. --- Only look for constants on the right hand side, because that's --- where the generic optimizer will have put them. - --- Similarly, for unary instructions, we don't have to worry about --- matching an StInt as the argument, because genericOpt will already --- have handled the constant-folding. - - - -{- -Wolfgang's PowerPC version of The Rules: - -A slightly modified version of The Rules to take advantage of the fact -that PowerPC instructions work on all registers and don't implicitly -clobber any fixed registers. - -* The only expression for which getRegister returns Fixed is (CmmReg reg). - -* If getRegister returns Any, then the code it generates may modify only: - (a) fresh temporaries - (b) the destination register - It may *not* modify global registers, unless the global - register happens to be the destination register. - It may not clobber any other registers. In fact, only ccalls clobber any - fixed registers. - Also, it may not modify the counter register (used by genCCall). - - Corollary: If a getRegister for a subexpression returns Fixed, you need - not move it to a fresh temporary before evaluating the next subexpression. - The Fixed register won't be modified. - Therefore, we don't need a counterpart for the x86's getStableReg on PPC. - -* SDM's First Rule is valid for PowerPC, too: subexpressions can depend on - the value of the destination register. --} - -trivialCode - :: Width - -> Bool - -> (Reg -> Reg -> RI -> Instr) - -> CmmExpr - -> CmmExpr - -> NatM Register - -trivialCode rep signed instr x (CmmLit (CmmInt y _)) - | Just imm <- makeImmediate rep signed y - = do - (src1, code1) <- getSomeReg x - let code dst = code1 `snocOL` instr dst src1 (RIImm imm) - return (Any (intFormat rep) code) - -trivialCode rep _ instr x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2) - return (Any (intFormat rep) code) - -shiftMulCode - :: Width - -> Bool - -> (Format-> Reg -> Reg -> RI -> Instr) - -> CmmExpr - -> CmmExpr - -> NatM Register -shiftMulCode width sign instr x (CmmLit (CmmInt y _)) - | Just imm <- makeImmediate width sign y - = do - (src1, code1) <- getSomeReg x - let format = intFormat width - let ins_fmt = intFormat (max W32 width) - let code dst = code1 `snocOL` instr ins_fmt dst src1 (RIImm imm) - return (Any format code) - -shiftMulCode width _ instr x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - let format = intFormat width - let ins_fmt = intFormat (max W32 width) - let code dst = code1 `appOL` code2 - `snocOL` instr ins_fmt dst src1 (RIReg src2) - return (Any format code) - -trivialCodeNoImm' :: Format -> (Reg -> Reg -> Reg -> Instr) - -> CmmExpr -> CmmExpr -> NatM Register -trivialCodeNoImm' format instr x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2 - return (Any format code) - -trivialCodeNoImm :: Format -> (Format -> Reg -> Reg -> Reg -> Instr) - -> CmmExpr -> CmmExpr -> NatM Register -trivialCodeNoImm format instr x y - = trivialCodeNoImm' format (instr format) x y - -srCode :: Width -> Bool -> (Format-> Reg -> Reg -> RI -> Instr) - -> CmmExpr -> CmmExpr -> NatM Register -srCode width sgn instr x (CmmLit (CmmInt y _)) - | Just imm <- makeImmediate width sgn y - = do - let op_len = max W32 width - extend = if sgn then extendSExpr else extendUExpr - (src1, code1) <- getSomeReg (extend width op_len x) - let code dst = code1 `snocOL` - instr (intFormat op_len) dst src1 (RIImm imm) - return (Any (intFormat width) code) - -srCode width sgn instr x y = do - let op_len = max W32 width - extend = if sgn then extendSExpr else extendUExpr - (src1, code1) <- getSomeReg (extend width op_len x) - (src2, code2) <- getSomeReg (extendUExpr width op_len y) - -- Note: Shift amount `y` is unsigned - let code dst = code1 `appOL` code2 `snocOL` - instr (intFormat op_len) dst src1 (RIReg src2) - return (Any (intFormat width) code) - -divCode :: Width -> Bool -> CmmExpr -> CmmExpr -> NatM Register -divCode width sgn x y = do - let op_len = max W32 width - extend = if sgn then extendSExpr else extendUExpr - (src1, code1) <- getSomeReg (extend width op_len x) - (src2, code2) <- getSomeReg (extend width op_len y) - let code dst = code1 `appOL` code2 `snocOL` - DIV (intFormat op_len) sgn dst src1 src2 - return (Any (intFormat width) code) - - -trivialUCode :: Format - -> (Reg -> Reg -> Instr) - -> CmmExpr - -> NatM Register -trivialUCode rep instr x = do - (src, code) <- getSomeReg x - let code' dst = code `snocOL` instr dst src - return (Any rep code') - --- There is no "remainder" instruction on the PPC, so we have to do --- it the hard way. --- The "sgn" parameter is the signedness for the division instruction - -remainderCode :: Width -> Bool -> Reg -> CmmExpr -> CmmExpr - -> NatM (Reg -> InstrBlock) -remainderCode rep sgn reg_q arg_x arg_y = do - let op_len = max W32 rep - fmt = intFormat op_len - extend = if sgn then extendSExpr else extendUExpr - (x_reg, x_code) <- getSomeReg (extend rep op_len arg_x) - (y_reg, y_code) <- getSomeReg (extend rep op_len arg_y) - return $ \reg_r -> y_code `appOL` x_code - `appOL` toOL [ DIV fmt sgn reg_q x_reg y_reg - , MULL fmt reg_r reg_q (RIReg y_reg) - , SUBF reg_r reg_r x_reg - ] - - -coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register -coerceInt2FP fromRep toRep x = do - dflags <- getDynFlags - let arch = platformArch $ targetPlatform dflags - coerceInt2FP' arch fromRep toRep x - -coerceInt2FP' :: Arch -> Width -> Width -> CmmExpr -> NatM Register -coerceInt2FP' ArchPPC fromRep toRep x = do - (src, code) <- getSomeReg x - lbl <- getNewLabelNat - itmp <- getNewRegNat II32 - ftmp <- getNewRegNat FF64 - dflags <- getDynFlags - dynRef <- cmmMakeDynamicReference dflags DataReference lbl - Amode addr addr_code <- getAmode D dynRef - let - code' dst = code `appOL` maybe_exts `appOL` toOL [ - LDATA (Section ReadOnlyData lbl) $ RawCmmStatics lbl - [CmmStaticLit (CmmInt 0x43300000 W32), - CmmStaticLit (CmmInt 0x80000000 W32)], - XORIS itmp src (ImmInt 0x8000), - ST II32 itmp (spRel dflags 3), - LIS itmp (ImmInt 0x4330), - ST II32 itmp (spRel dflags 2), - LD FF64 ftmp (spRel dflags 2) - ] `appOL` addr_code `appOL` toOL [ - LD FF64 dst addr, - FSUB FF64 dst ftmp dst - ] `appOL` maybe_frsp dst - - maybe_exts = case fromRep of - W8 -> unitOL $ EXTS II8 src src - W16 -> unitOL $ EXTS II16 src src - W32 -> nilOL - _ -> panic "PPC.CodeGen.coerceInt2FP: no match" - - maybe_frsp dst - = case toRep of - W32 -> unitOL $ FRSP dst dst - W64 -> nilOL - _ -> panic "PPC.CodeGen.coerceInt2FP: no match" - - return (Any (floatFormat toRep) code') - --- On an ELF v1 Linux we use the compiler doubleword in the stack frame --- this is the TOC pointer doubleword on ELF v2 Linux. The latter is only --- set right before a call and restored right after return from the call. --- So it is fine. -coerceInt2FP' (ArchPPC_64 _) fromRep toRep x = do - (src, code) <- getSomeReg x - dflags <- getDynFlags - let - code' dst = code `appOL` maybe_exts `appOL` toOL [ - ST II64 src (spRel dflags 3), - LD FF64 dst (spRel dflags 3), - FCFID dst dst - ] `appOL` maybe_frsp dst - - maybe_exts = case fromRep of - W8 -> unitOL $ EXTS II8 src src - W16 -> unitOL $ EXTS II16 src src - W32 -> unitOL $ EXTS II32 src src - W64 -> nilOL - _ -> panic "PPC.CodeGen.coerceInt2FP: no match" - - maybe_frsp dst - = case toRep of - W32 -> unitOL $ FRSP dst dst - W64 -> nilOL - _ -> panic "PPC.CodeGen.coerceInt2FP: no match" - - return (Any (floatFormat toRep) code') - -coerceInt2FP' _ _ _ _ = panic "PPC.CodeGen.coerceInt2FP: unknown arch" - - -coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register -coerceFP2Int fromRep toRep x = do - dflags <- getDynFlags - let arch = platformArch $ targetPlatform dflags - coerceFP2Int' arch fromRep toRep x - -coerceFP2Int' :: Arch -> Width -> Width -> CmmExpr -> NatM Register -coerceFP2Int' ArchPPC _ toRep x = do - dflags <- getDynFlags - -- the reps don't really matter: F*->FF64 and II32->I* are no-ops - (src, code) <- getSomeReg x - tmp <- getNewRegNat FF64 - let - code' dst = code `appOL` toOL [ - -- convert to int in FP reg - FCTIWZ tmp src, - -- store value (64bit) from FP to stack - ST FF64 tmp (spRel dflags 2), - -- read low word of value (high word is undefined) - LD II32 dst (spRel dflags 3)] - return (Any (intFormat toRep) code') - -coerceFP2Int' (ArchPPC_64 _) _ toRep x = do - dflags <- getDynFlags - -- the reps don't really matter: F*->FF64 and II64->I* are no-ops - (src, code) <- getSomeReg x - tmp <- getNewRegNat FF64 - let - code' dst = code `appOL` toOL [ - -- convert to int in FP reg - FCTIDZ tmp src, - -- store value (64bit) from FP to compiler word on stack - ST FF64 tmp (spRel dflags 3), - LD II64 dst (spRel dflags 3)] - return (Any (intFormat toRep) code') - -coerceFP2Int' _ _ _ _ = panic "PPC.CodeGen.coerceFP2Int: unknown arch" - --- Note [.LCTOC1 in PPC PIC code] --- The .LCTOC1 label is defined to point 32768 bytes into the GOT table --- to make the most of the PPC's 16-bit displacements. --- As 16-bit signed offset is used (usually via addi/lwz instructions) --- first element will have '-32768' offset against .LCTOC1. - --- Note [implicit register in PPC PIC code] --- PPC generates calls by labels in assembly --- in form of: --- bl puts+32768@plt --- in this form it's not seen directly (by GHC NCG) --- that r30 (PicBaseReg) is used, --- but r30 is a required part of PLT code setup: --- puts+32768@plt: --- lwz r11,-30484(r30) ; offset in .LCTOC1 --- mtctr r11 --- bctr diff --git a/compiler/nativeGen/PPC/Cond.hs b/compiler/nativeGen/PPC/Cond.hs deleted file mode 100644 index bd8bdee81a..0000000000 --- a/compiler/nativeGen/PPC/Cond.hs +++ /dev/null @@ -1,63 +0,0 @@ -module PPC.Cond ( - Cond(..), - condNegate, - condUnsigned, - condToSigned, - condToUnsigned, -) - -where - -import GhcPrelude - -import Panic - -data Cond - = ALWAYS - | EQQ - | GE - | GEU - | GTT - | GU - | LE - | LEU - | LTT - | LU - | NE - deriving Eq - - -condNegate :: Cond -> Cond -condNegate ALWAYS = panic "condNegate: ALWAYS" -condNegate EQQ = NE -condNegate GE = LTT -condNegate GEU = LU -condNegate GTT = LE -condNegate GU = LEU -condNegate LE = GTT -condNegate LEU = GU -condNegate LTT = GE -condNegate LU = GEU -condNegate NE = EQQ - --- Condition utils -condUnsigned :: Cond -> Bool -condUnsigned GU = True -condUnsigned LU = True -condUnsigned GEU = True -condUnsigned LEU = True -condUnsigned _ = False - -condToSigned :: Cond -> Cond -condToSigned GU = GTT -condToSigned LU = LTT -condToSigned GEU = GE -condToSigned LEU = LE -condToSigned x = x - -condToUnsigned :: Cond -> Cond -condToUnsigned GTT = GU -condToUnsigned LTT = LU -condToUnsigned GE = GEU -condToUnsigned LE = LEU -condToUnsigned x = x diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs deleted file mode 100644 index ad2039d463..0000000000 --- a/compiler/nativeGen/PPC/Instr.hs +++ /dev/null @@ -1,713 +0,0 @@ -{-# LANGUAGE CPP #-} - -{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} - ------------------------------------------------------------------------------ --- --- Machine-dependent assembly language --- --- (c) The University of Glasgow 1993-2004 --- ------------------------------------------------------------------------------ - -#include "HsVersions.h" - -module PPC.Instr ( - archWordFormat, - RI(..), - Instr(..), - stackFrameHeaderSize, - maxSpillSlots, - allocMoreStack, - makeFarBranches -) - -where - -import GhcPrelude - -import PPC.Regs -import PPC.Cond -import Instruction -import Format -import TargetReg -import RegClass -import Reg - -import GHC.Platform.Regs -import GHC.Cmm.BlockId -import GHC.Cmm.Dataflow.Collections -import GHC.Cmm.Dataflow.Label -import GHC.Driver.Session -import GHC.Cmm -import GHC.Cmm.Info -import FastString -import GHC.Cmm.CLabel -import Outputable -import GHC.Platform -import UniqFM (listToUFM, lookupUFM) -import UniqSupply - -import Control.Monad (replicateM) -import Data.Maybe (fromMaybe) - --------------------------------------------------------------------------------- --- Format of a PPC memory address. --- -archWordFormat :: Bool -> Format -archWordFormat is32Bit - | is32Bit = II32 - | otherwise = II64 - - --- | Instruction instance for powerpc -instance Instruction Instr where - regUsageOfInstr = ppc_regUsageOfInstr - patchRegsOfInstr = ppc_patchRegsOfInstr - isJumpishInstr = ppc_isJumpishInstr - jumpDestsOfInstr = ppc_jumpDestsOfInstr - patchJumpInstr = ppc_patchJumpInstr - mkSpillInstr = ppc_mkSpillInstr - mkLoadInstr = ppc_mkLoadInstr - takeDeltaInstr = ppc_takeDeltaInstr - isMetaInstr = ppc_isMetaInstr - mkRegRegMoveInstr _ = ppc_mkRegRegMoveInstr - takeRegRegMoveInstr = ppc_takeRegRegMoveInstr - mkJumpInstr = ppc_mkJumpInstr - mkStackAllocInstr = ppc_mkStackAllocInstr - mkStackDeallocInstr = ppc_mkStackDeallocInstr - - -ppc_mkStackAllocInstr :: Platform -> Int -> [Instr] -ppc_mkStackAllocInstr platform amount - = ppc_mkStackAllocInstr' platform (-amount) - -ppc_mkStackDeallocInstr :: Platform -> Int -> [Instr] -ppc_mkStackDeallocInstr platform amount - = ppc_mkStackAllocInstr' platform amount - -ppc_mkStackAllocInstr' :: Platform -> Int -> [Instr] -ppc_mkStackAllocInstr' platform amount - | fits16Bits amount - = [ LD fmt r0 (AddrRegImm sp zero) - , STU fmt r0 (AddrRegImm sp immAmount) - ] - | otherwise - = [ LD fmt r0 (AddrRegImm sp zero) - , ADDIS tmp sp (HA immAmount) - , ADD tmp tmp (RIImm (LO immAmount)) - , STU fmt r0 (AddrRegReg sp tmp) - ] - where - fmt = intFormat $ widthFromBytes (platformWordSizeInBytes platform) - zero = ImmInt 0 - tmp = tmpReg platform - immAmount = ImmInt amount - --- --- See note [extra spill slots] in X86/Instr.hs --- -allocMoreStack - :: Platform - -> Int - -> NatCmmDecl statics PPC.Instr.Instr - -> UniqSM (NatCmmDecl statics PPC.Instr.Instr, [(BlockId,BlockId)]) - -allocMoreStack _ _ top@(CmmData _ _) = return (top,[]) -allocMoreStack platform slots (CmmProc info lbl live (ListGraph code)) = do - let - infos = mapKeys info - entries = case code of - [] -> infos - BasicBlock entry _ : _ -- first block is the entry point - | entry `elem` infos -> infos - | otherwise -> entry : infos - - uniqs <- replicateM (length entries) getUniqueM - - let - delta = ((x + stackAlign - 1) `quot` stackAlign) * stackAlign -- round up - where x = slots * spillSlotSize -- sp delta - - alloc = mkStackAllocInstr platform delta - dealloc = mkStackDeallocInstr platform delta - - retargetList = (zip entries (map mkBlockId uniqs)) - - new_blockmap :: LabelMap BlockId - new_blockmap = mapFromList retargetList - - insert_stack_insns (BasicBlock id insns) - | Just new_blockid <- mapLookup id new_blockmap - = [ BasicBlock id $ alloc ++ [BCC ALWAYS new_blockid Nothing] - , BasicBlock new_blockid block' - ] - | otherwise - = [ BasicBlock id block' ] - where - block' = foldr insert_dealloc [] insns - - insert_dealloc insn r - -- BCTR might or might not be a non-local jump. For - -- "labeled-goto" we use JMP, and for "computed-goto" we - -- use MTCTR followed by BCTR. See 'PPC.CodeGen.genJump'. - = case insn of - JMP _ _ -> dealloc ++ (insn : r) - BCTR [] Nothing _ -> dealloc ++ (insn : r) - BCTR ids label rs -> BCTR (map (fmap retarget) ids) label rs : r - BCCFAR cond b p -> BCCFAR cond (retarget b) p : r - BCC cond b p -> BCC cond (retarget b) p : r - _ -> insn : r - -- BL and BCTRL are call-like instructions rather than - -- jumps, and are used only for C calls. - - retarget :: BlockId -> BlockId - retarget b - = fromMaybe b (mapLookup b new_blockmap) - - new_code - = concatMap insert_stack_insns code - - -- in - return (CmmProc info lbl live (ListGraph new_code),retargetList) - - --- ----------------------------------------------------------------------------- --- Machine's assembly language - --- We have a few common "instructions" (nearly all the pseudo-ops) but --- mostly all of 'Instr' is machine-specific. - --- Register or immediate -data RI - = RIReg Reg - | RIImm Imm - -data Instr - -- comment pseudo-op - = COMMENT FastString - - -- some static data spat out during code - -- generation. Will be extracted before - -- pretty-printing. - | LDATA Section RawCmmStatics - - -- start a new basic block. Useful during - -- codegen, removed later. Preceding - -- instruction should be a jump, as per the - -- invariants for a BasicBlock (see Cmm). - | NEWBLOCK BlockId - - -- specify current stack offset for - -- benefit of subsequent passes - | DELTA Int - - -- Loads and stores. - | LD Format Reg AddrMode -- Load format, dst, src - | LDFAR Format Reg AddrMode -- Load format, dst, src 32 bit offset - | LDR Format Reg AddrMode -- Load and reserve format, dst, src - | LA Format Reg AddrMode -- Load arithmetic format, dst, src - | ST Format Reg AddrMode -- Store format, src, dst - | STFAR Format Reg AddrMode -- Store format, src, dst 32 bit offset - | STU Format Reg AddrMode -- Store with Update format, src, dst - | STC Format Reg AddrMode -- Store conditional format, src, dst - | LIS Reg Imm -- Load Immediate Shifted dst, src - | LI Reg Imm -- Load Immediate dst, src - | MR Reg Reg -- Move Register dst, src -- also for fmr - - | CMP Format Reg RI -- format, src1, src2 - | CMPL Format Reg RI -- format, src1, src2 - - | BCC Cond BlockId (Maybe Bool) -- cond, block, hint - | BCCFAR Cond BlockId (Maybe Bool) -- cond, block, hint - -- hint: - -- Just True: branch likely taken - -- Just False: branch likely not taken - -- Nothing: no hint - | JMP CLabel [Reg] -- same as branch, - -- but with CLabel instead of block ID - -- and live global registers - | MTCTR Reg - | BCTR [Maybe BlockId] (Maybe CLabel) [Reg] - -- with list of local destinations, and - -- jump table location if necessary - | BL CLabel [Reg] -- with list of argument regs - | BCTRL [Reg] - - | ADD Reg Reg RI -- dst, src1, src2 - | ADDO Reg Reg Reg -- add and set overflow - | ADDC Reg Reg Reg -- (carrying) dst, src1, src2 - | ADDE Reg Reg Reg -- (extended) dst, src1, src2 - | ADDZE Reg Reg -- (to zero extended) dst, src - | ADDIS Reg Reg Imm -- Add Immediate Shifted dst, src1, src2 - | SUBF Reg Reg Reg -- dst, src1, src2 ; dst = src2 - src1 - | SUBFO Reg Reg Reg -- subtract from and set overflow - | SUBFC Reg Reg RI -- (carrying) dst, src1, src2 ; - -- dst = src2 - src1 - | SUBFE Reg Reg Reg -- (extended) dst, src1, src2 ; - -- dst = src2 - src1 - | MULL Format Reg Reg RI - | MULLO Format Reg Reg Reg -- multiply and set overflow - | MFOV Format Reg -- move overflow bit (1|33) to register - -- pseudo-instruction; pretty printed as - -- mfxer dst - -- extr[w|d]i dst, dst, 1, [1|33] - | MULHU Format Reg Reg Reg - | DIV Format Bool Reg Reg Reg - | AND Reg Reg RI -- dst, src1, src2 - | ANDC Reg Reg Reg -- AND with complement, dst = src1 & ~ src2 - | NAND Reg Reg Reg -- dst, src1, src2 - | OR Reg Reg RI -- dst, src1, src2 - | ORIS Reg Reg Imm -- OR Immediate Shifted dst, src1, src2 - | XOR Reg Reg RI -- dst, src1, src2 - | XORIS Reg Reg Imm -- XOR Immediate Shifted dst, src1, src2 - - | EXTS Format Reg Reg - | CNTLZ Format Reg Reg - - | NEG Reg Reg - | NOT Reg Reg - - | SL Format Reg Reg RI -- shift left - | SR Format Reg Reg RI -- shift right - | SRA Format Reg Reg RI -- shift right arithmetic - - | RLWINM Reg Reg Int Int Int -- Rotate Left Word Immediate then AND with Mask - | CLRLI Format Reg Reg Int -- clear left immediate (extended mnemonic) - | CLRRI Format Reg Reg Int -- clear right immediate (extended mnemonic) - - | FADD Format Reg Reg Reg - | FSUB Format Reg Reg Reg - | FMUL Format Reg Reg Reg - | FDIV Format Reg Reg Reg - | FABS Reg Reg -- abs is the same for single and double - | FNEG Reg Reg -- negate is the same for single and double prec. - - | FCMP Reg Reg - - | FCTIWZ Reg Reg -- convert to integer word - | FCTIDZ Reg Reg -- convert to integer double word - | FCFID Reg Reg -- convert from integer double word - | FRSP Reg Reg -- reduce to single precision - -- (but destination is a FP register) - - | CRNOR Int Int Int -- condition register nor - | MFCR Reg -- move from condition register - - | MFLR Reg -- move from link register - | FETCHPC Reg -- pseudo-instruction: - -- bcl to next insn, mflr reg - | HWSYNC -- heavy weight sync - | ISYNC -- instruction synchronize - | LWSYNC -- memory barrier - | NOP -- no operation, PowerPC 64 bit - -- needs this as place holder to - -- reload TOC pointer - --- | Get the registers that are being used by this instruction. --- regUsage doesn't need to do any trickery for jumps and such. --- Just state precisely the regs read and written by that insn. --- The consequences of control flow transfers, as far as register --- allocation goes, are taken care of by the register allocator. --- -ppc_regUsageOfInstr :: Platform -> Instr -> RegUsage -ppc_regUsageOfInstr platform instr - = case instr of - LD _ reg addr -> usage (regAddr addr, [reg]) - LDFAR _ reg addr -> usage (regAddr addr, [reg]) - LDR _ reg addr -> usage (regAddr addr, [reg]) - LA _ reg addr -> usage (regAddr addr, [reg]) - ST _ reg addr -> usage (reg : regAddr addr, []) - STFAR _ reg addr -> usage (reg : regAddr addr, []) - STU _ reg addr -> usage (reg : regAddr addr, []) - STC _ reg addr -> usage (reg : regAddr addr, []) - LIS reg _ -> usage ([], [reg]) - LI reg _ -> usage ([], [reg]) - MR reg1 reg2 -> usage ([reg2], [reg1]) - CMP _ reg ri -> usage (reg : regRI ri,[]) - CMPL _ reg ri -> usage (reg : regRI ri,[]) - BCC _ _ _ -> noUsage - BCCFAR _ _ _ -> noUsage - JMP _ regs -> usage (regs, []) - MTCTR reg -> usage ([reg],[]) - BCTR _ _ regs -> usage (regs, []) - BL _ params -> usage (params, callClobberedRegs platform) - BCTRL params -> usage (params, callClobberedRegs platform) - - ADD reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) - ADDO reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) - ADDC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) - ADDE reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) - ADDZE reg1 reg2 -> usage ([reg2], [reg1]) - ADDIS reg1 reg2 _ -> usage ([reg2], [reg1]) - SUBF reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) - SUBFO reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) - SUBFC reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) - SUBFE reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) - MULL _ reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) - MULLO _ reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) - MFOV _ reg -> usage ([], [reg]) - MULHU _ reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) - DIV _ _ reg1 reg2 reg3 - -> usage ([reg2,reg3], [reg1]) - - AND reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) - ANDC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) - NAND reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) - OR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) - ORIS reg1 reg2 _ -> usage ([reg2], [reg1]) - XOR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) - XORIS reg1 reg2 _ -> usage ([reg2], [reg1]) - EXTS _ reg1 reg2 -> usage ([reg2], [reg1]) - CNTLZ _ reg1 reg2 -> usage ([reg2], [reg1]) - NEG reg1 reg2 -> usage ([reg2], [reg1]) - NOT reg1 reg2 -> usage ([reg2], [reg1]) - SL _ reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) - SR _ reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) - SRA _ reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) - RLWINM reg1 reg2 _ _ _ -> usage ([reg2], [reg1]) - CLRLI _ reg1 reg2 _ -> usage ([reg2], [reg1]) - CLRRI _ reg1 reg2 _ -> usage ([reg2], [reg1]) - - FADD _ r1 r2 r3 -> usage ([r2,r3], [r1]) - FSUB _ r1 r2 r3 -> usage ([r2,r3], [r1]) - FMUL _ r1 r2 r3 -> usage ([r2,r3], [r1]) - FDIV _ r1 r2 r3 -> usage ([r2,r3], [r1]) - FABS r1 r2 -> usage ([r2], [r1]) - FNEG r1 r2 -> usage ([r2], [r1]) - FCMP r1 r2 -> usage ([r1,r2], []) - FCTIWZ r1 r2 -> usage ([r2], [r1]) - FCTIDZ r1 r2 -> usage ([r2], [r1]) - FCFID r1 r2 -> usage ([r2], [r1]) - FRSP r1 r2 -> usage ([r2], [r1]) - MFCR reg -> usage ([], [reg]) - MFLR reg -> usage ([], [reg]) - FETCHPC reg -> usage ([], [reg]) - _ -> noUsage - where - usage (src, dst) = RU (filter (interesting platform) src) - (filter (interesting platform) dst) - regAddr (AddrRegReg r1 r2) = [r1, r2] - regAddr (AddrRegImm r1 _) = [r1] - - regRI (RIReg r) = [r] - regRI _ = [] - -interesting :: Platform -> Reg -> Bool -interesting _ (RegVirtual _) = True -interesting platform (RegReal (RealRegSingle i)) = freeReg platform i -interesting _ (RegReal (RealRegPair{})) - = panic "PPC.Instr.interesting: no reg pairs on this arch" - - - --- | Apply a given mapping to all the register references in this --- instruction. -ppc_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr -ppc_patchRegsOfInstr instr env - = case instr of - LD fmt reg addr -> LD fmt (env reg) (fixAddr addr) - LDFAR fmt reg addr -> LDFAR fmt (env reg) (fixAddr addr) - LDR fmt reg addr -> LDR fmt (env reg) (fixAddr addr) - LA fmt reg addr -> LA fmt (env reg) (fixAddr addr) - ST fmt reg addr -> ST fmt (env reg) (fixAddr addr) - STFAR fmt reg addr -> STFAR fmt (env reg) (fixAddr addr) - STU fmt reg addr -> STU fmt (env reg) (fixAddr addr) - STC fmt reg addr -> STC fmt (env reg) (fixAddr addr) - LIS reg imm -> LIS (env reg) imm - LI reg imm -> LI (env reg) imm - MR reg1 reg2 -> MR (env reg1) (env reg2) - CMP fmt reg ri -> CMP fmt (env reg) (fixRI ri) - CMPL fmt reg ri -> CMPL fmt (env reg) (fixRI ri) - BCC cond lbl p -> BCC cond lbl p - BCCFAR cond lbl p -> BCCFAR cond lbl p - JMP l regs -> JMP l regs -- global regs will not be remapped - MTCTR reg -> MTCTR (env reg) - BCTR targets lbl rs -> BCTR targets lbl rs - BL imm argRegs -> BL imm argRegs -- argument regs - BCTRL argRegs -> BCTRL argRegs -- cannot be remapped - ADD reg1 reg2 ri -> ADD (env reg1) (env reg2) (fixRI ri) - ADDO reg1 reg2 reg3 -> ADDO (env reg1) (env reg2) (env reg3) - ADDC reg1 reg2 reg3 -> ADDC (env reg1) (env reg2) (env reg3) - ADDE reg1 reg2 reg3 -> ADDE (env reg1) (env reg2) (env reg3) - ADDZE reg1 reg2 -> ADDZE (env reg1) (env reg2) - ADDIS reg1 reg2 imm -> ADDIS (env reg1) (env reg2) imm - SUBF reg1 reg2 reg3 -> SUBF (env reg1) (env reg2) (env reg3) - SUBFO reg1 reg2 reg3 -> SUBFO (env reg1) (env reg2) (env reg3) - SUBFC reg1 reg2 ri -> SUBFC (env reg1) (env reg2) (fixRI ri) - SUBFE reg1 reg2 reg3 -> SUBFE (env reg1) (env reg2) (env reg3) - MULL fmt reg1 reg2 ri - -> MULL fmt (env reg1) (env reg2) (fixRI ri) - MULLO fmt reg1 reg2 reg3 - -> MULLO fmt (env reg1) (env reg2) (env reg3) - MFOV fmt reg -> MFOV fmt (env reg) - MULHU fmt reg1 reg2 reg3 - -> MULHU fmt (env reg1) (env reg2) (env reg3) - DIV fmt sgn reg1 reg2 reg3 - -> DIV fmt sgn (env reg1) (env reg2) (env reg3) - - AND reg1 reg2 ri -> AND (env reg1) (env reg2) (fixRI ri) - ANDC reg1 reg2 reg3 -> ANDC (env reg1) (env reg2) (env reg3) - NAND reg1 reg2 reg3 -> NAND (env reg1) (env reg2) (env reg3) - OR reg1 reg2 ri -> OR (env reg1) (env reg2) (fixRI ri) - ORIS reg1 reg2 imm -> ORIS (env reg1) (env reg2) imm - XOR reg1 reg2 ri -> XOR (env reg1) (env reg2) (fixRI ri) - XORIS reg1 reg2 imm -> XORIS (env reg1) (env reg2) imm - EXTS fmt reg1 reg2 -> EXTS fmt (env reg1) (env reg2) - CNTLZ fmt reg1 reg2 -> CNTLZ fmt (env reg1) (env reg2) - NEG reg1 reg2 -> NEG (env reg1) (env reg2) - NOT reg1 reg2 -> NOT (env reg1) (env reg2) - SL fmt reg1 reg2 ri - -> SL fmt (env reg1) (env reg2) (fixRI ri) - SR fmt reg1 reg2 ri - -> SR fmt (env reg1) (env reg2) (fixRI ri) - SRA fmt reg1 reg2 ri - -> SRA fmt (env reg1) (env reg2) (fixRI ri) - RLWINM reg1 reg2 sh mb me - -> RLWINM (env reg1) (env reg2) sh mb me - CLRLI fmt reg1 reg2 n -> CLRLI fmt (env reg1) (env reg2) n - CLRRI fmt reg1 reg2 n -> CLRRI fmt (env reg1) (env reg2) n - FADD fmt r1 r2 r3 -> FADD fmt (env r1) (env r2) (env r3) - FSUB fmt r1 r2 r3 -> FSUB fmt (env r1) (env r2) (env r3) - FMUL fmt r1 r2 r3 -> FMUL fmt (env r1) (env r2) (env r3) - FDIV fmt r1 r2 r3 -> FDIV fmt (env r1) (env r2) (env r3) - FABS r1 r2 -> FABS (env r1) (env r2) - FNEG r1 r2 -> FNEG (env r1) (env r2) - FCMP r1 r2 -> FCMP (env r1) (env r2) - FCTIWZ r1 r2 -> FCTIWZ (env r1) (env r2) - FCTIDZ r1 r2 -> FCTIDZ (env r1) (env r2) - FCFID r1 r2 -> FCFID (env r1) (env r2) - FRSP r1 r2 -> FRSP (env r1) (env r2) - MFCR reg -> MFCR (env reg) - MFLR reg -> MFLR (env reg) - FETCHPC reg -> FETCHPC (env reg) - _ -> instr - where - fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2) - fixAddr (AddrRegImm r1 i) = AddrRegImm (env r1) i - - fixRI (RIReg r) = RIReg (env r) - fixRI other = other - - --------------------------------------------------------------------------------- --- | Checks whether this instruction is a jump/branch instruction. --- One that can change the flow of control in a way that the --- register allocator needs to worry about. -ppc_isJumpishInstr :: Instr -> Bool -ppc_isJumpishInstr instr - = case instr of - BCC{} -> True - BCCFAR{} -> True - BCTR{} -> True - BCTRL{} -> True - BL{} -> True - JMP{} -> True - _ -> False - - --- | Checks whether this instruction is a jump/branch instruction. --- One that can change the flow of control in a way that the --- register allocator needs to worry about. -ppc_jumpDestsOfInstr :: Instr -> [BlockId] -ppc_jumpDestsOfInstr insn - = case insn of - BCC _ id _ -> [id] - BCCFAR _ id _ -> [id] - BCTR targets _ _ -> [id | Just id <- targets] - _ -> [] - - --- | Change the destination of this jump instruction. --- Used in the linear allocator when adding fixup blocks for join --- points. -ppc_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr -ppc_patchJumpInstr insn patchF - = case insn of - BCC cc id p -> BCC cc (patchF id) p - BCCFAR cc id p -> BCCFAR cc (patchF id) p - BCTR ids lbl rs -> BCTR (map (fmap patchF) ids) lbl rs - _ -> insn - - --- ----------------------------------------------------------------------------- - --- | An instruction to spill a register into a spill slot. -ppc_mkSpillInstr - :: DynFlags - -> Reg -- register to spill - -> Int -- current stack delta - -> Int -- spill slot to use - -> Instr - -ppc_mkSpillInstr dflags reg delta slot - = let platform = targetPlatform dflags - off = spillSlotToOffset dflags slot - arch = platformArch platform - in - let fmt = case targetClassOfReg platform reg of - RcInteger -> case arch of - ArchPPC -> II32 - _ -> II64 - RcDouble -> FF64 - _ -> panic "PPC.Instr.mkSpillInstr: no match" - instr = case makeImmediate W32 True (off-delta) of - Just _ -> ST - Nothing -> STFAR -- pseudo instruction: 32 bit offsets - - in instr fmt reg (AddrRegImm sp (ImmInt (off-delta))) - - -ppc_mkLoadInstr - :: DynFlags - -> Reg -- register to load - -> Int -- current stack delta - -> Int -- spill slot to use - -> Instr - -ppc_mkLoadInstr dflags reg delta slot - = let platform = targetPlatform dflags - off = spillSlotToOffset dflags slot - arch = platformArch platform - in - let fmt = case targetClassOfReg platform reg of - RcInteger -> case arch of - ArchPPC -> II32 - _ -> II64 - RcDouble -> FF64 - _ -> panic "PPC.Instr.mkLoadInstr: no match" - instr = case makeImmediate W32 True (off-delta) of - Just _ -> LD - Nothing -> LDFAR -- pseudo instruction: 32 bit offsets - - in instr fmt reg (AddrRegImm sp (ImmInt (off-delta))) - - --- | The size of a minimal stackframe header including minimal --- parameter save area. -stackFrameHeaderSize :: DynFlags -> Int -stackFrameHeaderSize dflags - = case platformOS platform of - OSAIX -> 24 + 8 * 4 - _ -> case platformArch platform of - -- header + parameter save area - ArchPPC -> 64 -- TODO: check ABI spec - ArchPPC_64 ELF_V1 -> 48 + 8 * 8 - ArchPPC_64 ELF_V2 -> 32 + 8 * 8 - _ -> panic "PPC.stackFrameHeaderSize: not defined for this OS" - where platform = targetPlatform dflags - --- | The maximum number of bytes required to spill a register. PPC32 --- has 32-bit GPRs and 64-bit FPRs, while PPC64 has 64-bit GPRs and --- 64-bit FPRs. So the maximum is 8 regardless of platforms unlike --- x86. Note that AltiVec's vector registers are 128-bit wide so we --- must not use this to spill them. -spillSlotSize :: Int -spillSlotSize = 8 - --- | The number of spill slots available without allocating more. -maxSpillSlots :: DynFlags -> Int -maxSpillSlots dflags - = ((rESERVED_C_STACK_BYTES dflags - stackFrameHeaderSize dflags) - `div` spillSlotSize) - 1 --- = 0 -- useful for testing allocMoreStack - --- | The number of bytes that the stack pointer should be aligned --- to. This is 16 both on PPC32 and PPC64 ELF (see ELF processor --- specific supplements). -stackAlign :: Int -stackAlign = 16 - --- | Convert a spill slot number to a *byte* offset, with no sign. -spillSlotToOffset :: DynFlags -> Int -> Int -spillSlotToOffset dflags slot - = stackFrameHeaderSize dflags + spillSlotSize * slot - - --------------------------------------------------------------------------------- --- | See if this instruction is telling us the current C stack delta -ppc_takeDeltaInstr - :: Instr - -> Maybe Int - -ppc_takeDeltaInstr instr - = case instr of - DELTA i -> Just i - _ -> Nothing - - -ppc_isMetaInstr - :: Instr - -> Bool - -ppc_isMetaInstr instr - = case instr of - COMMENT{} -> True - LDATA{} -> True - NEWBLOCK{} -> True - DELTA{} -> True - _ -> False - - --- | Copy the value in a register to another one. --- Must work for all register classes. -ppc_mkRegRegMoveInstr - :: Reg - -> Reg - -> Instr - -ppc_mkRegRegMoveInstr src dst - = MR dst src - - --- | Make an unconditional jump instruction. -ppc_mkJumpInstr - :: BlockId - -> [Instr] - -ppc_mkJumpInstr id - = [BCC ALWAYS id Nothing] - - --- | Take the source and destination from this reg -> reg move instruction --- or Nothing if it's not one -ppc_takeRegRegMoveInstr :: Instr -> Maybe (Reg,Reg) -ppc_takeRegRegMoveInstr (MR dst src) = Just (src,dst) -ppc_takeRegRegMoveInstr _ = Nothing - --- ----------------------------------------------------------------------------- --- Making far branches - --- Conditional branches on PowerPC are limited to +-32KB; if our Procs get too --- big, we have to work around this limitation. - -makeFarBranches - :: LabelMap RawCmmStatics - -> [NatBasicBlock Instr] - -> [NatBasicBlock Instr] -makeFarBranches info_env blocks - | last blockAddresses < nearLimit = blocks - | otherwise = zipWith handleBlock blockAddresses blocks - where - blockAddresses = scanl (+) 0 $ map blockLen blocks - blockLen (BasicBlock _ instrs) = length instrs - - handleBlock addr (BasicBlock id instrs) - = BasicBlock id (zipWith makeFar [addr..] instrs) - - makeFar _ (BCC ALWAYS tgt _) = BCC ALWAYS tgt Nothing - makeFar addr (BCC cond tgt p) - | abs (addr - targetAddr) >= nearLimit - = BCCFAR cond tgt p - | otherwise - = BCC cond tgt p - where Just targetAddr = lookupUFM blockAddressMap tgt - makeFar _ other = other - - -- 8192 instructions are allowed; let's keep some distance, as - -- we have a few pseudo-insns that are pretty-printed as - -- multiple instructions, and it's just not worth the effort - -- to calculate things exactly - nearLimit = 7000 - mapSize info_env * maxRetInfoTableSizeW - - blockAddressMap = listToUFM $ zip (map blockId blocks) blockAddresses diff --git a/compiler/nativeGen/PPC/Ppr.hs b/compiler/nativeGen/PPC/Ppr.hs deleted file mode 100644 index 8b81274db9..0000000000 --- a/compiler/nativeGen/PPC/Ppr.hs +++ /dev/null @@ -1,994 +0,0 @@ ------------------------------------------------------------------------------ --- --- Pretty-printing assembly language --- --- (c) The University of Glasgow 1993-2005 --- ------------------------------------------------------------------------------ - -{-# OPTIONS_GHC -fno-warn-orphans #-} -module PPC.Ppr (pprNatCmmDecl) where - -import GhcPrelude - -import PPC.Regs -import PPC.Instr -import PPC.Cond -import PprBase -import Instruction -import Format -import Reg -import RegClass -import TargetReg - -import GHC.Cmm hiding (topInfoTable) -import GHC.Cmm.Dataflow.Collections -import GHC.Cmm.Dataflow.Label - -import GHC.Cmm.BlockId -import GHC.Cmm.CLabel -import GHC.Cmm.Ppr.Expr () -- For Outputable instances - -import Unique ( pprUniqueAlways, getUnique ) -import GHC.Platform -import FastString -import Outputable -import GHC.Driver.Session - -import Data.Word -import Data.Int -import Data.Bits - --- ----------------------------------------------------------------------------- --- Printing this stuff out - -pprNatCmmDecl :: NatCmmDecl RawCmmStatics Instr -> SDoc -pprNatCmmDecl (CmmData section dats) = - pprSectionAlign section $$ pprDatas dats - -pprNatCmmDecl proc@(CmmProc top_info lbl _ (ListGraph blocks)) = - case topInfoTable proc of - Nothing -> - sdocWithPlatform $ \platform -> - -- special case for code without info table: - pprSectionAlign (Section Text lbl) $$ - (case platformArch platform of - ArchPPC_64 ELF_V1 -> pprFunctionDescriptor lbl - ArchPPC_64 ELF_V2 -> pprFunctionPrologue lbl - _ -> pprLabel lbl) $$ -- blocks guaranteed not null, - -- so label needed - vcat (map (pprBasicBlock top_info) blocks) - - Just (RawCmmStatics info_lbl _) -> - sdocWithPlatform $ \platform -> - pprSectionAlign (Section Text info_lbl) $$ - (if platformHasSubsectionsViaSymbols platform - then ppr (mkDeadStripPreventer info_lbl) <> char ':' - else empty) $$ - vcat (map (pprBasicBlock top_info) blocks) $$ - -- above: Even the first block gets a label, because with branch-chain - -- elimination, it might be the target of a goto. - (if platformHasSubsectionsViaSymbols platform - then - -- See Note [Subsections Via Symbols] in X86/Ppr.hs - text "\t.long " - <+> ppr info_lbl - <+> char '-' - <+> ppr (mkDeadStripPreventer info_lbl) - else empty) - -pprFunctionDescriptor :: CLabel -> SDoc -pprFunctionDescriptor lab = pprGloblDecl lab - $$ text "\t.section \".opd\", \"aw\"" - $$ text "\t.align 3" - $$ ppr lab <> char ':' - $$ text "\t.quad ." - <> ppr lab - <> text ",.TOC.@tocbase,0" - $$ text "\t.previous" - $$ text "\t.type" - <+> ppr lab - <> text ", @function" - $$ char '.' <> ppr lab <> char ':' - -pprFunctionPrologue :: CLabel ->SDoc -pprFunctionPrologue lab = pprGloblDecl lab - $$ text ".type " - <> ppr lab - <> text ", @function" - $$ ppr lab <> char ':' - $$ text "0:\taddis\t" <> pprReg toc - <> text ",12,.TOC.-0b@ha" - $$ text "\taddi\t" <> pprReg toc - <> char ',' <> pprReg toc <> text ",.TOC.-0b@l" - $$ text "\t.localentry\t" <> ppr lab - <> text ",.-" <> ppr lab - -pprBasicBlock :: LabelMap RawCmmStatics -> NatBasicBlock Instr -> SDoc -pprBasicBlock info_env (BasicBlock blockid instrs) - = maybe_infotable $$ - pprLabel (blockLbl blockid) $$ - vcat (map pprInstr instrs) - where - maybe_infotable = case mapLookup blockid info_env of - Nothing -> empty - Just (RawCmmStatics info_lbl info) -> - pprAlignForSection Text $$ - vcat (map pprData info) $$ - pprLabel info_lbl - - - -pprDatas :: RawCmmStatics -> SDoc --- See note [emit-time elimination of static indirections] in CLabel. -pprDatas (RawCmmStatics alias [CmmStaticLit (CmmLabel lbl), CmmStaticLit ind, _, _]) - | lbl == mkIndStaticInfoLabel - , let labelInd (CmmLabelOff l _) = Just l - labelInd (CmmLabel l) = Just l - labelInd _ = Nothing - , Just ind' <- labelInd ind - , alias `mayRedirectTo` ind' - = pprGloblDecl alias - $$ text ".equiv" <+> ppr alias <> comma <> ppr (CmmLabel ind') -pprDatas (RawCmmStatics lbl dats) = vcat (pprLabel lbl : map pprData dats) - -pprData :: CmmStatic -> SDoc -pprData (CmmString str) = pprBytes str -pprData (CmmUninitialised bytes) = text ".space " <> int bytes -pprData (CmmStaticLit lit) = pprDataItem lit - -pprGloblDecl :: CLabel -> SDoc -pprGloblDecl lbl - | not (externallyVisibleCLabel lbl) = empty - | otherwise = text ".globl " <> ppr lbl - -pprTypeAndSizeDecl :: CLabel -> SDoc -pprTypeAndSizeDecl lbl - = sdocWithPlatform $ \platform -> - if platformOS platform == OSLinux && externallyVisibleCLabel lbl - then text ".type " <> - ppr lbl <> text ", @object" - else empty - -pprLabel :: CLabel -> SDoc -pprLabel lbl = pprGloblDecl lbl - $$ pprTypeAndSizeDecl lbl - $$ (ppr lbl <> char ':') - --- ----------------------------------------------------------------------------- --- pprInstr: print an 'Instr' - -instance Outputable Instr where - ppr instr = pprInstr instr - - -pprReg :: Reg -> SDoc - -pprReg r - = case r of - RegReal (RealRegSingle i) -> ppr_reg_no i - RegReal (RealRegPair{}) -> panic "PPC.pprReg: no reg pairs on this arch" - RegVirtual (VirtualRegI u) -> text "%vI_" <> pprUniqueAlways u - RegVirtual (VirtualRegHi u) -> text "%vHi_" <> pprUniqueAlways u - RegVirtual (VirtualRegF u) -> text "%vF_" <> pprUniqueAlways u - RegVirtual (VirtualRegD u) -> text "%vD_" <> pprUniqueAlways u - - where - ppr_reg_no :: Int -> SDoc - ppr_reg_no i - | i <= 31 = int i -- GPRs - | i <= 63 = int (i-32) -- FPRs - | otherwise = text "very naughty powerpc register" - - - -pprFormat :: Format -> SDoc -pprFormat x - = ptext (case x of - II8 -> sLit "b" - II16 -> sLit "h" - II32 -> sLit "w" - II64 -> sLit "d" - FF32 -> sLit "fs" - FF64 -> sLit "fd") - - -pprCond :: Cond -> SDoc -pprCond c - = ptext (case c of { - ALWAYS -> sLit ""; - EQQ -> sLit "eq"; NE -> sLit "ne"; - LTT -> sLit "lt"; GE -> sLit "ge"; - GTT -> sLit "gt"; LE -> sLit "le"; - LU -> sLit "lt"; GEU -> sLit "ge"; - GU -> sLit "gt"; LEU -> sLit "le"; }) - - -pprImm :: Imm -> SDoc - -pprImm (ImmInt i) = int i -pprImm (ImmInteger i) = integer i -pprImm (ImmCLbl l) = ppr l -pprImm (ImmIndex l i) = ppr l <> char '+' <> int i -pprImm (ImmLit s) = s - -pprImm (ImmFloat _) = text "naughty float immediate" -pprImm (ImmDouble _) = text "naughty double immediate" - -pprImm (ImmConstantSum a b) = pprImm a <> char '+' <> pprImm b -pprImm (ImmConstantDiff a b) = pprImm a <> char '-' - <> lparen <> pprImm b <> rparen - -pprImm (LO (ImmInt i)) = pprImm (LO (ImmInteger (toInteger i))) -pprImm (LO (ImmInteger i)) = pprImm (ImmInteger (toInteger lo16)) - where - lo16 = fromInteger (i .&. 0xffff) :: Int16 - -pprImm (LO i) - = pprImm i <> text "@l" - -pprImm (HI i) - = pprImm i <> text "@h" - -pprImm (HA (ImmInt i)) = pprImm (HA (ImmInteger (toInteger i))) -pprImm (HA (ImmInteger i)) = pprImm (ImmInteger ha16) - where - ha16 = if lo16 >= 0x8000 then hi16+1 else hi16 - hi16 = (i `shiftR` 16) - lo16 = i .&. 0xffff - -pprImm (HA i) - = pprImm i <> text "@ha" - -pprImm (HIGHERA i) - = pprImm i <> text "@highera" - -pprImm (HIGHESTA i) - = pprImm i <> text "@highesta" - - -pprAddr :: AddrMode -> SDoc -pprAddr (AddrRegReg r1 r2) - = pprReg r1 <> char ',' <+> pprReg r2 -pprAddr (AddrRegImm r1 (ImmInt i)) - = hcat [ int i, char '(', pprReg r1, char ')' ] -pprAddr (AddrRegImm r1 (ImmInteger i)) - = hcat [ integer i, char '(', pprReg r1, char ')' ] -pprAddr (AddrRegImm r1 imm) - = hcat [ pprImm imm, char '(', pprReg r1, char ')' ] - - -pprSectionAlign :: Section -> SDoc -pprSectionAlign sec@(Section seg _) = - sdocWithPlatform $ \platform -> - pprSectionHeader platform sec $$ - pprAlignForSection seg - --- | Print appropriate alignment for the given section type. -pprAlignForSection :: SectionType -> SDoc -pprAlignForSection seg = - sdocWithPlatform $ \platform -> - let ppc64 = not $ target32Bit platform - in ptext $ case seg of - Text -> sLit ".align 2" - Data - | ppc64 -> sLit ".align 3" - | otherwise -> sLit ".align 2" - ReadOnlyData - | ppc64 -> sLit ".align 3" - | otherwise -> sLit ".align 2" - RelocatableReadOnlyData - | ppc64 -> sLit ".align 3" - | otherwise -> sLit ".align 2" - UninitialisedData - | ppc64 -> sLit ".align 3" - | otherwise -> sLit ".align 2" - ReadOnlyData16 -> sLit ".align 4" - -- TODO: This is copied from the ReadOnlyData case, but it can likely be - -- made more efficient. - CString - | ppc64 -> sLit ".align 3" - | otherwise -> sLit ".align 2" - OtherSection _ -> panic "PprMach.pprSectionAlign: unknown section" - -pprDataItem :: CmmLit -> SDoc -pprDataItem lit - = sdocWithDynFlags $ \dflags -> - vcat (ppr_item (cmmTypeFormat $ cmmLitType dflags lit) lit dflags) - where - imm = litToImm lit - archPPC_64 dflags = not $ target32Bit $ targetPlatform dflags - - ppr_item II8 _ _ = [text "\t.byte\t" <> pprImm imm] - - ppr_item II32 _ _ = [text "\t.long\t" <> pprImm imm] - - ppr_item II64 _ dflags - | archPPC_64 dflags = [text "\t.quad\t" <> pprImm imm] - - - ppr_item FF32 (CmmFloat r _) _ - = let bs = floatToBytes (fromRational r) - in map (\b -> text "\t.byte\t" <> pprImm (ImmInt b)) bs - - ppr_item FF64 (CmmFloat r _) _ - = let bs = doubleToBytes (fromRational r) - in map (\b -> text "\t.byte\t" <> pprImm (ImmInt b)) bs - - ppr_item II16 _ _ = [text "\t.short\t" <> pprImm imm] - - ppr_item II64 (CmmInt x _) dflags - | not(archPPC_64 dflags) = - [text "\t.long\t" - <> int (fromIntegral - (fromIntegral (x `shiftR` 32) :: Word32)), - text "\t.long\t" - <> int (fromIntegral (fromIntegral x :: Word32))] - - ppr_item _ _ _ - = panic "PPC.Ppr.pprDataItem: no match" - - -pprInstr :: Instr -> SDoc - -pprInstr (COMMENT _) = empty -- nuke 'em -{- -pprInstr (COMMENT s) = - if platformOS platform == OSLinux - then text "# " <> ftext s - else text "; " <> ftext s --} -pprInstr (DELTA d) - = pprInstr (COMMENT (mkFastString ("\tdelta = " ++ show d))) - -pprInstr (NEWBLOCK _) - = panic "PprMach.pprInstr: NEWBLOCK" - -pprInstr (LDATA _ _) - = panic "PprMach.pprInstr: LDATA" - -{- -pprInstr (SPILL reg slot) - = hcat [ - text "\tSPILL", - char '\t', - pprReg reg, - comma, - text "SLOT" <> parens (int slot)] - -pprInstr (RELOAD slot reg) - = hcat [ - text "\tRELOAD", - char '\t', - text "SLOT" <> parens (int slot), - comma, - pprReg reg] --} - -pprInstr (LD fmt reg addr) = hcat [ - char '\t', - text "l", - ptext (case fmt of - II8 -> sLit "bz" - II16 -> sLit "hz" - II32 -> sLit "wz" - II64 -> sLit "d" - FF32 -> sLit "fs" - FF64 -> sLit "fd" - ), - case addr of AddrRegImm _ _ -> empty - AddrRegReg _ _ -> char 'x', - char '\t', - pprReg reg, - text ", ", - pprAddr addr - ] - -pprInstr (LDFAR fmt reg (AddrRegImm source off)) = - sdocWithPlatform $ \platform -> vcat [ - pprInstr (ADDIS (tmpReg platform) source (HA off)), - pprInstr (LD fmt reg (AddrRegImm (tmpReg platform) (LO off))) - ] -pprInstr (LDFAR _ _ _) = - panic "PPC.Ppr.pprInstr LDFAR: no match" - -pprInstr (LDR fmt reg1 addr) = hcat [ - text "\tl", - case fmt of - II32 -> char 'w' - II64 -> char 'd' - _ -> panic "PPC.Ppr.Instr LDR: no match", - text "arx\t", - pprReg reg1, - text ", ", - pprAddr addr - ] - -pprInstr (LA fmt reg addr) = hcat [ - char '\t', - text "l", - ptext (case fmt of - II8 -> sLit "ba" - II16 -> sLit "ha" - II32 -> sLit "wa" - II64 -> sLit "d" - FF32 -> sLit "fs" - FF64 -> sLit "fd" - ), - case addr of AddrRegImm _ _ -> empty - AddrRegReg _ _ -> char 'x', - char '\t', - pprReg reg, - text ", ", - pprAddr addr - ] -pprInstr (ST fmt reg addr) = hcat [ - char '\t', - text "st", - pprFormat fmt, - case addr of AddrRegImm _ _ -> empty - AddrRegReg _ _ -> char 'x', - char '\t', - pprReg reg, - text ", ", - pprAddr addr - ] -pprInstr (STFAR fmt reg (AddrRegImm source off)) = - sdocWithPlatform $ \platform -> vcat [ - pprInstr (ADDIS (tmpReg platform) source (HA off)), - pprInstr (ST fmt reg (AddrRegImm (tmpReg platform) (LO off))) - ] -pprInstr (STFAR _ _ _) = - panic "PPC.Ppr.pprInstr STFAR: no match" -pprInstr (STU fmt reg addr) = hcat [ - char '\t', - text "st", - pprFormat fmt, - char 'u', - case addr of AddrRegImm _ _ -> empty - AddrRegReg _ _ -> char 'x', - char '\t', - pprReg reg, - text ", ", - pprAddr addr - ] -pprInstr (STC fmt reg1 addr) = hcat [ - text "\tst", - case fmt of - II32 -> char 'w' - II64 -> char 'd' - _ -> panic "PPC.Ppr.Instr STC: no match", - text "cx.\t", - pprReg reg1, - text ", ", - pprAddr addr - ] -pprInstr (LIS reg imm) = hcat [ - char '\t', - text "lis", - char '\t', - pprReg reg, - text ", ", - pprImm imm - ] -pprInstr (LI reg imm) = hcat [ - char '\t', - text "li", - char '\t', - pprReg reg, - text ", ", - pprImm imm - ] -pprInstr (MR reg1 reg2) - | reg1 == reg2 = empty - | otherwise = hcat [ - char '\t', - sdocWithPlatform $ \platform -> - case targetClassOfReg platform reg1 of - RcInteger -> text "mr" - _ -> text "fmr", - char '\t', - pprReg reg1, - text ", ", - pprReg reg2 - ] -pprInstr (CMP fmt reg ri) = hcat [ - char '\t', - op, - char '\t', - pprReg reg, - text ", ", - pprRI ri - ] - where - op = hcat [ - text "cmp", - pprFormat fmt, - case ri of - RIReg _ -> empty - RIImm _ -> char 'i' - ] -pprInstr (CMPL fmt reg ri) = hcat [ - char '\t', - op, - char '\t', - pprReg reg, - text ", ", - pprRI ri - ] - where - op = hcat [ - text "cmpl", - pprFormat fmt, - case ri of - RIReg _ -> empty - RIImm _ -> char 'i' - ] -pprInstr (BCC cond blockid prediction) = hcat [ - char '\t', - text "b", - pprCond cond, - pprPrediction prediction, - char '\t', - ppr lbl - ] - where lbl = mkLocalBlockLabel (getUnique blockid) - pprPrediction p = case p of - Nothing -> empty - Just True -> char '+' - Just False -> char '-' - -pprInstr (BCCFAR cond blockid prediction) = vcat [ - hcat [ - text "\tb", - pprCond (condNegate cond), - neg_prediction, - text "\t$+8" - ], - hcat [ - text "\tb\t", - ppr lbl - ] - ] - where lbl = mkLocalBlockLabel (getUnique blockid) - neg_prediction = case prediction of - Nothing -> empty - Just True -> char '-' - Just False -> char '+' - -pprInstr (JMP lbl _) - -- We never jump to ForeignLabels; if we ever do, c.f. handling for "BL" - | isForeignLabel lbl = panic "PPC.Ppr.pprInstr: JMP to ForeignLabel" - | otherwise = - hcat [ -- an alias for b that takes a CLabel - char '\t', - text "b", - char '\t', - ppr lbl - ] - -pprInstr (MTCTR reg) = hcat [ - char '\t', - text "mtctr", - char '\t', - pprReg reg - ] -pprInstr (BCTR _ _ _) = hcat [ - char '\t', - text "bctr" - ] -pprInstr (BL lbl _) = do - sdocWithPlatform $ \platform -> case platformOS platform of - OSAIX -> - -- On AIX, "printf" denotes a function-descriptor (for use - -- by function pointers), whereas the actual entry-code - -- address is denoted by the dot-prefixed ".printf" label. - -- Moreover, the PPC NCG only ever emits a BL instruction - -- for calling C ABI functions. Most of the time these calls - -- originate from FFI imports and have a 'ForeignLabel', - -- but when profiling the codegen inserts calls via - -- 'emitRtsCallGen' which are 'CmmLabel's even though - -- they'd technically be more like 'ForeignLabel's. - hcat [ - text "\tbl\t.", - ppr lbl - ] - _ -> - hcat [ - text "\tbl\t", - ppr lbl - ] -pprInstr (BCTRL _) = hcat [ - char '\t', - text "bctrl" - ] -pprInstr (ADD reg1 reg2 ri) = pprLogic (sLit "add") reg1 reg2 ri -pprInstr (ADDIS reg1 reg2 imm) = hcat [ - char '\t', - text "addis", - char '\t', - pprReg reg1, - text ", ", - pprReg reg2, - text ", ", - pprImm imm - ] - -pprInstr (ADDO reg1 reg2 reg3) = pprLogic (sLit "addo") reg1 reg2 (RIReg reg3) -pprInstr (ADDC reg1 reg2 reg3) = pprLogic (sLit "addc") reg1 reg2 (RIReg reg3) -pprInstr (ADDE reg1 reg2 reg3) = pprLogic (sLit "adde") reg1 reg2 (RIReg reg3) -pprInstr (ADDZE reg1 reg2) = pprUnary (sLit "addze") reg1 reg2 -pprInstr (SUBF reg1 reg2 reg3) = pprLogic (sLit "subf") reg1 reg2 (RIReg reg3) -pprInstr (SUBFO reg1 reg2 reg3) = pprLogic (sLit "subfo") reg1 reg2 (RIReg reg3) -pprInstr (SUBFC reg1 reg2 ri) = hcat [ - char '\t', - text "subf", - case ri of - RIReg _ -> empty - RIImm _ -> char 'i', - text "c\t", - pprReg reg1, - text ", ", - pprReg reg2, - text ", ", - pprRI ri - ] -pprInstr (SUBFE reg1 reg2 reg3) = pprLogic (sLit "subfe") reg1 reg2 (RIReg reg3) -pprInstr (MULL fmt reg1 reg2 ri) = pprMul fmt reg1 reg2 ri -pprInstr (MULLO fmt reg1 reg2 reg3) = hcat [ - char '\t', - text "mull", - case fmt of - II32 -> char 'w' - II64 -> char 'd' - _ -> panic "PPC: illegal format", - text "o\t", - pprReg reg1, - text ", ", - pprReg reg2, - text ", ", - pprReg reg3 - ] -pprInstr (MFOV fmt reg) = vcat [ - hcat [ - char '\t', - text "mfxer", - char '\t', - pprReg reg - ], - hcat [ - char '\t', - text "extr", - case fmt of - II32 -> char 'w' - II64 -> char 'd' - _ -> panic "PPC: illegal format", - text "i\t", - pprReg reg, - text ", ", - pprReg reg, - text ", 1, ", - case fmt of - II32 -> text "1" - II64 -> text "33" - _ -> panic "PPC: illegal format" - ] - ] - -pprInstr (MULHU fmt reg1 reg2 reg3) = hcat [ - char '\t', - text "mulh", - case fmt of - II32 -> char 'w' - II64 -> char 'd' - _ -> panic "PPC: illegal format", - text "u\t", - pprReg reg1, - text ", ", - pprReg reg2, - text ", ", - pprReg reg3 - ] - -pprInstr (DIV fmt sgn reg1 reg2 reg3) = pprDiv fmt sgn reg1 reg2 reg3 - - -- for some reason, "andi" doesn't exist. - -- we'll use "andi." instead. -pprInstr (AND reg1 reg2 (RIImm imm)) = hcat [ - char '\t', - text "andi.", - char '\t', - pprReg reg1, - text ", ", - pprReg reg2, - text ", ", - pprImm imm - ] -pprInstr (AND reg1 reg2 ri) = pprLogic (sLit "and") reg1 reg2 ri -pprInstr (ANDC reg1 reg2 reg3) = pprLogic (sLit "andc") reg1 reg2 (RIReg reg3) -pprInstr (NAND reg1 reg2 reg3) = pprLogic (sLit "nand") reg1 reg2 (RIReg reg3) - -pprInstr (OR reg1 reg2 ri) = pprLogic (sLit "or") reg1 reg2 ri -pprInstr (XOR reg1 reg2 ri) = pprLogic (sLit "xor") reg1 reg2 ri - -pprInstr (ORIS reg1 reg2 imm) = hcat [ - char '\t', - text "oris", - char '\t', - pprReg reg1, - text ", ", - pprReg reg2, - text ", ", - pprImm imm - ] - -pprInstr (XORIS reg1 reg2 imm) = hcat [ - char '\t', - text "xoris", - char '\t', - pprReg reg1, - text ", ", - pprReg reg2, - text ", ", - pprImm imm - ] - -pprInstr (EXTS fmt reg1 reg2) = hcat [ - char '\t', - text "exts", - pprFormat fmt, - char '\t', - pprReg reg1, - text ", ", - pprReg reg2 - ] -pprInstr (CNTLZ fmt reg1 reg2) = hcat [ - char '\t', - text "cntlz", - case fmt of - II32 -> char 'w' - II64 -> char 'd' - _ -> panic "PPC: illegal format", - char '\t', - pprReg reg1, - text ", ", - pprReg reg2 - ] - -pprInstr (NEG reg1 reg2) = pprUnary (sLit "neg") reg1 reg2 -pprInstr (NOT reg1 reg2) = pprUnary (sLit "not") reg1 reg2 - -pprInstr (SR II32 reg1 reg2 (RIImm (ImmInt i))) | i < 0 || i > 31 = - -- Handle the case where we are asked to shift a 32 bit register by - -- less than zero or more than 31 bits. We convert this into a clear - -- of the destination register. - -- Fixes ticket https://gitlab.haskell.org/ghc/ghc/issues/5900 - pprInstr (XOR reg1 reg2 (RIReg reg2)) - -pprInstr (SL II32 reg1 reg2 (RIImm (ImmInt i))) | i < 0 || i > 31 = - -- As above for SR, but for left shifts. - -- Fixes ticket https://gitlab.haskell.org/ghc/ghc/issues/10870 - pprInstr (XOR reg1 reg2 (RIReg reg2)) - -pprInstr (SRA II32 reg1 reg2 (RIImm (ImmInt i))) | i > 31 = - -- PT: I don't know what to do for negative shift amounts: - -- For now just panic. - -- - -- For shift amounts greater than 31 set all bit to the - -- value of the sign bit, this also what sraw does. - pprInstr (SRA II32 reg1 reg2 (RIImm (ImmInt 31))) - -pprInstr (SL fmt reg1 reg2 ri) = - let op = case fmt of - II32 -> "slw" - II64 -> "sld" - _ -> panic "PPC.Ppr.pprInstr: shift illegal size" - in pprLogic (sLit op) reg1 reg2 (limitShiftRI fmt ri) - -pprInstr (SR fmt reg1 reg2 ri) = - let op = case fmt of - II32 -> "srw" - II64 -> "srd" - _ -> panic "PPC.Ppr.pprInstr: shift illegal size" - in pprLogic (sLit op) reg1 reg2 (limitShiftRI fmt ri) - -pprInstr (SRA fmt reg1 reg2 ri) = - let op = case fmt of - II32 -> "sraw" - II64 -> "srad" - _ -> panic "PPC.Ppr.pprInstr: shift illegal size" - in pprLogic (sLit op) reg1 reg2 (limitShiftRI fmt ri) - -pprInstr (RLWINM reg1 reg2 sh mb me) = hcat [ - text "\trlwinm\t", - pprReg reg1, - text ", ", - pprReg reg2, - text ", ", - int sh, - text ", ", - int mb, - text ", ", - int me - ] - -pprInstr (CLRLI fmt reg1 reg2 n) = hcat [ - text "\tclrl", - pprFormat fmt, - text "i ", - pprReg reg1, - text ", ", - pprReg reg2, - text ", ", - int n - ] -pprInstr (CLRRI fmt reg1 reg2 n) = hcat [ - text "\tclrr", - pprFormat fmt, - text "i ", - pprReg reg1, - text ", ", - pprReg reg2, - text ", ", - int n - ] - -pprInstr (FADD fmt reg1 reg2 reg3) = pprBinaryF (sLit "fadd") fmt reg1 reg2 reg3 -pprInstr (FSUB fmt reg1 reg2 reg3) = pprBinaryF (sLit "fsub") fmt reg1 reg2 reg3 -pprInstr (FMUL fmt reg1 reg2 reg3) = pprBinaryF (sLit "fmul") fmt reg1 reg2 reg3 -pprInstr (FDIV fmt reg1 reg2 reg3) = pprBinaryF (sLit "fdiv") fmt reg1 reg2 reg3 -pprInstr (FABS reg1 reg2) = pprUnary (sLit "fabs") reg1 reg2 -pprInstr (FNEG reg1 reg2) = pprUnary (sLit "fneg") reg1 reg2 - -pprInstr (FCMP reg1 reg2) = hcat [ - char '\t', - text "fcmpu\t0, ", - -- Note: we're using fcmpu, not fcmpo - -- The difference is with fcmpo, compare with NaN is an invalid operation. - -- We don't handle invalid fp ops, so we don't care. - -- Moreover, we use `fcmpu 0, ...` rather than `fcmpu cr0, ...` for - -- better portability since some non-GNU assembler (such as - -- IBM's `as`) tend not to support the symbolic register name cr0. - -- This matches the syntax that GCC seems to emit for PPC targets. - pprReg reg1, - text ", ", - pprReg reg2 - ] - -pprInstr (FCTIWZ reg1 reg2) = pprUnary (sLit "fctiwz") reg1 reg2 -pprInstr (FCTIDZ reg1 reg2) = pprUnary (sLit "fctidz") reg1 reg2 -pprInstr (FCFID reg1 reg2) = pprUnary (sLit "fcfid") reg1 reg2 -pprInstr (FRSP reg1 reg2) = pprUnary (sLit "frsp") reg1 reg2 - -pprInstr (CRNOR dst src1 src2) = hcat [ - text "\tcrnor\t", - int dst, - text ", ", - int src1, - text ", ", - int src2 - ] - -pprInstr (MFCR reg) = hcat [ - char '\t', - text "mfcr", - char '\t', - pprReg reg - ] - -pprInstr (MFLR reg) = hcat [ - char '\t', - text "mflr", - char '\t', - pprReg reg - ] - -pprInstr (FETCHPC reg) = vcat [ - text "\tbcl\t20,31,1f", - hcat [ text "1:\tmflr\t", pprReg reg ] - ] - -pprInstr HWSYNC = text "\tsync" - -pprInstr ISYNC = text "\tisync" - -pprInstr LWSYNC = text "\tlwsync" - -pprInstr NOP = text "\tnop" - - -pprLogic :: PtrString -> Reg -> Reg -> RI -> SDoc -pprLogic op reg1 reg2 ri = hcat [ - char '\t', - ptext op, - case ri of - RIReg _ -> empty - RIImm _ -> char 'i', - char '\t', - pprReg reg1, - text ", ", - pprReg reg2, - text ", ", - pprRI ri - ] - - -pprMul :: Format -> Reg -> Reg -> RI -> SDoc -pprMul fmt reg1 reg2 ri = hcat [ - char '\t', - text "mull", - case ri of - RIReg _ -> case fmt of - II32 -> char 'w' - II64 -> char 'd' - _ -> panic "PPC: illegal format" - RIImm _ -> char 'i', - char '\t', - pprReg reg1, - text ", ", - pprReg reg2, - text ", ", - pprRI ri - ] - - -pprDiv :: Format -> Bool -> Reg -> Reg -> Reg -> SDoc -pprDiv fmt sgn reg1 reg2 reg3 = hcat [ - char '\t', - text "div", - case fmt of - II32 -> char 'w' - II64 -> char 'd' - _ -> panic "PPC: illegal format", - if sgn then empty else char 'u', - char '\t', - pprReg reg1, - text ", ", - pprReg reg2, - text ", ", - pprReg reg3 - ] - - -pprUnary :: PtrString -> Reg -> Reg -> SDoc -pprUnary op reg1 reg2 = hcat [ - char '\t', - ptext op, - char '\t', - pprReg reg1, - text ", ", - pprReg reg2 - ] - - -pprBinaryF :: PtrString -> Format -> Reg -> Reg -> Reg -> SDoc -pprBinaryF op fmt reg1 reg2 reg3 = hcat [ - char '\t', - ptext op, - pprFFormat fmt, - char '\t', - pprReg reg1, - text ", ", - pprReg reg2, - text ", ", - pprReg reg3 - ] - -pprRI :: RI -> SDoc -pprRI (RIReg r) = pprReg r -pprRI (RIImm r) = pprImm r - - -pprFFormat :: Format -> SDoc -pprFFormat FF64 = empty -pprFFormat FF32 = char 's' -pprFFormat _ = panic "PPC.Ppr.pprFFormat: no match" - - -- limit immediate argument for shift instruction to range 0..63 - -- for 64 bit size and 0..32 otherwise -limitShiftRI :: Format -> RI -> RI -limitShiftRI II64 (RIImm (ImmInt i)) | i > 63 || i < 0 = - panic $ "PPC.Ppr: Shift by " ++ show i ++ " bits is not allowed." -limitShiftRI II32 (RIImm (ImmInt i)) | i > 31 || i < 0 = - panic $ "PPC.Ppr: 32 bit: Shift by " ++ show i ++ " bits is not allowed." -limitShiftRI _ x = x diff --git a/compiler/nativeGen/PPC/RegInfo.hs b/compiler/nativeGen/PPC/RegInfo.hs deleted file mode 100644 index c1a4e73e3d..0000000000 --- a/compiler/nativeGen/PPC/RegInfo.hs +++ /dev/null @@ -1,80 +0,0 @@ -{-# LANGUAGE CPP #-} - ------------------------------------------------------------------------------ --- --- Machine-specific parts of the register allocator --- --- (c) The University of Glasgow 1996-2004 --- ------------------------------------------------------------------------------ -module PPC.RegInfo ( - JumpDest( DestBlockId ), getJumpDestBlockId, - canShortcut, - shortcutJump, - - shortcutStatics -) - -where - -#include "HsVersions.h" - -import GhcPrelude - -import PPC.Instr - -import GHC.Cmm.BlockId -import GHC.Cmm -import GHC.Cmm.CLabel - -import Unique -import Outputable (ppr, text, Outputable, (<>)) - -data JumpDest = DestBlockId BlockId - --- Debug Instance -instance Outputable JumpDest where - ppr (DestBlockId bid) = text "jd<blk>:" <> ppr bid - -getJumpDestBlockId :: JumpDest -> Maybe BlockId -getJumpDestBlockId (DestBlockId bid) = Just bid - -canShortcut :: Instr -> Maybe JumpDest -canShortcut _ = Nothing - -shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr -shortcutJump _ other = other - - --- Here because it knows about JumpDest -shortcutStatics :: (BlockId -> Maybe JumpDest) -> RawCmmStatics -> RawCmmStatics -shortcutStatics fn (RawCmmStatics lbl statics) - = RawCmmStatics lbl $ map (shortcutStatic fn) statics - -- we need to get the jump tables, so apply the mapping to the entries - -- of a CmmData too. - -shortcutLabel :: (BlockId -> Maybe JumpDest) -> CLabel -> CLabel -shortcutLabel fn lab - | Just blkId <- maybeLocalBlockLabel lab = shortBlockId fn blkId - | otherwise = lab - -shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic -shortcutStatic fn (CmmStaticLit (CmmLabel lab)) - = CmmStaticLit (CmmLabel (shortcutLabel fn lab)) -shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off w)) - = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off w) - -- slightly dodgy, we're ignoring the second label, but this - -- works with the way we use CmmLabelDiffOff for jump tables now. -shortcutStatic _ other_static - = other_static - -shortBlockId - :: (BlockId -> Maybe JumpDest) - -> BlockId - -> CLabel - -shortBlockId fn blockid = - case fn blockid of - Nothing -> mkLocalBlockLabel uq - Just (DestBlockId blockid') -> shortBlockId fn blockid' - where uq = getUnique blockid diff --git a/compiler/nativeGen/PPC/Regs.hs b/compiler/nativeGen/PPC/Regs.hs deleted file mode 100644 index ff3ec639be..0000000000 --- a/compiler/nativeGen/PPC/Regs.hs +++ /dev/null @@ -1,333 +0,0 @@ -{-# LANGUAGE CPP #-} - --- ----------------------------------------------------------------------------- --- --- (c) The University of Glasgow 1994-2004 --- --- ----------------------------------------------------------------------------- - -module PPC.Regs ( - -- squeeze functions - virtualRegSqueeze, - realRegSqueeze, - - mkVirtualReg, - regDotColor, - - -- immediates - Imm(..), - strImmLit, - litToImm, - - -- addressing modes - AddrMode(..), - addrOffset, - - -- registers - spRel, - argRegs, - allArgRegs, - callClobberedRegs, - allMachRegNos, - classOfRealReg, - showReg, - - -- machine specific - allFPArgRegs, - fits16Bits, - makeImmediate, - fReg, - r0, sp, toc, r3, r4, r11, r12, r30, - tmpReg, - f1, - - allocatableRegs - -) - -where - -#include "HsVersions.h" - -import GhcPrelude - -import Reg -import RegClass -import Format - -import GHC.Cmm -import GHC.Cmm.CLabel ( CLabel ) -import Unique - -import GHC.Platform.Regs -import GHC.Driver.Session -import Outputable -import GHC.Platform - -import Data.Word ( Word8, Word16, Word32, Word64 ) -import Data.Int ( Int8, Int16, Int32, Int64 ) - - --- squeese functions for the graph allocator ----------------------------------- - --- | regSqueeze_class reg --- Calculate the maximum number of register colors that could be --- denied to a node of this class due to having this reg --- as a neighbour. --- -{-# INLINE virtualRegSqueeze #-} -virtualRegSqueeze :: RegClass -> VirtualReg -> Int -virtualRegSqueeze cls vr - = case cls of - RcInteger - -> case vr of - VirtualRegI{} -> 1 - VirtualRegHi{} -> 1 - _other -> 0 - - RcDouble - -> case vr of - VirtualRegD{} -> 1 - VirtualRegF{} -> 0 - _other -> 0 - - _other -> 0 - -{-# INLINE realRegSqueeze #-} -realRegSqueeze :: RegClass -> RealReg -> Int -realRegSqueeze cls rr - = case cls of - RcInteger - -> case rr of - RealRegSingle regNo - | regNo < 32 -> 1 -- first fp reg is 32 - | otherwise -> 0 - - RealRegPair{} -> 0 - - RcDouble - -> case rr of - RealRegSingle regNo - | regNo < 32 -> 0 - | otherwise -> 1 - - RealRegPair{} -> 0 - - _other -> 0 - -mkVirtualReg :: Unique -> Format -> VirtualReg -mkVirtualReg u format - | not (isFloatFormat format) = VirtualRegI u - | otherwise - = case format of - FF32 -> VirtualRegD u - FF64 -> VirtualRegD u - _ -> panic "mkVirtualReg" - -regDotColor :: RealReg -> SDoc -regDotColor reg - = case classOfRealReg reg of - RcInteger -> text "blue" - RcFloat -> text "red" - RcDouble -> text "green" - - - --- immediates ------------------------------------------------------------------ -data Imm - = ImmInt Int - | ImmInteger Integer -- Sigh. - | ImmCLbl CLabel -- AbstractC Label (with baggage) - | ImmLit SDoc -- Simple string - | ImmIndex CLabel Int - | ImmFloat Rational - | ImmDouble Rational - | ImmConstantSum Imm Imm - | ImmConstantDiff Imm Imm - | LO Imm - | HI Imm - | HA Imm {- high halfword adjusted -} - | HIGHERA Imm - | HIGHESTA Imm - - -strImmLit :: String -> Imm -strImmLit s = ImmLit (text s) - - -litToImm :: CmmLit -> Imm -litToImm (CmmInt i w) = ImmInteger (narrowS w i) - -- narrow to the width: a CmmInt might be out of - -- range, but we assume that ImmInteger only contains - -- in-range values. A signed value should be fine here. -litToImm (CmmFloat f W32) = ImmFloat f -litToImm (CmmFloat f W64) = ImmDouble f -litToImm (CmmLabel l) = ImmCLbl l -litToImm (CmmLabelOff l off) = ImmIndex l off -litToImm (CmmLabelDiffOff l1 l2 off _) - = ImmConstantSum - (ImmConstantDiff (ImmCLbl l1) (ImmCLbl l2)) - (ImmInt off) -litToImm _ = panic "PPC.Regs.litToImm: no match" - - --- addressing modes ------------------------------------------------------------ - -data AddrMode - = AddrRegReg Reg Reg - | AddrRegImm Reg Imm - - -addrOffset :: AddrMode -> Int -> Maybe AddrMode -addrOffset addr off - = case addr of - AddrRegImm r (ImmInt n) - | fits16Bits n2 -> Just (AddrRegImm r (ImmInt n2)) - | otherwise -> Nothing - where n2 = n + off - - AddrRegImm r (ImmInteger n) - | fits16Bits n2 -> Just (AddrRegImm r (ImmInt (fromInteger n2))) - | otherwise -> Nothing - where n2 = n + toInteger off - - _ -> Nothing - - --- registers ------------------------------------------------------------------- --- @spRel@ gives us a stack relative addressing mode for volatile --- temporaries and for excess call arguments. @fpRel@, where --- applicable, is the same but for the frame pointer. - -spRel :: DynFlags - -> Int -- desired stack offset in words, positive or negative - -> AddrMode - -spRel dflags n = AddrRegImm sp (ImmInt (n * wORD_SIZE dflags)) - - --- argRegs is the set of regs which are read for an n-argument call to C. --- For archs which pass all args on the stack (x86), is empty. --- Sparc passes up to the first 6 args in regs. -argRegs :: RegNo -> [Reg] -argRegs 0 = [] -argRegs 1 = map regSingle [3] -argRegs 2 = map regSingle [3,4] -argRegs 3 = map regSingle [3..5] -argRegs 4 = map regSingle [3..6] -argRegs 5 = map regSingle [3..7] -argRegs 6 = map regSingle [3..8] -argRegs 7 = map regSingle [3..9] -argRegs 8 = map regSingle [3..10] -argRegs _ = panic "MachRegs.argRegs(powerpc): don't know about >8 arguments!" - - -allArgRegs :: [Reg] -allArgRegs = map regSingle [3..10] - - --- these are the regs which we cannot assume stay alive over a C call. -callClobberedRegs :: Platform -> [Reg] -callClobberedRegs _platform - = map regSingle (0:[2..12] ++ map fReg [0..13]) - - -allMachRegNos :: [RegNo] -allMachRegNos = [0..63] - - -{-# INLINE classOfRealReg #-} -classOfRealReg :: RealReg -> RegClass -classOfRealReg (RealRegSingle i) - | i < 32 = RcInteger - | otherwise = RcDouble - -classOfRealReg (RealRegPair{}) - = panic "regClass(ppr): no reg pairs on this architecture" - -showReg :: RegNo -> String -showReg n - | n >= 0 && n <= 31 = "%r" ++ show n - | n >= 32 && n <= 63 = "%f" ++ show (n - 32) - | otherwise = "%unknown_powerpc_real_reg_" ++ show n - - - --- machine specific ------------------------------------------------------------ - -allFPArgRegs :: Platform -> [Reg] -allFPArgRegs platform - = case platformOS platform of - OSAIX -> map (regSingle . fReg) [1..13] - _ -> case platformArch platform of - ArchPPC -> map (regSingle . fReg) [1..8] - ArchPPC_64 _ -> map (regSingle . fReg) [1..13] - _ -> panic "PPC.Regs.allFPArgRegs: unknown PPC Linux" - -fits16Bits :: Integral a => a -> Bool -fits16Bits x = x >= -32768 && x < 32768 - -makeImmediate :: Integral a => Width -> Bool -> a -> Maybe Imm -makeImmediate rep signed x = fmap ImmInt (toI16 rep signed) - where - narrow W64 False = fromIntegral (fromIntegral x :: Word64) - narrow W32 False = fromIntegral (fromIntegral x :: Word32) - narrow W16 False = fromIntegral (fromIntegral x :: Word16) - narrow W8 False = fromIntegral (fromIntegral x :: Word8) - narrow W64 True = fromIntegral (fromIntegral x :: Int64) - narrow W32 True = fromIntegral (fromIntegral x :: Int32) - narrow W16 True = fromIntegral (fromIntegral x :: Int16) - narrow W8 True = fromIntegral (fromIntegral x :: Int8) - narrow _ _ = panic "PPC.Regs.narrow: no match" - - narrowed = narrow rep signed - - toI16 W32 True - | narrowed >= -32768 && narrowed < 32768 = Just narrowed - | otherwise = Nothing - toI16 W32 False - | narrowed >= 0 && narrowed < 65536 = Just narrowed - | otherwise = Nothing - toI16 W64 True - | narrowed >= -32768 && narrowed < 32768 = Just narrowed - | otherwise = Nothing - toI16 W64 False - | narrowed >= 0 && narrowed < 65536 = Just narrowed - | otherwise = Nothing - toI16 _ _ = Just narrowed - - -{- -The PowerPC has 64 registers of interest; 32 integer registers and 32 floating -point registers. --} - -fReg :: Int -> RegNo -fReg x = (32 + x) - -r0, sp, toc, r3, r4, r11, r12, r30, f1 :: Reg -r0 = regSingle 0 -sp = regSingle 1 -toc = regSingle 2 -r3 = regSingle 3 -r4 = regSingle 4 -r11 = regSingle 11 -r12 = regSingle 12 -r30 = regSingle 30 -f1 = regSingle $ fReg 1 - --- allocatableRegs is allMachRegNos with the fixed-use regs removed. --- i.e., these are the regs for which we are prepared to allow the --- register allocator to attempt to map VRegs to. -allocatableRegs :: Platform -> [RealReg] -allocatableRegs platform - = let isFree i = freeReg platform i - in map RealRegSingle $ filter isFree allMachRegNos - --- temporary register for compiler use -tmpReg :: Platform -> Reg -tmpReg platform = - case platformArch platform of - ArchPPC -> regSingle 13 - ArchPPC_64 _ -> regSingle 30 - _ -> panic "PPC.Regs.tmpReg: unknown arch" diff --git a/compiler/nativeGen/PprBase.hs b/compiler/nativeGen/PprBase.hs deleted file mode 100644 index dc6e65bd70..0000000000 --- a/compiler/nativeGen/PprBase.hs +++ /dev/null @@ -1,275 +0,0 @@ -{-# LANGUAGE MagicHash #-} - ------------------------------------------------------------------------------ --- --- Pretty-printing assembly language --- --- (c) The University of Glasgow 1993-2005 --- ------------------------------------------------------------------------------ - -module PprBase ( - castFloatToWord8Array, - castDoubleToWord8Array, - floatToBytes, - doubleToBytes, - pprASCII, - pprBytes, - pprSectionHeader -) - -where - -import GhcPrelude - -import AsmUtils -import GHC.Cmm.CLabel -import GHC.Cmm -import GHC.Driver.Session -import FastString -import Outputable -import GHC.Platform -import FileCleanup - -import qualified Data.Array.Unsafe as U ( castSTUArray ) -import Data.Array.ST - -import Control.Monad.ST - -import Data.Word -import Data.Bits -import Data.ByteString (ByteString) -import qualified Data.ByteString as BS -import GHC.Exts -import GHC.Word -import System.IO.Unsafe - - - --- ----------------------------------------------------------------------------- --- Converting floating-point literals to integrals for printing - -castFloatToWord8Array :: STUArray s Int Float -> ST s (STUArray s Int Word8) -castFloatToWord8Array = U.castSTUArray - -castDoubleToWord8Array :: STUArray s Int Double -> ST s (STUArray s Int Word8) -castDoubleToWord8Array = U.castSTUArray - --- floatToBytes and doubleToBytes convert to the host's byte --- order. Providing that we're not cross-compiling for a --- target with the opposite endianness, this should work ok --- on all targets. - --- ToDo: this stuff is very similar to the shenanigans in PprAbs, --- could they be merged? - -floatToBytes :: Float -> [Int] -floatToBytes f - = runST (do - arr <- newArray_ ((0::Int),3) - writeArray arr 0 f - arr <- castFloatToWord8Array arr - i0 <- readArray arr 0 - i1 <- readArray arr 1 - i2 <- readArray arr 2 - i3 <- readArray arr 3 - return (map fromIntegral [i0,i1,i2,i3]) - ) - -doubleToBytes :: Double -> [Int] -doubleToBytes d - = runST (do - arr <- newArray_ ((0::Int),7) - writeArray arr 0 d - arr <- castDoubleToWord8Array arr - i0 <- readArray arr 0 - i1 <- readArray arr 1 - i2 <- readArray arr 2 - i3 <- readArray arr 3 - i4 <- readArray arr 4 - i5 <- readArray arr 5 - i6 <- readArray arr 6 - i7 <- readArray arr 7 - return (map fromIntegral [i0,i1,i2,i3,i4,i5,i6,i7]) - ) - --- --------------------------------------------------------------------------- --- Printing ASCII strings. --- --- Print as a string and escape non-printable characters. --- This is similar to charToC in Utils. - -pprASCII :: ByteString -> SDoc -pprASCII str - -- Transform this given literal bytestring to escaped string and construct - -- the literal SDoc directly. - -- See #14741 - -- and Note [Pretty print ASCII when AsmCodeGen] - = text $ BS.foldr (\w s -> do1 w ++ s) "" str - where - do1 :: Word8 -> String - do1 w | 0x09 == w = "\\t" - | 0x0A == w = "\\n" - | 0x22 == w = "\\\"" - | 0x5C == w = "\\\\" - -- ASCII printable characters range - | w >= 0x20 && w <= 0x7E = [chr' w] - | otherwise = '\\' : octal w - - -- we know that the Chars we create are in the ASCII range - -- so we bypass the check in "chr" - chr' :: Word8 -> Char - chr' (W8# w#) = C# (chr# (word2Int# w#)) - - octal :: Word8 -> String - octal w = [ chr' (ord0 + (w `unsafeShiftR` 6) .&. 0x07) - , chr' (ord0 + (w `unsafeShiftR` 3) .&. 0x07) - , chr' (ord0 + w .&. 0x07) - ] - ord0 = 0x30 -- = ord '0' - --- | Pretty print binary data. --- --- Use either the ".string" directive or a ".incbin" directive. --- See Note [Embedding large binary blobs] --- --- A NULL byte is added after the binary data. --- -pprBytes :: ByteString -> SDoc -pprBytes bs = sdocWithDynFlags $ \dflags -> - if binBlobThreshold dflags == 0 - || fromIntegral (BS.length bs) <= binBlobThreshold dflags - then text "\t.string " <> doubleQuotes (pprASCII bs) - else unsafePerformIO $ do - bFile <- newTempName dflags TFL_CurrentModule ".dat" - BS.writeFile bFile bs - return $ text "\t.incbin " - <> pprFilePathString bFile -- proper escape (see #16389) - <> text "\n\t.byte 0" - -{- -Note [Embedding large binary blobs] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -To embed a blob of binary data (e.g. an UTF-8 encoded string) into the generated -code object, we have several options: - - 1. Generate a ".byte" directive for each byte. This is what was done in the past - (see Note [Pretty print ASCII when AsmCodeGen]). - - 2. Generate a single ".string"/".asciz" directive for the whole sequence of - bytes. Bytes in the ASCII printable range are rendered as characters and - other values are escaped (e.g., "\t", "\077", etc.). - - 3. Create a temporary file into which we dump the binary data and generate a - single ".incbin" directive. The assembler will include the binary file for - us in the generated output object. - -Now the code generator uses either (2) or (3), depending on the binary blob -size. Using (3) for small blobs adds too much overhead (see benchmark results -in #16190), so we only do it when the size is above a threshold (500K at the -time of writing). - -The threshold is configurable via the `-fbinary-blob-threshold` flag. - --} - - -{- -Note [Pretty print ASCII when AsmCodeGen] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Previously, when generating assembly code, we created SDoc with -`(ptext . sLit)` for every bytes in literal bytestring, then -combine them using `hcat`. - -When handling literal bytestrings with millions of bytes, -millions of SDoc would be created and to combine, leading to -high memory usage. - -Now we escape the given bytestring to string directly and construct -SDoc only once. This improvement could dramatically decrease the -memory allocation from 4.7GB to 1.3GB when embedding a 3MB literal -string in source code. See #14741 for profiling results. --} - --- ---------------------------------------------------------------------------- --- Printing section headers. --- --- If -split-section was specified, include the suffix label, otherwise just --- print the section type. For Darwin, where subsections-for-symbols are --- used instead, only print section type. --- --- For string literals, additional flags are specified to enable merging of --- identical strings in the linker. With -split-sections each string also gets --- a unique section to allow strings from unused code to be GC'd. - -pprSectionHeader :: Platform -> Section -> SDoc -pprSectionHeader platform (Section t suffix) = - case platformOS platform of - OSAIX -> pprXcoffSectionHeader t - OSDarwin -> pprDarwinSectionHeader t - OSMinGW32 -> pprGNUSectionHeader (char '$') t suffix - _ -> pprGNUSectionHeader (char '.') t suffix - -pprGNUSectionHeader :: SDoc -> SectionType -> CLabel -> SDoc -pprGNUSectionHeader sep t suffix = sdocWithDynFlags $ \dflags -> - let splitSections = gopt Opt_SplitSections dflags - subsection | splitSections = sep <> ppr suffix - | otherwise = empty - in text ".section " <> ptext (header dflags) <> subsection <> - flags dflags - where - header dflags = case t of - Text -> sLit ".text" - Data -> sLit ".data" - ReadOnlyData | OSMinGW32 <- platformOS (targetPlatform dflags) - -> sLit ".rdata" - | otherwise -> sLit ".rodata" - RelocatableReadOnlyData | OSMinGW32 <- platformOS (targetPlatform dflags) - -- Concept does not exist on Windows, - -- So map these to R/O data. - -> sLit ".rdata$rel.ro" - | otherwise -> sLit ".data.rel.ro" - UninitialisedData -> sLit ".bss" - ReadOnlyData16 | OSMinGW32 <- platformOS (targetPlatform dflags) - -> sLit ".rdata$cst16" - | otherwise -> sLit ".rodata.cst16" - CString - | OSMinGW32 <- platformOS (targetPlatform dflags) - -> sLit ".rdata" - | otherwise -> sLit ".rodata.str" - OtherSection _ -> - panic "PprBase.pprGNUSectionHeader: unknown section type" - flags dflags = case t of - CString - | OSMinGW32 <- platformOS (targetPlatform dflags) - -> empty - | otherwise -> text ",\"aMS\"," <> sectionType "progbits" <> text ",1" - _ -> empty - --- XCOFF doesn't support relocating label-differences, so we place all --- RO sections into .text[PR] sections -pprXcoffSectionHeader :: SectionType -> SDoc -pprXcoffSectionHeader t = text $ case t of - Text -> ".csect .text[PR]" - Data -> ".csect .data[RW]" - ReadOnlyData -> ".csect .text[PR] # ReadOnlyData" - RelocatableReadOnlyData -> ".csect .text[PR] # RelocatableReadOnlyData" - ReadOnlyData16 -> ".csect .text[PR] # ReadOnlyData16" - CString -> ".csect .text[PR] # CString" - UninitialisedData -> ".csect .data[BS]" - OtherSection _ -> - panic "PprBase.pprXcoffSectionHeader: unknown section type" - -pprDarwinSectionHeader :: SectionType -> SDoc -pprDarwinSectionHeader t = - ptext $ case t of - Text -> sLit ".text" - Data -> sLit ".data" - ReadOnlyData -> sLit ".const" - RelocatableReadOnlyData -> sLit ".const_data" - UninitialisedData -> sLit ".data" - ReadOnlyData16 -> sLit ".const" - CString -> sLit ".section\t__TEXT,__cstring,cstring_literals" - OtherSection _ -> - panic "PprBase.pprDarwinSectionHeader: unknown section type" diff --git a/compiler/nativeGen/Reg.hs b/compiler/nativeGen/Reg.hs deleted file mode 100644 index 3808434d62..0000000000 --- a/compiler/nativeGen/Reg.hs +++ /dev/null @@ -1,241 +0,0 @@ --- | An architecture independent description of a register. --- This needs to stay architecture independent because it is used --- by NCGMonad and the register allocators, which are shared --- by all architectures. --- -module Reg ( - RegNo, - Reg(..), - regPair, - regSingle, - isRealReg, takeRealReg, - isVirtualReg, takeVirtualReg, - - VirtualReg(..), - renameVirtualReg, - classOfVirtualReg, - getHiVirtualRegFromLo, - getHiVRegFromLo, - - RealReg(..), - regNosOfRealReg, - realRegsAlias, - - liftPatchFnToRegReg -) - -where - -import GhcPrelude - -import Outputable -import Unique -import RegClass -import Data.List (intersect) - --- | An identifier for a primitive real machine register. -type RegNo - = Int - --- VirtualRegs are virtual registers. The register allocator will --- eventually have to map them into RealRegs, or into spill slots. --- --- VirtualRegs are allocated on the fly, usually to represent a single --- value in the abstract assembly code (i.e. dynamic registers are --- usually single assignment). --- --- The single assignment restriction isn't necessary to get correct code, --- although a better register allocation will result if single --- assignment is used -- because the allocator maps a VirtualReg into --- a single RealReg, even if the VirtualReg has multiple live ranges. --- --- Virtual regs can be of either class, so that info is attached. --- -data VirtualReg - = VirtualRegI {-# UNPACK #-} !Unique - | VirtualRegHi {-# UNPACK #-} !Unique -- High part of 2-word register - | VirtualRegF {-# UNPACK #-} !Unique - | VirtualRegD {-# UNPACK #-} !Unique - - deriving (Eq, Show) - --- This is laborious, but necessary. We can't derive Ord because --- Unique doesn't have an Ord instance. Note nonDetCmpUnique in the --- implementation. See Note [No Ord for Unique] --- This is non-deterministic but we do not currently support deterministic --- code-generation. See Note [Unique Determinism and code generation] -instance Ord VirtualReg where - compare (VirtualRegI a) (VirtualRegI b) = nonDetCmpUnique a b - compare (VirtualRegHi a) (VirtualRegHi b) = nonDetCmpUnique a b - compare (VirtualRegF a) (VirtualRegF b) = nonDetCmpUnique a b - compare (VirtualRegD a) (VirtualRegD b) = nonDetCmpUnique a b - - compare VirtualRegI{} _ = LT - compare _ VirtualRegI{} = GT - compare VirtualRegHi{} _ = LT - compare _ VirtualRegHi{} = GT - compare VirtualRegF{} _ = LT - compare _ VirtualRegF{} = GT - - - -instance Uniquable VirtualReg where - getUnique reg - = case reg of - VirtualRegI u -> u - VirtualRegHi u -> u - VirtualRegF u -> u - VirtualRegD u -> u - -instance Outputable VirtualReg where - ppr reg - = case reg of - VirtualRegI u -> text "%vI_" <> pprUniqueAlways u - VirtualRegHi u -> text "%vHi_" <> pprUniqueAlways u - -- this code is kinda wrong on x86 - -- because float and double occupy the same register set - -- namely SSE2 register xmm0 .. xmm15 - VirtualRegF u -> text "%vFloat_" <> pprUniqueAlways u - VirtualRegD u -> text "%vDouble_" <> pprUniqueAlways u - - - -renameVirtualReg :: Unique -> VirtualReg -> VirtualReg -renameVirtualReg u r - = case r of - VirtualRegI _ -> VirtualRegI u - VirtualRegHi _ -> VirtualRegHi u - VirtualRegF _ -> VirtualRegF u - VirtualRegD _ -> VirtualRegD u - - -classOfVirtualReg :: VirtualReg -> RegClass -classOfVirtualReg vr - = case vr of - VirtualRegI{} -> RcInteger - VirtualRegHi{} -> RcInteger - VirtualRegF{} -> RcFloat - VirtualRegD{} -> RcDouble - - - --- Determine the upper-half vreg for a 64-bit quantity on a 32-bit platform --- when supplied with the vreg for the lower-half of the quantity. --- (NB. Not reversible). -getHiVirtualRegFromLo :: VirtualReg -> VirtualReg -getHiVirtualRegFromLo reg - = case reg of - -- makes a pseudo-unique with tag 'H' - VirtualRegI u -> VirtualRegHi (newTagUnique u 'H') - _ -> panic "Reg.getHiVirtualRegFromLo" - -getHiVRegFromLo :: Reg -> Reg -getHiVRegFromLo reg - = case reg of - RegVirtual vr -> RegVirtual (getHiVirtualRegFromLo vr) - RegReal _ -> panic "Reg.getHiVRegFromLo" - - ------------------------------------------------------------------------------------- --- | RealRegs are machine regs which are available for allocation, in --- the usual way. We know what class they are, because that's part of --- the processor's architecture. --- --- RealRegPairs are pairs of real registers that are allocated together --- to hold a larger value, such as with Double regs on SPARC. --- -data RealReg - = RealRegSingle {-# UNPACK #-} !RegNo - | RealRegPair {-# UNPACK #-} !RegNo {-# UNPACK #-} !RegNo - deriving (Eq, Show, Ord) - -instance Uniquable RealReg where - getUnique reg - = case reg of - RealRegSingle i -> mkRegSingleUnique i - RealRegPair r1 r2 -> mkRegPairUnique (r1 * 65536 + r2) - -instance Outputable RealReg where - ppr reg - = case reg of - RealRegSingle i -> text "%r" <> int i - RealRegPair r1 r2 -> text "%r(" <> int r1 - <> vbar <> int r2 <> text ")" - -regNosOfRealReg :: RealReg -> [RegNo] -regNosOfRealReg rr - = case rr of - RealRegSingle r1 -> [r1] - RealRegPair r1 r2 -> [r1, r2] - - -realRegsAlias :: RealReg -> RealReg -> Bool -realRegsAlias rr1 rr2 - = not $ null $ intersect (regNosOfRealReg rr1) (regNosOfRealReg rr2) - --------------------------------------------------------------------------------- --- | A register, either virtual or real -data Reg - = RegVirtual !VirtualReg - | RegReal !RealReg - deriving (Eq, Ord) - -regSingle :: RegNo -> Reg -regSingle regNo = RegReal $ RealRegSingle regNo - -regPair :: RegNo -> RegNo -> Reg -regPair regNo1 regNo2 = RegReal $ RealRegPair regNo1 regNo2 - - --- We like to have Uniques for Reg so that we can make UniqFM and UniqSets --- in the register allocator. -instance Uniquable Reg where - getUnique reg - = case reg of - RegVirtual vr -> getUnique vr - RegReal rr -> getUnique rr - --- | Print a reg in a generic manner --- If you want the architecture specific names, then use the pprReg --- function from the appropriate Ppr module. -instance Outputable Reg where - ppr reg - = case reg of - RegVirtual vr -> ppr vr - RegReal rr -> ppr rr - - -isRealReg :: Reg -> Bool -isRealReg reg - = case reg of - RegReal _ -> True - RegVirtual _ -> False - -takeRealReg :: Reg -> Maybe RealReg -takeRealReg reg - = case reg of - RegReal rr -> Just rr - _ -> Nothing - - -isVirtualReg :: Reg -> Bool -isVirtualReg reg - = case reg of - RegReal _ -> False - RegVirtual _ -> True - -takeVirtualReg :: Reg -> Maybe VirtualReg -takeVirtualReg reg - = case reg of - RegReal _ -> Nothing - RegVirtual vr -> Just vr - - --- | The patch function supplied by the allocator maps VirtualReg to RealReg --- regs, but sometimes we want to apply it to plain old Reg. --- -liftPatchFnToRegReg :: (VirtualReg -> RealReg) -> (Reg -> Reg) -liftPatchFnToRegReg patchF reg - = case reg of - RegVirtual vr -> RegReal (patchF vr) - RegReal _ -> reg diff --git a/compiler/nativeGen/RegAlloc/Graph/ArchBase.hs b/compiler/nativeGen/RegAlloc/Graph/ArchBase.hs deleted file mode 100644 index c38d998779..0000000000 --- a/compiler/nativeGen/RegAlloc/Graph/ArchBase.hs +++ /dev/null @@ -1,163 +0,0 @@ - --- | Utils for calculating general worst, bound, squeese and free, functions. --- --- as per: "A Generalized Algorithm for Graph-Coloring Register Allocation" --- Michael Smith, Normal Ramsey, Glenn Holloway. --- PLDI 2004 --- --- These general versions are not used in GHC proper because they are too slow. --- Instead, hand written optimised versions are provided for each architecture --- in MachRegs*.hs --- --- This code is here because we can test the architecture specific code against --- it. --- -module RegAlloc.Graph.ArchBase ( - RegClass(..), - Reg(..), - RegSub(..), - - worst, - bound, - squeese -) where - -import GhcPrelude - -import UniqSet -import UniqFM -import Unique -import MonadUtils (concatMapM) - - --- Some basic register classes. --- These aren't necessarily in 1-to-1 correspondence with the allocatable --- RegClasses in MachRegs.hs -data RegClass - -- general purpose regs - = ClassG32 -- 32 bit GPRs - | ClassG16 -- 16 bit GPRs - | ClassG8 -- 8 bit GPRs - - -- floating point regs - | ClassF64 -- 64 bit FPRs - deriving (Show, Eq, Enum) - - --- | A register of some class -data Reg - -- a register of some class - = Reg RegClass Int - - -- a sub-component of one of the other regs - | RegSub RegSub Reg - deriving (Show, Eq) - - --- | so we can put regs in UniqSets -instance Uniquable Reg where - getUnique (Reg c i) - = mkRegSingleUnique - $ fromEnum c * 1000 + i - - getUnique (RegSub s (Reg c i)) - = mkRegSubUnique - $ fromEnum s * 10000 + fromEnum c * 1000 + i - - getUnique (RegSub _ (RegSub _ _)) - = error "RegArchBase.getUnique: can't have a sub-reg of a sub-reg." - - --- | A subcomponent of another register -data RegSub - = SubL16 -- lowest 16 bits - | SubL8 -- lowest 8 bits - | SubL8H -- second lowest 8 bits - deriving (Show, Enum, Ord, Eq) - - --- | Worst case displacement --- --- a node N of classN has some number of neighbors, --- all of which are from classC. --- --- (worst neighbors classN classC) is the maximum number of potential --- colors for N that can be lost by coloring its neighbors. --- --- This should be hand coded/cached for each particular architecture, --- because the compute time is very long.. -worst :: (RegClass -> UniqSet Reg) - -> (Reg -> UniqSet Reg) - -> Int -> RegClass -> RegClass -> Int - -worst regsOfClass regAlias neighbors classN classC - = let regAliasS regs = unionManyUniqSets - $ map regAlias - $ nonDetEltsUniqSet regs - -- This is non-deterministic but we do not - -- currently support deterministic code-generation. - -- See Note [Unique Determinism and code generation] - - -- all the regs in classes N, C - regsN = regsOfClass classN - regsC = regsOfClass classC - - -- all the possible subsets of c which have size < m - regsS = filter (\s -> sizeUniqSet s >= 1 - && sizeUniqSet s <= neighbors) - $ powersetLS regsC - - -- for each of the subsets of C, the regs which conflict - -- with posiblities for N - regsS_conflict - = map (\s -> intersectUniqSets regsN (regAliasS s)) regsS - - in maximum $ map sizeUniqSet $ regsS_conflict - - --- | For a node N of classN and neighbors of classesC --- (bound classN classesC) is the maximum number of potential --- colors for N that can be lost by coloring its neighbors. -bound :: (RegClass -> UniqSet Reg) - -> (Reg -> UniqSet Reg) - -> RegClass -> [RegClass] -> Int - -bound regsOfClass regAlias classN classesC - = let regAliasS regs = unionManyUniqSets - $ map regAlias - $ nonDetEltsUFM regs - -- See Note [Unique Determinism and code generation] - - regsC_aliases - = unionManyUniqSets - $ map (regAliasS . getUniqSet . regsOfClass) classesC - - overlap = intersectUniqSets (regsOfClass classN) regsC_aliases - - in sizeUniqSet overlap - - --- | The total squeese on a particular node with a list of neighbors. --- --- A version of this should be constructed for each particular architecture, --- possibly including uses of bound, so that alised registers don't get --- counted twice, as per the paper. -squeese :: (RegClass -> UniqSet Reg) - -> (Reg -> UniqSet Reg) - -> RegClass -> [(Int, RegClass)] -> Int - -squeese regsOfClass regAlias classN countCs - = sum - $ map (\(i, classC) -> worst regsOfClass regAlias i classN classC) - $ countCs - - --- | powerset (for lists) -powersetL :: [a] -> [[a]] -powersetL = concatMapM (\x -> [[],[x]]) - - --- | powersetLS (list of sets) -powersetLS :: Uniquable a => UniqSet a -> [UniqSet a] -powersetLS s = map mkUniqSet $ powersetL $ nonDetEltsUniqSet s - -- See Note [Unique Determinism and code generation] diff --git a/compiler/nativeGen/RegAlloc/Graph/ArchX86.hs b/compiler/nativeGen/RegAlloc/Graph/ArchX86.hs deleted file mode 100644 index 0472e4cf09..0000000000 --- a/compiler/nativeGen/RegAlloc/Graph/ArchX86.hs +++ /dev/null @@ -1,161 +0,0 @@ - --- | A description of the register set of the X86. --- --- This isn't used directly in GHC proper. --- --- See RegArchBase.hs for the reference. --- See MachRegs.hs for the actual trivColorable function used in GHC. --- -module RegAlloc.Graph.ArchX86 ( - classOfReg, - regsOfClass, - regName, - regAlias, - worst, - squeese, -) where - -import GhcPrelude - -import RegAlloc.Graph.ArchBase (Reg(..), RegSub(..), RegClass(..)) -import UniqSet - -import qualified Data.Array as A - - --- | Determine the class of a register -classOfReg :: Reg -> RegClass -classOfReg reg - = case reg of - Reg c _ -> c - - RegSub SubL16 _ -> ClassG16 - RegSub SubL8 _ -> ClassG8 - RegSub SubL8H _ -> ClassG8 - - --- | Determine all the regs that make up a certain class. -regsOfClass :: RegClass -> UniqSet Reg -regsOfClass c - = case c of - ClassG32 - -> mkUniqSet [ Reg ClassG32 i - | i <- [0..7] ] - - ClassG16 - -> mkUniqSet [ RegSub SubL16 (Reg ClassG32 i) - | i <- [0..7] ] - - ClassG8 - -> unionUniqSets - (mkUniqSet [ RegSub SubL8 (Reg ClassG32 i) | i <- [0..3] ]) - (mkUniqSet [ RegSub SubL8H (Reg ClassG32 i) | i <- [0..3] ]) - - ClassF64 - -> mkUniqSet [ Reg ClassF64 i - | i <- [0..5] ] - - --- | Determine the common name of a reg --- returns Nothing if this reg is not part of the machine. -regName :: Reg -> Maybe String -regName reg - = case reg of - Reg ClassG32 i - | i <= 7 -> - let names = A.listArray (0,8) - [ "eax", "ebx", "ecx", "edx" - , "ebp", "esi", "edi", "esp" ] - in Just $ names A.! i - - RegSub SubL16 (Reg ClassG32 i) - | i <= 7 -> - let names = A.listArray (0,8) - [ "ax", "bx", "cx", "dx" - , "bp", "si", "di", "sp"] - in Just $ names A.! i - - RegSub SubL8 (Reg ClassG32 i) - | i <= 3 -> - let names = A.listArray (0,4) [ "al", "bl", "cl", "dl"] - in Just $ names A.! i - - RegSub SubL8H (Reg ClassG32 i) - | i <= 3 -> - let names = A.listArray (0,4) [ "ah", "bh", "ch", "dh"] - in Just $ names A.! i - - _ -> Nothing - - --- | Which regs alias what other regs. -regAlias :: Reg -> UniqSet Reg -regAlias reg - = case reg of - - -- 32 bit regs alias all of the subregs - Reg ClassG32 i - - -- for eax, ebx, ecx, eds - | i <= 3 - -> mkUniqSet - $ [ Reg ClassG32 i, RegSub SubL16 reg - , RegSub SubL8 reg, RegSub SubL8H reg ] - - -- for esi, edi, esp, ebp - | 4 <= i && i <= 7 - -> mkUniqSet - $ [ Reg ClassG32 i, RegSub SubL16 reg ] - - -- 16 bit subregs alias the whole reg - RegSub SubL16 r@(Reg ClassG32 _) - -> regAlias r - - -- 8 bit subregs alias the 32 and 16, but not the other 8 bit subreg - RegSub SubL8 r@(Reg ClassG32 _) - -> mkUniqSet $ [ r, RegSub SubL16 r, RegSub SubL8 r ] - - RegSub SubL8H r@(Reg ClassG32 _) - -> mkUniqSet $ [ r, RegSub SubL16 r, RegSub SubL8H r ] - - -- fp - Reg ClassF64 _ - -> unitUniqSet reg - - _ -> error "regAlias: invalid register" - - --- | Optimised versions of RegColorBase.{worst, squeese} specific to x86 -worst :: Int -> RegClass -> RegClass -> Int -worst n classN classC - = case classN of - ClassG32 - -> case classC of - ClassG32 -> min n 8 - ClassG16 -> min n 8 - ClassG8 -> min n 4 - ClassF64 -> 0 - - ClassG16 - -> case classC of - ClassG32 -> min n 8 - ClassG16 -> min n 8 - ClassG8 -> min n 4 - ClassF64 -> 0 - - ClassG8 - -> case classC of - ClassG32 -> min (n*2) 8 - ClassG16 -> min (n*2) 8 - ClassG8 -> min n 8 - ClassF64 -> 0 - - ClassF64 - -> case classC of - ClassF64 -> min n 6 - _ -> 0 - -squeese :: RegClass -> [(Int, RegClass)] -> Int -squeese classN countCs - = sum (map (\(i, classC) -> worst i classN classC) countCs) - diff --git a/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs b/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs deleted file mode 100644 index f42ff9450a..0000000000 --- a/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs +++ /dev/null @@ -1,99 +0,0 @@ --- | Register coalescing. -module RegAlloc.Graph.Coalesce ( - regCoalesce, - slurpJoinMovs -) where -import GhcPrelude - -import RegAlloc.Liveness -import Instruction -import Reg - -import GHC.Cmm -import Bag -import Digraph -import UniqFM -import UniqSet -import UniqSupply - - --- | Do register coalescing on this top level thing --- --- For Reg -> Reg moves, if the first reg dies at the same time the --- second reg is born then the mov only serves to join live ranges. --- The two regs can be renamed to be the same and the move instruction --- safely erased. -regCoalesce - :: Instruction instr - => [LiveCmmDecl statics instr] - -> UniqSM [LiveCmmDecl statics instr] - -regCoalesce code - = do - let joins = foldl' unionBags emptyBag - $ map slurpJoinMovs code - - let alloc = foldl' buildAlloc emptyUFM - $ bagToList joins - - let patched = map (patchEraseLive (sinkReg alloc)) code - - return patched - - --- | Add a v1 = v2 register renaming to the map. --- The register with the lowest lexical name is set as the --- canonical version. -buildAlloc :: UniqFM Reg -> (Reg, Reg) -> UniqFM Reg -buildAlloc fm (r1, r2) - = let rmin = min r1 r2 - rmax = max r1 r2 - in addToUFM fm rmax rmin - - --- | Determine the canonical name for a register by following --- v1 = v2 renamings in this map. -sinkReg :: UniqFM Reg -> Reg -> Reg -sinkReg fm r - = case lookupUFM fm r of - Nothing -> r - Just r' -> sinkReg fm r' - - --- | Slurp out mov instructions that only serve to join live ranges. --- --- During a mov, if the source reg dies and the destination reg is --- born then we can rename the two regs to the same thing and --- eliminate the move. -slurpJoinMovs - :: Instruction instr - => LiveCmmDecl statics instr - -> Bag (Reg, Reg) - -slurpJoinMovs live - = slurpCmm emptyBag live - where - slurpCmm rs CmmData{} - = rs - - slurpCmm rs (CmmProc _ _ _ sccs) - = foldl' slurpBlock rs (flattenSCCs sccs) - - slurpBlock rs (BasicBlock _ instrs) - = foldl' slurpLI rs instrs - - slurpLI rs (LiveInstr _ Nothing) = rs - slurpLI rs (LiveInstr instr (Just live)) - | Just (r1, r2) <- takeRegRegMoveInstr instr - , elementOfUniqSet r1 $ liveDieRead live - , elementOfUniqSet r2 $ liveBorn live - - -- only coalesce movs between two virtuals for now, - -- else we end up with allocatable regs in the live - -- regs list.. - , isVirtualReg r1 && isVirtualReg r2 - = consBag (r1, r2) rs - - | otherwise - = rs - diff --git a/compiler/nativeGen/RegAlloc/Graph/Main.hs b/compiler/nativeGen/RegAlloc/Graph/Main.hs deleted file mode 100644 index 6b2758f723..0000000000 --- a/compiler/nativeGen/RegAlloc/Graph/Main.hs +++ /dev/null @@ -1,472 +0,0 @@ -{-# LANGUAGE CPP #-} -{-# LANGUAGE ScopedTypeVariables #-} - --- | Graph coloring register allocator. -module RegAlloc.Graph.Main ( - regAlloc -) where -import GhcPrelude - -import qualified GraphColor as Color -import RegAlloc.Liveness -import RegAlloc.Graph.Spill -import RegAlloc.Graph.SpillClean -import RegAlloc.Graph.SpillCost -import RegAlloc.Graph.Stats -import RegAlloc.Graph.TrivColorable -import Instruction -import TargetReg -import RegClass -import Reg - -import Bag -import GHC.Driver.Session -import Outputable -import GHC.Platform -import UniqFM -import UniqSet -import UniqSupply -import Util (seqList) -import CFG - -import Data.Maybe -import Control.Monad - - --- | The maximum number of build\/spill cycles we'll allow. --- --- It should only take 3 or 4 cycles for the allocator to converge. --- If it takes any longer than this it's probably in an infinite loop, --- so it's better just to bail out and report a bug. -maxSpinCount :: Int -maxSpinCount = 10 - - --- | The top level of the graph coloring register allocator. -regAlloc - :: (Outputable statics, Outputable instr, Instruction instr) - => DynFlags - -> UniqFM (UniqSet RealReg) -- ^ registers we can use for allocation - -> UniqSet Int -- ^ set of available spill slots. - -> Int -- ^ current number of spill slots - -> [LiveCmmDecl statics instr] -- ^ code annotated with liveness information. - -> Maybe CFG -- ^ CFG of basic blocks if available - -> UniqSM ( [NatCmmDecl statics instr] - , Maybe Int, [RegAllocStats statics instr] ) - -- ^ code with registers allocated, additional stacks required - -- and stats for each stage of allocation - -regAlloc dflags regsFree slotsFree slotsCount code cfg - = do - -- TODO: the regClass function is currently hard coded to the default - -- target architecture. Would prefer to determine this from dflags. - -- There are other uses of targetRegClass later in this module. - let platform = targetPlatform dflags - triv = trivColorable platform - (targetVirtualRegSqueeze platform) - (targetRealRegSqueeze platform) - - (code_final, debug_codeGraphs, slotsCount', _) - <- regAlloc_spin dflags 0 - triv - regsFree slotsFree slotsCount [] code cfg - - let needStack - | slotsCount == slotsCount' - = Nothing - | otherwise - = Just slotsCount' - - return ( code_final - , needStack - , reverse debug_codeGraphs ) - - --- | Perform solver iterations for the graph coloring allocator. --- --- We extract a register conflict graph from the provided cmm code, --- and try to colour it. If that works then we use the solution rewrite --- the code with real hregs. If coloring doesn't work we add spill code --- and try to colour it again. After `maxSpinCount` iterations we give up. --- -regAlloc_spin - :: forall instr statics. - (Instruction instr, - Outputable instr, - Outputable statics) - => DynFlags - -> Int -- ^ Number of solver iterations we've already performed. - -> Color.Triv VirtualReg RegClass RealReg - -- ^ Function for calculating whether a register is trivially - -- colourable. - -> UniqFM (UniqSet RealReg) -- ^ Free registers that we can allocate. - -> UniqSet Int -- ^ Free stack slots that we can use. - -> Int -- ^ Number of spill slots in use - -> [RegAllocStats statics instr] -- ^ Current regalloc stats to add to. - -> [LiveCmmDecl statics instr] -- ^ Liveness annotated code to allocate. - -> Maybe CFG - -> UniqSM ( [NatCmmDecl statics instr] - , [RegAllocStats statics instr] - , Int -- Slots in use - , Color.Graph VirtualReg RegClass RealReg) - -regAlloc_spin dflags spinCount triv regsFree slotsFree slotsCount debug_codeGraphs code cfg - = do - let platform = targetPlatform dflags - - -- If any of these dump flags are turned on we want to hang on to - -- intermediate structures in the allocator - otherwise tell the - -- allocator to ditch them early so we don't end up creating space leaks. - let dump = or - [ dopt Opt_D_dump_asm_regalloc_stages dflags - , dopt Opt_D_dump_asm_stats dflags - , dopt Opt_D_dump_asm_conflicts dflags ] - - -- Check that we're not running off down the garden path. - when (spinCount > maxSpinCount) - $ pprPanic "regAlloc_spin: max build/spill cycle count exceeded." - ( text "It looks like the register allocator is stuck in an infinite loop." - $$ text "max cycles = " <> int maxSpinCount - $$ text "regsFree = " <> (hcat $ punctuate space $ map ppr - $ nonDetEltsUniqSet $ unionManyUniqSets - $ nonDetEltsUFM regsFree) - -- This is non-deterministic but we do not - -- currently support deterministic code-generation. - -- See Note [Unique Determinism and code generation] - $$ text "slotsFree = " <> ppr (sizeUniqSet slotsFree)) - - -- Build the register conflict graph from the cmm code. - (graph :: Color.Graph VirtualReg RegClass RealReg) - <- {-# SCC "BuildGraph" #-} buildGraph code - - -- VERY IMPORTANT: - -- We really do want the graph to be fully evaluated _before_ we - -- start coloring. If we don't do this now then when the call to - -- Color.colorGraph forces bits of it, the heap will be filled with - -- half evaluated pieces of graph and zillions of apply thunks. - seqGraph graph `seq` return () - - -- Build a map of the cost of spilling each instruction. - -- This is a lazy binding, so the map will only be computed if we - -- actually have to spill to the stack. - let spillCosts = foldl' plusSpillCostInfo zeroSpillCostInfo - $ map (slurpSpillCostInfo platform cfg) code - - -- The function to choose regs to leave uncolored. - let spill = chooseSpill spillCosts - - -- Record startup state in our log. - let stat1 - = if spinCount == 0 - then Just $ RegAllocStatsStart - { raLiveCmm = code - , raGraph = graph - , raSpillCosts = spillCosts } - else Nothing - - -- Try and color the graph. - let (graph_colored, rsSpill, rmCoalesce) - = {-# SCC "ColorGraph" #-} - Color.colorGraph - (gopt Opt_RegsIterative dflags) - spinCount - regsFree triv spill graph - - -- Rewrite registers in the code that have been coalesced. - let patchF reg - | RegVirtual vr <- reg - = case lookupUFM rmCoalesce vr of - Just vr' -> patchF (RegVirtual vr') - Nothing -> reg - - | otherwise - = reg - - let (code_coalesced :: [LiveCmmDecl statics instr]) - = map (patchEraseLive patchF) code - - -- Check whether we've found a coloring. - if isEmptyUniqSet rsSpill - - -- Coloring was successful because no registers needed to be spilled. - then do - -- if -fasm-lint is turned on then validate the graph. - -- This checks for bugs in the graph allocator itself. - let graph_colored_lint = - if gopt Opt_DoAsmLinting dflags - then Color.validateGraph (text "") - True -- Require all nodes to be colored. - graph_colored - else graph_colored - - -- Rewrite the code to use real hregs, using the colored graph. - let code_patched - = map (patchRegsFromGraph platform graph_colored_lint) - code_coalesced - - -- Clean out unneeded SPILL/RELOAD meta instructions. - -- The spill code generator just spills the entire live range - -- of a vreg, but it might not need to be on the stack for - -- its entire lifetime. - let code_spillclean - = map (cleanSpills platform) code_patched - - -- Strip off liveness information from the allocated code. - -- Also rewrite SPILL/RELOAD meta instructions into real machine - -- instructions along the way - let code_final - = map (stripLive dflags) code_spillclean - - -- Record what happened in this stage for debugging - let stat - = RegAllocStatsColored - { raCode = code - , raGraph = graph - , raGraphColored = graph_colored_lint - , raCoalesced = rmCoalesce - , raCodeCoalesced = code_coalesced - , raPatched = code_patched - , raSpillClean = code_spillclean - , raFinal = code_final - , raSRMs = foldl' addSRM (0, 0, 0) - $ map countSRMs code_spillclean } - - -- Bundle up all the register allocator statistics. - -- .. but make sure to drop them on the floor if they're not - -- needed, otherwise we'll get a space leak. - let statList = - if dump then [stat] ++ maybeToList stat1 ++ debug_codeGraphs - else [] - - -- Ensure all the statistics are evaluated, to avoid space leaks. - seqList statList (return ()) - - return ( code_final - , statList - , slotsCount - , graph_colored_lint) - - -- Coloring was unsuccessful. We need to spill some register to the - -- stack, make a new graph, and try to color it again. - else do - -- if -fasm-lint is turned on then validate the graph - let graph_colored_lint = - if gopt Opt_DoAsmLinting dflags - then Color.validateGraph (text "") - False -- don't require nodes to be colored - graph_colored - else graph_colored - - -- Spill uncolored regs to the stack. - (code_spilled, slotsFree', slotsCount', spillStats) - <- regSpill platform code_coalesced slotsFree slotsCount rsSpill - - -- Recalculate liveness information. - -- NOTE: we have to reverse the SCCs here to get them back into - -- the reverse-dependency order required by computeLiveness. - -- If they're not in the correct order that function will panic. - code_relive <- mapM (regLiveness platform . reverseBlocksInTops) - code_spilled - - -- Record what happened in this stage for debugging. - let stat = - RegAllocStatsSpill - { raCode = code - , raGraph = graph_colored_lint - , raCoalesced = rmCoalesce - , raSpillStats = spillStats - , raSpillCosts = spillCosts - , raSpilled = code_spilled } - - -- Bundle up all the register allocator statistics. - -- .. but make sure to drop them on the floor if they're not - -- needed, otherwise we'll get a space leak. - let statList = - if dump - then [stat] ++ maybeToList stat1 ++ debug_codeGraphs - else [] - - -- Ensure all the statistics are evaluated, to avoid space leaks. - seqList statList (return ()) - - regAlloc_spin dflags (spinCount + 1) triv regsFree slotsFree' - slotsCount' statList code_relive cfg - - --- | Build a graph from the liveness and coalesce information in this code. -buildGraph - :: Instruction instr - => [LiveCmmDecl statics instr] - -> UniqSM (Color.Graph VirtualReg RegClass RealReg) - -buildGraph code - = do - -- Slurp out the conflicts and reg->reg moves from this code. - let (conflictList, moveList) = - unzip $ map slurpConflicts code - - -- Slurp out the spill/reload coalesces. - let moveList2 = map slurpReloadCoalesce code - - -- Add the reg-reg conflicts to the graph. - let conflictBag = unionManyBags conflictList - let graph_conflict - = foldr graphAddConflictSet Color.initGraph conflictBag - - -- Add the coalescences edges to the graph. - let moveBag - = unionBags (unionManyBags moveList2) - (unionManyBags moveList) - - let graph_coalesce - = foldr graphAddCoalesce graph_conflict moveBag - - return graph_coalesce - - --- | Add some conflict edges to the graph. --- Conflicts between virtual and real regs are recorded as exclusions. -graphAddConflictSet - :: UniqSet Reg - -> Color.Graph VirtualReg RegClass RealReg - -> Color.Graph VirtualReg RegClass RealReg - -graphAddConflictSet set graph - = let virtuals = mkUniqSet - [ vr | RegVirtual vr <- nonDetEltsUniqSet set ] - - graph1 = Color.addConflicts virtuals classOfVirtualReg graph - - graph2 = foldr (\(r1, r2) -> Color.addExclusion r1 classOfVirtualReg r2) - graph1 - [ (vr, rr) - | RegVirtual vr <- nonDetEltsUniqSet set - , RegReal rr <- nonDetEltsUniqSet set] - -- See Note [Unique Determinism and code generation] - - in graph2 - - --- | Add some coalesence edges to the graph --- Coalesences between virtual and real regs are recorded as preferences. -graphAddCoalesce - :: (Reg, Reg) - -> Color.Graph VirtualReg RegClass RealReg - -> Color.Graph VirtualReg RegClass RealReg - -graphAddCoalesce (r1, r2) graph - | RegReal rr <- r1 - , RegVirtual vr <- r2 - = Color.addPreference (vr, classOfVirtualReg vr) rr graph - - | RegReal rr <- r2 - , RegVirtual vr <- r1 - = Color.addPreference (vr, classOfVirtualReg vr) rr graph - - | RegVirtual vr1 <- r1 - , RegVirtual vr2 <- r2 - = Color.addCoalesce - (vr1, classOfVirtualReg vr1) - (vr2, classOfVirtualReg vr2) - graph - - -- We can't coalesce two real regs, but there could well be existing - -- hreg,hreg moves in the input code. We'll just ignore these - -- for coalescing purposes. - | RegReal _ <- r1 - , RegReal _ <- r2 - = graph - -#if __GLASGOW_HASKELL__ <= 810 - | otherwise - = panic "graphAddCoalesce" -#endif - - --- | Patch registers in code using the reg -> reg mapping in this graph. -patchRegsFromGraph - :: (Outputable statics, Outputable instr, Instruction instr) - => Platform -> Color.Graph VirtualReg RegClass RealReg - -> LiveCmmDecl statics instr -> LiveCmmDecl statics instr - -patchRegsFromGraph platform graph code - = patchEraseLive patchF code - where - -- Function to lookup the hardreg for a virtual reg from the graph. - patchF reg - -- leave real regs alone. - | RegReal{} <- reg - = reg - - -- this virtual has a regular node in the graph. - | RegVirtual vr <- reg - , Just node <- Color.lookupNode graph vr - = case Color.nodeColor node of - Just color -> RegReal color - Nothing -> RegVirtual vr - - -- no node in the graph for this virtual, bad news. - | otherwise - = pprPanic "patchRegsFromGraph: register mapping failed." - ( text "There is no node in the graph for register " - <> ppr reg - $$ ppr code - $$ Color.dotGraph - (\_ -> text "white") - (trivColorable platform - (targetVirtualRegSqueeze platform) - (targetRealRegSqueeze platform)) - graph) - - ------ --- for when laziness just isn't what you wanted... --- We need to deepSeq the whole graph before trying to colour it to avoid --- space leaks. -seqGraph :: Color.Graph VirtualReg RegClass RealReg -> () -seqGraph graph = seqNodes (nonDetEltsUFM (Color.graphMap graph)) - -- See Note [Unique Determinism and code generation] - -seqNodes :: [Color.Node VirtualReg RegClass RealReg] -> () -seqNodes ns - = case ns of - [] -> () - (n : ns) -> seqNode n `seq` seqNodes ns - -seqNode :: Color.Node VirtualReg RegClass RealReg -> () -seqNode node - = seqVirtualReg (Color.nodeId node) - `seq` seqRegClass (Color.nodeClass node) - `seq` seqMaybeRealReg (Color.nodeColor node) - `seq` (seqVirtualRegList (nonDetEltsUniqSet (Color.nodeConflicts node))) - `seq` (seqRealRegList (nonDetEltsUniqSet (Color.nodeExclusions node))) - `seq` (seqRealRegList (Color.nodePreference node)) - `seq` (seqVirtualRegList (nonDetEltsUniqSet (Color.nodeCoalesce node))) - -- It's OK to use nonDetEltsUniqSet for seq - -seqVirtualReg :: VirtualReg -> () -seqVirtualReg reg = reg `seq` () - -seqRealReg :: RealReg -> () -seqRealReg reg = reg `seq` () - -seqRegClass :: RegClass -> () -seqRegClass c = c `seq` () - -seqMaybeRealReg :: Maybe RealReg -> () -seqMaybeRealReg mr - = case mr of - Nothing -> () - Just r -> seqRealReg r - -seqVirtualRegList :: [VirtualReg] -> () -seqVirtualRegList rs - = case rs of - [] -> () - (r : rs) -> seqVirtualReg r `seq` seqVirtualRegList rs - -seqRealRegList :: [RealReg] -> () -seqRealRegList rs - = case rs of - [] -> () - (r : rs) -> seqRealReg r `seq` seqRealRegList rs diff --git a/compiler/nativeGen/RegAlloc/Graph/Spill.hs b/compiler/nativeGen/RegAlloc/Graph/Spill.hs deleted file mode 100644 index 9ffb51ee29..0000000000 --- a/compiler/nativeGen/RegAlloc/Graph/Spill.hs +++ /dev/null @@ -1,382 +0,0 @@ - --- | When there aren't enough registers to hold all the vregs we have to spill --- some of those vregs to slots on the stack. This module is used modify the --- code to use those slots. -module RegAlloc.Graph.Spill ( - regSpill, - SpillStats(..), - accSpillSL -) where -import GhcPrelude - -import RegAlloc.Liveness -import Instruction -import Reg -import GHC.Cmm hiding (RegSet) -import GHC.Cmm.BlockId -import GHC.Cmm.Dataflow.Collections - -import MonadUtils -import State -import Unique -import UniqFM -import UniqSet -import UniqSupply -import Outputable -import GHC.Platform - -import Data.List -import Data.Maybe -import Data.IntSet (IntSet) -import qualified Data.IntSet as IntSet - - --- | Spill all these virtual regs to stack slots. --- --- Bumps the number of required stack slots if required. --- --- --- TODO: See if we can split some of the live ranges instead of just globally --- spilling the virtual reg. This might make the spill cleaner's job easier. --- --- TODO: On CISCy x86 and x86_64 we don't necessarily have to add a mov instruction --- when making spills. If an instr is using a spilled virtual we may be able to --- address the spill slot directly. --- -regSpill - :: Instruction instr - => Platform - -> [LiveCmmDecl statics instr] -- ^ the code - -> UniqSet Int -- ^ available stack slots - -> Int -- ^ current number of spill slots. - -> UniqSet VirtualReg -- ^ the regs to spill - -> UniqSM - ([LiveCmmDecl statics instr] - -- code with SPILL and RELOAD meta instructions added. - , UniqSet Int -- left over slots - , Int -- slot count in use now. - , SpillStats ) -- stats about what happened during spilling - -regSpill platform code slotsFree slotCount regs - - -- Not enough slots to spill these regs. - | sizeUniqSet slotsFree < sizeUniqSet regs - = -- pprTrace "Bumping slot count:" (ppr slotCount <> text " -> " <> ppr (slotCount+512)) $ - let slotsFree' = (addListToUniqSet slotsFree [slotCount+1 .. slotCount+512]) - in regSpill platform code slotsFree' (slotCount+512) regs - - | otherwise - = do - -- Allocate a slot for each of the spilled regs. - let slots = take (sizeUniqSet regs) $ nonDetEltsUniqSet slotsFree - let regSlotMap = listToUFM - $ zip (nonDetEltsUniqSet regs) slots - -- This is non-deterministic but we do not - -- currently support deterministic code-generation. - -- See Note [Unique Determinism and code generation] - - -- Grab the unique supply from the monad. - us <- getUniqueSupplyM - - -- Run the spiller on all the blocks. - let (code', state') = - runState (mapM (regSpill_top platform regSlotMap) code) - (initSpillS us) - - return ( code' - , minusUniqSet slotsFree (mkUniqSet slots) - , slotCount - , makeSpillStats state') - - --- | Spill some registers to stack slots in a top-level thing. -regSpill_top - :: Instruction instr - => Platform - -> RegMap Int - -- ^ map of vregs to slots they're being spilled to. - -> LiveCmmDecl statics instr - -- ^ the top level thing. - -> SpillM (LiveCmmDecl statics instr) - -regSpill_top platform regSlotMap cmm - = case cmm of - CmmData{} - -> return cmm - - CmmProc info label live sccs - | LiveInfo static firstId liveVRegsOnEntry liveSlotsOnEntry <- info - -> do - -- The liveVRegsOnEntry contains the set of vregs that are live - -- on entry to each basic block. If we spill one of those vregs - -- we remove it from that set and add the corresponding slot - -- number to the liveSlotsOnEntry set. The spill cleaner needs - -- this information to erase unneeded spill and reload instructions - -- after we've done a successful allocation. - let liveSlotsOnEntry' :: BlockMap IntSet - liveSlotsOnEntry' - = mapFoldlWithKey patchLiveSlot - liveSlotsOnEntry liveVRegsOnEntry - - let info' - = LiveInfo static firstId - liveVRegsOnEntry - liveSlotsOnEntry' - - -- Apply the spiller to all the basic blocks in the CmmProc. - sccs' <- mapM (mapSCCM (regSpill_block platform regSlotMap)) sccs - - return $ CmmProc info' label live sccs' - - where -- Given a BlockId and the set of registers live in it, - -- if registers in this block are being spilled to stack slots, - -- then record the fact that these slots are now live in those blocks - -- in the given slotmap. - patchLiveSlot - :: BlockMap IntSet -> BlockId -> RegSet -> BlockMap IntSet - - patchLiveSlot slotMap blockId regsLive - = let - -- Slots that are already recorded as being live. - curSlotsLive = fromMaybe IntSet.empty - $ mapLookup blockId slotMap - - moreSlotsLive = IntSet.fromList - $ catMaybes - $ map (lookupUFM regSlotMap) - $ nonDetEltsUniqSet regsLive - -- See Note [Unique Determinism and code generation] - - slotMap' - = mapInsert blockId (IntSet.union curSlotsLive moreSlotsLive) - slotMap - - in slotMap' - - --- | Spill some registers to stack slots in a basic block. -regSpill_block - :: Instruction instr - => Platform - -> UniqFM Int -- ^ map of vregs to slots they're being spilled to. - -> LiveBasicBlock instr - -> SpillM (LiveBasicBlock instr) - -regSpill_block platform regSlotMap (BasicBlock i instrs) - = do instrss' <- mapM (regSpill_instr platform regSlotMap) instrs - return $ BasicBlock i (concat instrss') - - --- | Spill some registers to stack slots in a single instruction. --- If the instruction uses registers that need to be spilled, then it is --- prefixed (or postfixed) with the appropriate RELOAD or SPILL meta --- instructions. -regSpill_instr - :: Instruction instr - => Platform - -> UniqFM Int -- ^ map of vregs to slots they're being spilled to. - -> LiveInstr instr - -> SpillM [LiveInstr instr] - -regSpill_instr _ _ li@(LiveInstr _ Nothing) - = do return [li] - -regSpill_instr platform regSlotMap - (LiveInstr instr (Just _)) - = do - -- work out which regs are read and written in this instr - let RU rlRead rlWritten = regUsageOfInstr platform instr - - -- sometimes a register is listed as being read more than once, - -- nub this so we don't end up inserting two lots of spill code. - let rsRead_ = nub rlRead - let rsWritten_ = nub rlWritten - - -- if a reg is modified, it appears in both lists, want to undo this.. - let rsRead = rsRead_ \\ rsWritten_ - let rsWritten = rsWritten_ \\ rsRead_ - let rsModify = intersect rsRead_ rsWritten_ - - -- work out if any of the regs being used are currently being spilled. - let rsSpillRead = filter (\r -> elemUFM r regSlotMap) rsRead - let rsSpillWritten = filter (\r -> elemUFM r regSlotMap) rsWritten - let rsSpillModify = filter (\r -> elemUFM r regSlotMap) rsModify - - -- rewrite the instr and work out spill code. - (instr1, prepost1) <- mapAccumLM (spillRead regSlotMap) instr rsSpillRead - (instr2, prepost2) <- mapAccumLM (spillWrite regSlotMap) instr1 rsSpillWritten - (instr3, prepost3) <- mapAccumLM (spillModify regSlotMap) instr2 rsSpillModify - - let (mPrefixes, mPostfixes) = unzip (prepost1 ++ prepost2 ++ prepost3) - let prefixes = concat mPrefixes - let postfixes = concat mPostfixes - - -- final code - let instrs' = prefixes - ++ [LiveInstr instr3 Nothing] - ++ postfixes - - return $ instrs' - - --- | Add a RELOAD met a instruction to load a value for an instruction that --- writes to a vreg that is being spilled. -spillRead - :: Instruction instr - => UniqFM Int - -> instr - -> Reg - -> SpillM (instr, ([LiveInstr instr'], [LiveInstr instr'])) - -spillRead regSlotMap instr reg - | Just slot <- lookupUFM regSlotMap reg - = do (instr', nReg) <- patchInstr reg instr - - modify $ \s -> s - { stateSpillSL = addToUFM_C accSpillSL (stateSpillSL s) reg (reg, 0, 1) } - - return ( instr' - , ( [LiveInstr (RELOAD slot nReg) Nothing] - , []) ) - - | otherwise = panic "RegSpill.spillRead: no slot defined for spilled reg" - - --- | Add a SPILL meta instruction to store a value for an instruction that --- writes to a vreg that is being spilled. -spillWrite - :: Instruction instr - => UniqFM Int - -> instr - -> Reg - -> SpillM (instr, ([LiveInstr instr'], [LiveInstr instr'])) - -spillWrite regSlotMap instr reg - | Just slot <- lookupUFM regSlotMap reg - = do (instr', nReg) <- patchInstr reg instr - - modify $ \s -> s - { stateSpillSL = addToUFM_C accSpillSL (stateSpillSL s) reg (reg, 1, 0) } - - return ( instr' - , ( [] - , [LiveInstr (SPILL nReg slot) Nothing])) - - | otherwise = panic "RegSpill.spillWrite: no slot defined for spilled reg" - - --- | Add both RELOAD and SPILL meta instructions for an instruction that --- both reads and writes to a vreg that is being spilled. -spillModify - :: Instruction instr - => UniqFM Int - -> instr - -> Reg - -> SpillM (instr, ([LiveInstr instr'], [LiveInstr instr'])) - -spillModify regSlotMap instr reg - | Just slot <- lookupUFM regSlotMap reg - = do (instr', nReg) <- patchInstr reg instr - - modify $ \s -> s - { stateSpillSL = addToUFM_C accSpillSL (stateSpillSL s) reg (reg, 1, 1) } - - return ( instr' - , ( [LiveInstr (RELOAD slot nReg) Nothing] - , [LiveInstr (SPILL nReg slot) Nothing])) - - | otherwise = panic "RegSpill.spillModify: no slot defined for spilled reg" - - --- | Rewrite uses of this virtual reg in an instr to use a different --- virtual reg. -patchInstr - :: Instruction instr - => Reg -> instr -> SpillM (instr, Reg) - -patchInstr reg instr - = do nUnique <- newUnique - - -- The register we're rewriting is supposed to be virtual. - -- If it's not then something has gone horribly wrong. - let nReg - = case reg of - RegVirtual vr - -> RegVirtual (renameVirtualReg nUnique vr) - - RegReal{} - -> panic "RegAlloc.Graph.Spill.patchIntr: not patching real reg" - - let instr' = patchReg1 reg nReg instr - return (instr', nReg) - - -patchReg1 - :: Instruction instr - => Reg -> Reg -> instr -> instr - -patchReg1 old new instr - = let patchF r - | r == old = new - | otherwise = r - in patchRegsOfInstr instr patchF - - --- Spiller monad -------------------------------------------------------------- --- | State monad for the spill code generator. -type SpillM a - = State SpillS a - --- | Spill code generator state. -data SpillS - = SpillS - { -- | Unique supply for generating fresh vregs. - stateUS :: UniqSupply - - -- | Spilled vreg vs the number of times it was loaded, stored. - , stateSpillSL :: UniqFM (Reg, Int, Int) } - - --- | Create a new spiller state. -initSpillS :: UniqSupply -> SpillS -initSpillS uniqueSupply - = SpillS - { stateUS = uniqueSupply - , stateSpillSL = emptyUFM } - - --- | Allocate a new unique in the spiller monad. -newUnique :: SpillM Unique -newUnique - = do us <- gets stateUS - case takeUniqFromSupply us of - (uniq, us') - -> do modify $ \s -> s { stateUS = us' } - return uniq - - --- | Add a spill/reload count to a stats record for a register. -accSpillSL :: (Reg, Int, Int) -> (Reg, Int, Int) -> (Reg, Int, Int) -accSpillSL (r1, s1, l1) (_, s2, l2) - = (r1, s1 + s2, l1 + l2) - - --- Spiller stats -------------------------------------------------------------- --- | Spiller statistics. --- Tells us what registers were spilled. -data SpillStats - = SpillStats - { spillStoreLoad :: UniqFM (Reg, Int, Int) } - - --- | Extract spiller statistics from the spiller state. -makeSpillStats :: SpillS -> SpillStats -makeSpillStats s - = SpillStats - { spillStoreLoad = stateSpillSL s } - - -instance Outputable SpillStats where - ppr stats - = pprUFM (spillStoreLoad stats) - (vcat . map (\(r, s, l) -> ppr r <+> int s <+> int l)) diff --git a/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs b/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs deleted file mode 100644 index bd8b449cbb..0000000000 --- a/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs +++ /dev/null @@ -1,616 +0,0 @@ -{-# LANGUAGE CPP #-} - --- | Clean out unneeded spill\/reload instructions. --- --- Handling of join points --- ~~~~~~~~~~~~~~~~~~~~~~~ --- --- B1: B2: --- ... ... --- RELOAD SLOT(0), %r1 RELOAD SLOT(0), %r1 --- ... A ... ... B ... --- jump B3 jump B3 --- --- B3: ... C ... --- RELOAD SLOT(0), %r1 --- ... --- --- The Plan --- ~~~~~~~~ --- As long as %r1 hasn't been written to in A, B or C then we don't need --- the reload in B3. --- --- What we really care about here is that on the entry to B3, %r1 will --- always have the same value that is in SLOT(0) (ie, %r1 is _valid_) --- --- This also works if the reloads in B1\/B2 were spills instead, because --- spilling %r1 to a slot makes that slot have the same value as %r1. --- -module RegAlloc.Graph.SpillClean ( - cleanSpills -) where -import GhcPrelude - -import RegAlloc.Liveness -import Instruction -import Reg - -import GHC.Cmm.BlockId -import GHC.Cmm -import UniqSet -import UniqFM -import Unique -import State -import Outputable -import GHC.Platform -import GHC.Cmm.Dataflow.Collections - -import Data.List -import Data.Maybe -import Data.IntSet (IntSet) -import qualified Data.IntSet as IntSet - - --- | The identification number of a spill slot. --- A value is stored in a spill slot when we don't have a free --- register to hold it. -type Slot = Int - - --- | Clean out unneeded spill\/reloads from this top level thing. -cleanSpills - :: Instruction instr - => Platform - -> LiveCmmDecl statics instr - -> LiveCmmDecl statics instr - -cleanSpills platform cmm - = evalState (cleanSpin platform 0 cmm) initCleanS - - --- | Do one pass of cleaning. -cleanSpin - :: Instruction instr - => Platform - -> Int -- ^ Iteration number for the cleaner. - -> LiveCmmDecl statics instr -- ^ Liveness annotated code to clean. - -> CleanM (LiveCmmDecl statics instr) - -cleanSpin platform spinCount code - = do - -- Initialise count of cleaned spill and reload instructions. - modify $ \s -> s - { sCleanedSpillsAcc = 0 - , sCleanedReloadsAcc = 0 - , sReloadedBy = emptyUFM } - - code_forward <- mapBlockTopM (cleanBlockForward platform) code - code_backward <- cleanTopBackward code_forward - - -- During the cleaning of each block we collected information about - -- what regs were valid across each jump. Based on this, work out - -- whether it will be safe to erase reloads after join points for - -- the next pass. - collateJoinPoints - - -- Remember how many spill and reload instructions we cleaned in this pass. - spills <- gets sCleanedSpillsAcc - reloads <- gets sCleanedReloadsAcc - modify $ \s -> s - { sCleanedCount = (spills, reloads) : sCleanedCount s } - - -- If nothing was cleaned in this pass or the last one - -- then we're done and it's time to bail out. - cleanedCount <- gets sCleanedCount - if take 2 cleanedCount == [(0, 0), (0, 0)] - then return code - - -- otherwise go around again - else cleanSpin platform (spinCount + 1) code_backward - - -------------------------------------------------------------------------------- --- | Clean out unneeded reload instructions, --- while walking forward over the code. -cleanBlockForward - :: Instruction instr - => Platform - -> LiveBasicBlock instr - -> CleanM (LiveBasicBlock instr) - -cleanBlockForward platform (BasicBlock blockId instrs) - = do - -- See if we have a valid association for the entry to this block. - jumpValid <- gets sJumpValid - let assoc = case lookupUFM jumpValid blockId of - Just assoc -> assoc - Nothing -> emptyAssoc - - instrs_reload <- cleanForward platform blockId assoc [] instrs - return $ BasicBlock blockId instrs_reload - - - --- | Clean out unneeded reload instructions. --- --- Walking forwards across the code --- On a reload, if we know a reg already has the same value as a slot --- then we don't need to do the reload. --- -cleanForward - :: Instruction instr - => Platform - -> BlockId -- ^ the block that we're currently in - -> Assoc Store -- ^ two store locations are associated if - -- they have the same value - -> [LiveInstr instr] -- ^ acc - -> [LiveInstr instr] -- ^ instrs to clean (in backwards order) - -> CleanM [LiveInstr instr] -- ^ cleaned instrs (in forward order) - -cleanForward _ _ _ acc [] - = return acc - --- Rewrite live range joins via spill slots to just a spill and a reg-reg move --- hopefully the spill will be also be cleaned in the next pass -cleanForward platform blockId assoc acc (li1 : li2 : instrs) - - | LiveInstr (SPILL reg1 slot1) _ <- li1 - , LiveInstr (RELOAD slot2 reg2) _ <- li2 - , slot1 == slot2 - = do - modify $ \s -> s { sCleanedReloadsAcc = sCleanedReloadsAcc s + 1 } - cleanForward platform blockId assoc acc - $ li1 : LiveInstr (mkRegRegMoveInstr platform reg1 reg2) Nothing - : instrs - -cleanForward platform blockId assoc acc (li@(LiveInstr i1 _) : instrs) - | Just (r1, r2) <- takeRegRegMoveInstr i1 - = if r1 == r2 - -- Erase any left over nop reg reg moves while we're here - -- this will also catch any nop moves that the previous case - -- happens to add. - then cleanForward platform blockId assoc acc instrs - - -- If r1 has the same value as some slots and we copy r1 to r2, - -- then r2 is now associated with those slots instead - else do let assoc' = addAssoc (SReg r1) (SReg r2) - $ delAssoc (SReg r2) - $ assoc - - cleanForward platform blockId assoc' (li : acc) instrs - - -cleanForward platform blockId assoc acc (li : instrs) - - -- Update association due to the spill. - | LiveInstr (SPILL reg slot) _ <- li - = let assoc' = addAssoc (SReg reg) (SSlot slot) - $ delAssoc (SSlot slot) - $ assoc - in cleanForward platform blockId assoc' (li : acc) instrs - - -- Clean a reload instr. - | LiveInstr (RELOAD{}) _ <- li - = do (assoc', mli) <- cleanReload platform blockId assoc li - case mli of - Nothing -> cleanForward platform blockId assoc' acc - instrs - - Just li' -> cleanForward platform blockId assoc' (li' : acc) - instrs - - -- Remember the association over a jump. - | LiveInstr instr _ <- li - , targets <- jumpDestsOfInstr instr - , not $ null targets - = do mapM_ (accJumpValid assoc) targets - cleanForward platform blockId assoc (li : acc) instrs - - -- Writing to a reg changes its value. - | LiveInstr instr _ <- li - , RU _ written <- regUsageOfInstr platform instr - = let assoc' = foldr delAssoc assoc (map SReg $ nub written) - in cleanForward platform blockId assoc' (li : acc) instrs - - - --- | Try and rewrite a reload instruction to something more pleasing -cleanReload - :: Instruction instr - => Platform - -> BlockId - -> Assoc Store - -> LiveInstr instr - -> CleanM (Assoc Store, Maybe (LiveInstr instr)) - -cleanReload platform blockId assoc li@(LiveInstr (RELOAD slot reg) _) - - -- If the reg we're reloading already has the same value as the slot - -- then we can erase the instruction outright. - | elemAssoc (SSlot slot) (SReg reg) assoc - = do modify $ \s -> s { sCleanedReloadsAcc = sCleanedReloadsAcc s + 1 } - return (assoc, Nothing) - - -- If we can find another reg with the same value as this slot then - -- do a move instead of a reload. - | Just reg2 <- findRegOfSlot assoc slot - = do modify $ \s -> s { sCleanedReloadsAcc = sCleanedReloadsAcc s + 1 } - - let assoc' = addAssoc (SReg reg) (SReg reg2) - $ delAssoc (SReg reg) - $ assoc - - return ( assoc' - , Just $ LiveInstr (mkRegRegMoveInstr platform reg2 reg) Nothing) - - -- Gotta keep this instr. - | otherwise - = do -- Update the association. - let assoc' - = addAssoc (SReg reg) (SSlot slot) - -- doing the reload makes reg and slot the same value - $ delAssoc (SReg reg) - -- reg value changes on reload - $ assoc - - -- Remember that this block reloads from this slot. - accBlockReloadsSlot blockId slot - - return (assoc', Just li) - -cleanReload _ _ _ _ - = panic "RegSpillClean.cleanReload: unhandled instr" - - -------------------------------------------------------------------------------- --- | Clean out unneeded spill instructions, --- while walking backwards over the code. --- --- If there were no reloads from a slot between a spill and the last one --- then the slot was never read and we don't need the spill. --- --- SPILL r0 -> s1 --- RELOAD s1 -> r2 --- SPILL r3 -> s1 <--- don't need this spill --- SPILL r4 -> s1 --- RELOAD s1 -> r5 --- --- Maintain a set of --- "slots which were spilled to but not reloaded from yet" --- --- Walking backwards across the code: --- a) On a reload from a slot, remove it from the set. --- --- a) On a spill from a slot --- If the slot is in set then we can erase the spill, --- because it won't be reloaded from until after the next spill. --- --- otherwise --- keep the spill and add the slot to the set --- --- TODO: This is mostly inter-block --- we should really be updating the noReloads set as we cross jumps also. --- --- TODO: generate noReloads from liveSlotsOnEntry --- -cleanTopBackward - :: Instruction instr - => LiveCmmDecl statics instr - -> CleanM (LiveCmmDecl statics instr) - -cleanTopBackward cmm - = case cmm of - CmmData{} - -> return cmm - - CmmProc info label live sccs - | LiveInfo _ _ _ liveSlotsOnEntry <- info - -> do sccs' <- mapM (mapSCCM (cleanBlockBackward liveSlotsOnEntry)) sccs - return $ CmmProc info label live sccs' - - -cleanBlockBackward - :: Instruction instr - => BlockMap IntSet - -> LiveBasicBlock instr - -> CleanM (LiveBasicBlock instr) - -cleanBlockBackward liveSlotsOnEntry (BasicBlock blockId instrs) - = do instrs_spill <- cleanBackward liveSlotsOnEntry emptyUniqSet [] instrs - return $ BasicBlock blockId instrs_spill - - - -cleanBackward - :: Instruction instr - => BlockMap IntSet -- ^ Slots live on entry to each block - -> UniqSet Int -- ^ Slots that have been spilled, but not reloaded from - -> [LiveInstr instr] -- ^ acc - -> [LiveInstr instr] -- ^ Instrs to clean (in forwards order) - -> CleanM [LiveInstr instr] -- ^ Cleaned instrs (in backwards order) - -cleanBackward liveSlotsOnEntry noReloads acc lis - = do reloadedBy <- gets sReloadedBy - cleanBackward' liveSlotsOnEntry reloadedBy noReloads acc lis - - -cleanBackward' - :: Instruction instr - => BlockMap IntSet - -> UniqFM [BlockId] - -> UniqSet Int - -> [LiveInstr instr] - -> [LiveInstr instr] - -> State CleanS [LiveInstr instr] - -cleanBackward' _ _ _ acc [] - = return acc - -cleanBackward' liveSlotsOnEntry reloadedBy noReloads acc (li : instrs) - - -- If nothing ever reloads from this slot then we don't need the spill. - | LiveInstr (SPILL _ slot) _ <- li - , Nothing <- lookupUFM reloadedBy (SSlot slot) - = do modify $ \s -> s { sCleanedSpillsAcc = sCleanedSpillsAcc s + 1 } - cleanBackward liveSlotsOnEntry noReloads acc instrs - - | LiveInstr (SPILL _ slot) _ <- li - = if elementOfUniqSet slot noReloads - - -- We can erase this spill because the slot won't be read until - -- after the next one - then do - modify $ \s -> s { sCleanedSpillsAcc = sCleanedSpillsAcc s + 1 } - cleanBackward liveSlotsOnEntry noReloads acc instrs - - else do - -- This slot is being spilled to, but we haven't seen any reloads yet. - let noReloads' = addOneToUniqSet noReloads slot - cleanBackward liveSlotsOnEntry noReloads' (li : acc) instrs - - -- if we reload from a slot then it's no longer unused - | LiveInstr (RELOAD slot _) _ <- li - , noReloads' <- delOneFromUniqSet noReloads slot - = cleanBackward liveSlotsOnEntry noReloads' (li : acc) instrs - - -- If a slot is live in a jump target then assume it's reloaded there. - -- - -- TODO: A real dataflow analysis would do a better job here. - -- If the target block _ever_ used the slot then we assume - -- it always does, but if those reloads are cleaned the slot - -- liveness map doesn't get updated. - | LiveInstr instr _ <- li - , targets <- jumpDestsOfInstr instr - = do - let slotsReloadedByTargets - = IntSet.unions - $ catMaybes - $ map (flip mapLookup liveSlotsOnEntry) - $ targets - - let noReloads' - = foldl' delOneFromUniqSet noReloads - $ IntSet.toList slotsReloadedByTargets - - cleanBackward liveSlotsOnEntry noReloads' (li : acc) instrs - -#if __GLASGOW_HASKELL__ <= 810 - -- some other instruction - | otherwise - = cleanBackward liveSlotsOnEntry noReloads (li : acc) instrs -#endif - - --- | Combine the associations from all the inward control flow edges. --- -collateJoinPoints :: CleanM () -collateJoinPoints - = modify $ \s -> s - { sJumpValid = mapUFM intersects (sJumpValidAcc s) - , sJumpValidAcc = emptyUFM } - -intersects :: [Assoc Store] -> Assoc Store -intersects [] = emptyAssoc -intersects assocs = foldl1' intersectAssoc assocs - - --- | See if we have a reg with the same value as this slot in the association table. -findRegOfSlot :: Assoc Store -> Int -> Maybe Reg -findRegOfSlot assoc slot - | close <- closeAssoc (SSlot slot) assoc - , Just (SReg reg) <- find isStoreReg $ nonDetEltsUniqSet close - -- See Note [Unique Determinism and code generation] - = Just reg - - | otherwise - = Nothing - - -------------------------------------------------------------------------------- --- | Cleaner monad. -type CleanM - = State CleanS - --- | Cleaner state. -data CleanS - = CleanS - { -- | Regs which are valid at the start of each block. - sJumpValid :: UniqFM (Assoc Store) - - -- | Collecting up what regs were valid across each jump. - -- in the next pass we can collate these and write the results - -- to sJumpValid. - , sJumpValidAcc :: UniqFM [Assoc Store] - - -- | Map of (slot -> blocks which reload from this slot) - -- used to decide if whether slot spilled to will ever be - -- reloaded from on this path. - , sReloadedBy :: UniqFM [BlockId] - - -- | Spills and reloads cleaned each pass (latest at front) - , sCleanedCount :: [(Int, Int)] - - -- | Spills and reloads that have been cleaned in this pass so far. - , sCleanedSpillsAcc :: Int - , sCleanedReloadsAcc :: Int } - - --- | Construct the initial cleaner state. -initCleanS :: CleanS -initCleanS - = CleanS - { sJumpValid = emptyUFM - , sJumpValidAcc = emptyUFM - - , sReloadedBy = emptyUFM - - , sCleanedCount = [] - - , sCleanedSpillsAcc = 0 - , sCleanedReloadsAcc = 0 } - - --- | Remember the associations before a jump. -accJumpValid :: Assoc Store -> BlockId -> CleanM () -accJumpValid assocs target - = modify $ \s -> s { - sJumpValidAcc = addToUFM_C (++) - (sJumpValidAcc s) - target - [assocs] } - - -accBlockReloadsSlot :: BlockId -> Slot -> CleanM () -accBlockReloadsSlot blockId slot - = modify $ \s -> s { - sReloadedBy = addToUFM_C (++) - (sReloadedBy s) - (SSlot slot) - [blockId] } - - -------------------------------------------------------------------------------- --- A store location can be a stack slot or a register -data Store - = SSlot Int - | SReg Reg - - --- | Check if this is a reg store. -isStoreReg :: Store -> Bool -isStoreReg ss - = case ss of - SSlot _ -> False - SReg _ -> True - - --- Spill cleaning is only done once all virtuals have been allocated to realRegs -instance Uniquable Store where - getUnique (SReg r) - | RegReal (RealRegSingle i) <- r - = mkRegSingleUnique i - - | RegReal (RealRegPair r1 r2) <- r - = mkRegPairUnique (r1 * 65535 + r2) - - | otherwise - = error $ "RegSpillClean.getUnique: found virtual reg during spill clean," - ++ "only real regs expected." - - getUnique (SSlot i) = mkRegSubUnique i -- [SLPJ] I hope "SubUnique" is ok - - -instance Outputable Store where - ppr (SSlot i) = text "slot" <> int i - ppr (SReg r) = ppr r - - -------------------------------------------------------------------------------- --- Association graphs. --- In the spill cleaner, two store locations are associated if they are known --- to hold the same value. --- -type Assoc a = UniqFM (UniqSet a) - --- | An empty association -emptyAssoc :: Assoc a -emptyAssoc = emptyUFM - - --- | Add an association between these two things. -addAssoc :: Uniquable a - => a -> a -> Assoc a -> Assoc a - -addAssoc a b m - = let m1 = addToUFM_C unionUniqSets m a (unitUniqSet b) - m2 = addToUFM_C unionUniqSets m1 b (unitUniqSet a) - in m2 - - --- | Delete all associations to a node. -delAssoc :: (Uniquable a) - => a -> Assoc a -> Assoc a - -delAssoc a m - | Just aSet <- lookupUFM m a - , m1 <- delFromUFM m a - = nonDetFoldUniqSet (\x m -> delAssoc1 x a m) m1 aSet - -- It's OK to use nonDetFoldUFM here because deletion is commutative - - | otherwise = m - - --- | Delete a single association edge (a -> b). -delAssoc1 :: Uniquable a - => a -> a -> Assoc a -> Assoc a - -delAssoc1 a b m - | Just aSet <- lookupUFM m a - = addToUFM m a (delOneFromUniqSet aSet b) - - | otherwise = m - - --- | Check if these two things are associated. -elemAssoc :: (Uniquable a) - => a -> a -> Assoc a -> Bool - -elemAssoc a b m - = elementOfUniqSet b (closeAssoc a m) - - --- | Find the refl. trans. closure of the association from this point. -closeAssoc :: (Uniquable a) - => a -> Assoc a -> UniqSet a - -closeAssoc a assoc - = closeAssoc' assoc emptyUniqSet (unitUniqSet a) - where - closeAssoc' assoc visited toVisit - = case nonDetEltsUniqSet toVisit of - -- See Note [Unique Determinism and code generation] - - -- nothing else to visit, we're done - [] -> visited - - (x:_) - -- we've already seen this node - | elementOfUniqSet x visited - -> closeAssoc' assoc visited (delOneFromUniqSet toVisit x) - - -- haven't seen this node before, - -- remember to visit all its neighbors - | otherwise - -> let neighbors - = case lookupUFM assoc x of - Nothing -> emptyUniqSet - Just set -> set - - in closeAssoc' assoc - (addOneToUniqSet visited x) - (unionUniqSets toVisit neighbors) - --- | Intersect two associations. -intersectAssoc :: Assoc a -> Assoc a -> Assoc a -intersectAssoc a b - = intersectUFM_C (intersectUniqSets) a b diff --git a/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs b/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs deleted file mode 100644 index 4870bf5269..0000000000 --- a/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs +++ /dev/null @@ -1,317 +0,0 @@ -{-# LANGUAGE ScopedTypeVariables, GADTs, BangPatterns #-} -module RegAlloc.Graph.SpillCost ( - SpillCostRecord, - plusSpillCostRecord, - pprSpillCostRecord, - - SpillCostInfo, - zeroSpillCostInfo, - plusSpillCostInfo, - - slurpSpillCostInfo, - chooseSpill, - - lifeMapFromSpillCostInfo -) where -import GhcPrelude - -import RegAlloc.Liveness -import Instruction -import RegClass -import Reg - -import GraphBase - -import GHC.Cmm.Dataflow.Collections (mapLookup) -import GHC.Cmm.Dataflow.Label -import GHC.Cmm -import UniqFM -import UniqSet -import Digraph (flattenSCCs) -import Outputable -import GHC.Platform -import State -import CFG - -import Data.List (nub, minimumBy) -import Data.Maybe -import Control.Monad (join) - - --- | Records the expected cost to spill some register. -type SpillCostRecord - = ( VirtualReg -- register name - , Int -- number of writes to this reg - , Int -- number of reads from this reg - , Int) -- number of instrs this reg was live on entry to - - --- | Map of `SpillCostRecord` -type SpillCostInfo - = UniqFM SpillCostRecord - -type SpillCostState = State (UniqFM SpillCostRecord) () - --- | An empty map of spill costs. -zeroSpillCostInfo :: SpillCostInfo -zeroSpillCostInfo = emptyUFM - - --- | Add two spill cost infos. -plusSpillCostInfo :: SpillCostInfo -> SpillCostInfo -> SpillCostInfo -plusSpillCostInfo sc1 sc2 - = plusUFM_C plusSpillCostRecord sc1 sc2 - - --- | Add two spill cost records. -plusSpillCostRecord :: SpillCostRecord -> SpillCostRecord -> SpillCostRecord -plusSpillCostRecord (r1, a1, b1, c1) (r2, a2, b2, c2) - | r1 == r2 = (r1, a1 + a2, b1 + b2, c1 + c2) - | otherwise = error "RegSpillCost.plusRegInt: regs don't match" - - --- | Slurp out information used for determining spill costs. --- --- For each vreg, the number of times it was written to, read from, --- and the number of instructions it was live on entry to (lifetime) --- -slurpSpillCostInfo :: forall instr statics. (Outputable instr, Instruction instr) - => Platform - -> Maybe CFG - -> LiveCmmDecl statics instr - -> SpillCostInfo - -slurpSpillCostInfo platform cfg cmm - = execState (countCmm cmm) zeroSpillCostInfo - where - countCmm CmmData{} = return () - countCmm (CmmProc info _ _ sccs) - = mapM_ (countBlock info freqMap) - $ flattenSCCs sccs - where - LiveInfo _ entries _ _ = info - freqMap = (fst . mkGlobalWeights (head entries)) <$> cfg - - -- Lookup the regs that are live on entry to this block in - -- the info table from the CmmProc. - countBlock info freqMap (BasicBlock blockId instrs) - | LiveInfo _ _ blockLive _ <- info - , Just rsLiveEntry <- mapLookup blockId blockLive - , rsLiveEntry_virt <- takeVirtuals rsLiveEntry - = countLIs (ceiling $ blockFreq freqMap blockId) rsLiveEntry_virt instrs - - | otherwise - = error "RegAlloc.SpillCost.slurpSpillCostInfo: bad block" - - - countLIs :: Int -> UniqSet VirtualReg -> [LiveInstr instr] -> SpillCostState - countLIs _ _ [] - = return () - - -- Skip over comment and delta pseudo instrs. - countLIs scale rsLive (LiveInstr instr Nothing : lis) - | isMetaInstr instr - = countLIs scale rsLive lis - - | otherwise - = pprPanic "RegSpillCost.slurpSpillCostInfo" - $ text "no liveness information on instruction " <> ppr instr - - countLIs scale rsLiveEntry (LiveInstr instr (Just live) : lis) - = do - -- Increment the lifetime counts for regs live on entry to this instr. - mapM_ incLifetime $ nonDetEltsUniqSet rsLiveEntry - -- This is non-deterministic but we do not - -- currently support deterministic code-generation. - -- See Note [Unique Determinism and code generation] - - -- Increment counts for what regs were read/written from. - let (RU read written) = regUsageOfInstr platform instr - mapM_ (incUses scale) $ catMaybes $ map takeVirtualReg $ nub read - mapM_ (incDefs scale) $ catMaybes $ map takeVirtualReg $ nub written - - -- Compute liveness for entry to next instruction. - let liveDieRead_virt = takeVirtuals (liveDieRead live) - let liveDieWrite_virt = takeVirtuals (liveDieWrite live) - let liveBorn_virt = takeVirtuals (liveBorn live) - - let rsLiveAcross - = rsLiveEntry `minusUniqSet` liveDieRead_virt - - let rsLiveNext - = (rsLiveAcross `unionUniqSets` liveBorn_virt) - `minusUniqSet` liveDieWrite_virt - - countLIs scale rsLiveNext lis - - incDefs count reg = modify $ \s -> addToUFM_C plusSpillCostRecord s reg (reg, count, 0, 0) - incUses count reg = modify $ \s -> addToUFM_C plusSpillCostRecord s reg (reg, 0, count, 0) - incLifetime reg = modify $ \s -> addToUFM_C plusSpillCostRecord s reg (reg, 0, 0, 1) - - blockFreq :: Maybe (LabelMap Double) -> Label -> Double - blockFreq freqs bid - | Just freq <- join (mapLookup bid <$> freqs) - = max 1.0 (10000 * freq) - | otherwise - = 1.0 -- Only if no cfg given - --- | Take all the virtual registers from this set. -takeVirtuals :: UniqSet Reg -> UniqSet VirtualReg -takeVirtuals set = mkUniqSet - [ vr | RegVirtual vr <- nonDetEltsUniqSet set ] - -- See Note [Unique Determinism and code generation] - - --- | Choose a node to spill from this graph -chooseSpill - :: SpillCostInfo - -> Graph VirtualReg RegClass RealReg - -> VirtualReg - -chooseSpill info graph - = let cost = spillCost_length info graph - node = minimumBy (\n1 n2 -> compare (cost $ nodeId n1) (cost $ nodeId n2)) - $ nonDetEltsUFM $ graphMap graph - -- See Note [Unique Determinism and code generation] - - in nodeId node - - -------------------------------------------------------------------------------- --- | Chaitins spill cost function is: --- --- cost = sum loadCost * freq (u) + sum storeCost * freq (d) --- u <- uses (v) d <- defs (v) --- --- There are no loops in our code at the moment, so we can set the freq's to 1. --- --- If we don't have live range splitting then Chaitins function performs badly --- if we have lots of nested live ranges and very few registers. --- --- v1 v2 v3 --- def v1 . --- use v1 . --- def v2 . . --- def v3 . . . --- use v1 . . . --- use v3 . . . --- use v2 . . --- use v1 . --- --- defs uses degree cost --- v1: 1 3 3 1.5 --- v2: 1 2 3 1.0 --- v3: 1 1 3 0.666 --- --- v3 has the lowest cost, but if we only have 2 hardregs and we insert --- spill code for v3 then this isn't going to improve the colorability of --- the graph. --- --- When compiling SHA1, which as very long basic blocks and some vregs --- with very long live ranges the allocator seems to try and spill from --- the inside out and eventually run out of stack slots. --- --- Without live range splitting, its's better to spill from the outside --- in so set the cost of very long live ranges to zero --- - --- spillCost_chaitin --- :: SpillCostInfo --- -> Graph VirtualReg RegClass RealReg --- -> VirtualReg --- -> Float - --- spillCost_chaitin info graph reg --- -- Spilling a live range that only lives for 1 instruction --- -- isn't going to help us at all - and we definitely want to avoid --- -- trying to re-spill previously inserted spill code. --- | lifetime <= 1 = 1/0 - --- -- It's unlikely that we'll find a reg for a live range this long --- -- better to spill it straight up and not risk trying to keep it around --- -- and have to go through the build/color cycle again. - --- -- To facility this we scale down the spill cost of long ranges. --- -- This makes sure long ranges are still spilled first. --- -- But this way spill cost remains relevant for long live --- -- ranges. --- | lifetime >= 128 --- = (spillCost / conflicts) / 10.0 - - --- -- Otherwise revert to chaitin's regular cost function. --- | otherwise = (spillCost / conflicts) --- where --- !spillCost = fromIntegral (uses + defs) :: Float --- conflicts = fromIntegral (nodeDegree classOfVirtualReg graph reg) --- (_, defs, uses, lifetime) --- = fromMaybe (reg, 0, 0, 0) $ lookupUFM info reg - - --- Just spill the longest live range. -spillCost_length - :: SpillCostInfo - -> Graph VirtualReg RegClass RealReg - -> VirtualReg - -> Float - -spillCost_length info _ reg - | lifetime <= 1 = 1/0 - | otherwise = 1 / fromIntegral lifetime - where (_, _, _, lifetime) - = fromMaybe (reg, 0, 0, 0) - $ lookupUFM info reg - - --- | Extract a map of register lifetimes from a `SpillCostInfo`. -lifeMapFromSpillCostInfo :: SpillCostInfo -> UniqFM (VirtualReg, Int) -lifeMapFromSpillCostInfo info - = listToUFM - $ map (\(r, _, _, life) -> (r, (r, life))) - $ nonDetEltsUFM info - -- See Note [Unique Determinism and code generation] - - --- | Determine the degree (number of neighbors) of this node which --- have the same class. -nodeDegree - :: (VirtualReg -> RegClass) - -> Graph VirtualReg RegClass RealReg - -> VirtualReg - -> Int - -nodeDegree classOfVirtualReg graph reg - | Just node <- lookupUFM (graphMap graph) reg - - , virtConflicts - <- length - $ filter (\r -> classOfVirtualReg r == classOfVirtualReg reg) - $ nonDetEltsUniqSet - -- See Note [Unique Determinism and code generation] - $ nodeConflicts node - - = virtConflicts + sizeUniqSet (nodeExclusions node) - - | otherwise - = 0 - - --- | Show a spill cost record, including the degree from the graph --- and final calculated spill cost. -pprSpillCostRecord - :: (VirtualReg -> RegClass) - -> (Reg -> SDoc) - -> Graph VirtualReg RegClass RealReg - -> SpillCostRecord - -> SDoc - -pprSpillCostRecord regClass pprReg graph (reg, uses, defs, life) - = hsep - [ pprReg (RegVirtual reg) - , ppr uses - , ppr defs - , ppr life - , ppr $ nodeDegree regClass graph reg - , text $ show $ (fromIntegral (uses + defs) - / fromIntegral (nodeDegree regClass graph reg) :: Float) ] - diff --git a/compiler/nativeGen/RegAlloc/Graph/Stats.hs b/compiler/nativeGen/RegAlloc/Graph/Stats.hs deleted file mode 100644 index 2159548437..0000000000 --- a/compiler/nativeGen/RegAlloc/Graph/Stats.hs +++ /dev/null @@ -1,346 +0,0 @@ -{-# LANGUAGE BangPatterns, CPP #-} - -{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} - --- | Carries interesting info for debugging / profiling of the --- graph coloring register allocator. -module RegAlloc.Graph.Stats ( - RegAllocStats (..), - - pprStats, - pprStatsSpills, - pprStatsLifetimes, - pprStatsConflict, - pprStatsLifeConflict, - - countSRMs, addSRM -) where - -import GhcPrelude - -import qualified GraphColor as Color -import RegAlloc.Liveness -import RegAlloc.Graph.Spill -import RegAlloc.Graph.SpillCost -import RegAlloc.Graph.TrivColorable -import Instruction -import RegClass -import Reg -import TargetReg - -import Outputable -import UniqFM -import UniqSet -import State - --- | Holds interesting statistics from the register allocator. -data RegAllocStats statics instr - - -- Information about the initial conflict graph. - = RegAllocStatsStart - { -- | Initial code, with liveness. - raLiveCmm :: [LiveCmmDecl statics instr] - - -- | The initial, uncolored graph. - , raGraph :: Color.Graph VirtualReg RegClass RealReg - - -- | Information to help choose which regs to spill. - , raSpillCosts :: SpillCostInfo } - - - -- Information about an intermediate graph. - -- This is one that we couldn't color, so had to insert spill code - -- instruction stream. - | RegAllocStatsSpill - { -- | Code we tried to allocate registers for. - raCode :: [LiveCmmDecl statics instr] - - -- | Partially colored graph. - , raGraph :: Color.Graph VirtualReg RegClass RealReg - - -- | The regs that were coalesced. - , raCoalesced :: UniqFM VirtualReg - - -- | Spiller stats. - , raSpillStats :: SpillStats - - -- | Number of instructions each reg lives for. - , raSpillCosts :: SpillCostInfo - - -- | Code with spill instructions added. - , raSpilled :: [LiveCmmDecl statics instr] } - - - -- a successful coloring - | RegAllocStatsColored - { -- | Code we tried to allocate registers for. - raCode :: [LiveCmmDecl statics instr] - - -- | Uncolored graph. - , raGraph :: Color.Graph VirtualReg RegClass RealReg - - -- | Coalesced and colored graph. - , raGraphColored :: Color.Graph VirtualReg RegClass RealReg - - -- | Regs that were coalesced. - , raCoalesced :: UniqFM VirtualReg - - -- | Code with coalescings applied. - , raCodeCoalesced :: [LiveCmmDecl statics instr] - - -- | Code with vregs replaced by hregs. - , raPatched :: [LiveCmmDecl statics instr] - - -- | Code with unneeded spill\/reloads cleaned out. - , raSpillClean :: [LiveCmmDecl statics instr] - - -- | Final code. - , raFinal :: [NatCmmDecl statics instr] - - -- | Spill\/reload\/reg-reg moves present in this code. - , raSRMs :: (Int, Int, Int) } - - -instance (Outputable statics, Outputable instr) - => Outputable (RegAllocStats statics instr) where - - ppr (s@RegAllocStatsStart{}) = sdocWithPlatform $ \platform -> - text "# Start" - $$ text "# Native code with liveness information." - $$ ppr (raLiveCmm s) - $$ text "" - $$ text "# Initial register conflict graph." - $$ Color.dotGraph - (targetRegDotColor platform) - (trivColorable platform - (targetVirtualRegSqueeze platform) - (targetRealRegSqueeze platform)) - (raGraph s) - - - ppr (s@RegAllocStatsSpill{}) = - text "# Spill" - - $$ text "# Code with liveness information." - $$ ppr (raCode s) - $$ text "" - - $$ (if (not $ isNullUFM $ raCoalesced s) - then text "# Registers coalesced." - $$ pprUFMWithKeys (raCoalesced s) (vcat . map ppr) - $$ text "" - else empty) - - $$ text "# Spills inserted." - $$ ppr (raSpillStats s) - $$ text "" - - $$ text "# Code with spills inserted." - $$ ppr (raSpilled s) - - - ppr (s@RegAllocStatsColored { raSRMs = (spills, reloads, moves) }) - = sdocWithPlatform $ \platform -> - text "# Colored" - - $$ text "# Code with liveness information." - $$ ppr (raCode s) - $$ text "" - - $$ text "# Register conflict graph (colored)." - $$ Color.dotGraph - (targetRegDotColor platform) - (trivColorable platform - (targetVirtualRegSqueeze platform) - (targetRealRegSqueeze platform)) - (raGraphColored s) - $$ text "" - - $$ (if (not $ isNullUFM $ raCoalesced s) - then text "# Registers coalesced." - $$ pprUFMWithKeys (raCoalesced s) (vcat . map ppr) - $$ text "" - else empty) - - $$ text "# Native code after coalescings applied." - $$ ppr (raCodeCoalesced s) - $$ text "" - - $$ text "# Native code after register allocation." - $$ ppr (raPatched s) - $$ text "" - - $$ text "# Clean out unneeded spill/reloads." - $$ ppr (raSpillClean s) - $$ text "" - - $$ text "# Final code, after rewriting spill/rewrite pseudo instrs." - $$ ppr (raFinal s) - $$ text "" - $$ text "# Score:" - $$ (text "# spills inserted: " <> int spills) - $$ (text "# reloads inserted: " <> int reloads) - $$ (text "# reg-reg moves remaining: " <> int moves) - $$ text "" - - --- | Do all the different analysis on this list of RegAllocStats -pprStats - :: [RegAllocStats statics instr] - -> Color.Graph VirtualReg RegClass RealReg - -> SDoc - -pprStats stats graph - = let outSpills = pprStatsSpills stats - outLife = pprStatsLifetimes stats - outConflict = pprStatsConflict stats - outScatter = pprStatsLifeConflict stats graph - - in vcat [outSpills, outLife, outConflict, outScatter] - - --- | Dump a table of how many spill loads \/ stores were inserted for each vreg. -pprStatsSpills - :: [RegAllocStats statics instr] -> SDoc - -pprStatsSpills stats - = let - finals = [ s | s@RegAllocStatsColored{} <- stats] - - -- sum up how many stores\/loads\/reg-reg-moves were left in the code - total = foldl' addSRM (0, 0, 0) - $ map raSRMs finals - - in ( text "-- spills-added-total" - $$ text "-- (stores, loads, reg_reg_moves_remaining)" - $$ ppr total - $$ text "") - - --- | Dump a table of how long vregs tend to live for in the initial code. -pprStatsLifetimes - :: [RegAllocStats statics instr] -> SDoc - -pprStatsLifetimes stats - = let info = foldl' plusSpillCostInfo zeroSpillCostInfo - [ raSpillCosts s - | s@RegAllocStatsStart{} <- stats ] - - lifeBins = binLifetimeCount $ lifeMapFromSpillCostInfo info - - in ( text "-- vreg-population-lifetimes" - $$ text "-- (instruction_count, number_of_vregs_that_lived_that_long)" - $$ pprUFM lifeBins (vcat . map ppr) - $$ text "\n") - - -binLifetimeCount :: UniqFM (VirtualReg, Int) -> UniqFM (Int, Int) -binLifetimeCount fm - = let lifes = map (\l -> (l, (l, 1))) - $ map snd - $ nonDetEltsUFM fm - -- See Note [Unique Determinism and code generation] - - in addListToUFM_C - (\(l1, c1) (_, c2) -> (l1, c1 + c2)) - emptyUFM - lifes - - --- | Dump a table of how many conflicts vregs tend to have in the initial code. -pprStatsConflict - :: [RegAllocStats statics instr] -> SDoc - -pprStatsConflict stats - = let confMap = foldl' (plusUFM_C (\(c1, n1) (_, n2) -> (c1, n1 + n2))) - emptyUFM - $ map Color.slurpNodeConflictCount - [ raGraph s | s@RegAllocStatsStart{} <- stats ] - - in ( text "-- vreg-conflicts" - $$ text "-- (conflict_count, number_of_vregs_that_had_that_many_conflicts)" - $$ pprUFM confMap (vcat . map ppr) - $$ text "\n") - - --- | For every vreg, dump how many conflicts it has, and its lifetime. --- Good for making a scatter plot. -pprStatsLifeConflict - :: [RegAllocStats statics instr] - -> Color.Graph VirtualReg RegClass RealReg -- ^ global register conflict graph - -> SDoc - -pprStatsLifeConflict stats graph - = let lifeMap = lifeMapFromSpillCostInfo - $ foldl' plusSpillCostInfo zeroSpillCostInfo - $ [ raSpillCosts s | s@RegAllocStatsStart{} <- stats ] - - scatter = map (\r -> let lifetime = case lookupUFM lifeMap r of - Just (_, l) -> l - Nothing -> 0 - Just node = Color.lookupNode graph r - in parens $ hcat $ punctuate (text ", ") - [ doubleQuotes $ ppr $ Color.nodeId node - , ppr $ sizeUniqSet (Color.nodeConflicts node) - , ppr $ lifetime ]) - $ map Color.nodeId - $ nonDetEltsUFM - -- See Note [Unique Determinism and code generation] - $ Color.graphMap graph - - in ( text "-- vreg-conflict-lifetime" - $$ text "-- (vreg, vreg_conflicts, vreg_lifetime)" - $$ (vcat scatter) - $$ text "\n") - - --- | Count spill/reload/reg-reg moves. --- Lets us see how well the register allocator has done. -countSRMs - :: Instruction instr - => LiveCmmDecl statics instr -> (Int, Int, Int) - -countSRMs cmm - = execState (mapBlockTopM countSRM_block cmm) (0, 0, 0) - - -countSRM_block - :: Instruction instr - => GenBasicBlock (LiveInstr instr) - -> State (Int, Int, Int) (GenBasicBlock (LiveInstr instr)) - -countSRM_block (BasicBlock i instrs) - = do instrs' <- mapM countSRM_instr instrs - return $ BasicBlock i instrs' - - -countSRM_instr - :: Instruction instr - => LiveInstr instr -> State (Int, Int, Int) (LiveInstr instr) - -countSRM_instr li - | LiveInstr SPILL{} _ <- li - = do modify $ \(s, r, m) -> (s + 1, r, m) - return li - - | LiveInstr RELOAD{} _ <- li - = do modify $ \(s, r, m) -> (s, r + 1, m) - return li - - | LiveInstr instr _ <- li - , Just _ <- takeRegRegMoveInstr instr - = do modify $ \(s, r, m) -> (s, r, m + 1) - return li - - | otherwise - = return li - - --- sigh.. -addSRM :: (Int, Int, Int) -> (Int, Int, Int) -> (Int, Int, Int) -addSRM (s1, r1, m1) (s2, r2, m2) - = let !s = s1 + s2 - !r = r1 + r2 - !m = m1 + m2 - in (s, r, m) - diff --git a/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs deleted file mode 100644 index cc2ad7d594..0000000000 --- a/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs +++ /dev/null @@ -1,274 +0,0 @@ -{-# LANGUAGE CPP #-} - -module RegAlloc.Graph.TrivColorable ( - trivColorable, -) - -where - -#include "HsVersions.h" - -import GhcPrelude - -import RegClass -import Reg - -import GraphBase - -import UniqSet -import GHC.Platform -import Panic - --- trivColorable --------------------------------------------------------------- - --- trivColorable function for the graph coloring allocator --- --- This gets hammered by scanGraph during register allocation, --- so needs to be fairly efficient. --- --- NOTE: This only works for architectures with just RcInteger and RcDouble --- (which are disjoint) ie. x86, x86_64 and ppc --- --- The number of allocatable regs is hard coded in here so we can do --- a fast comparison in trivColorable. --- --- It's ok if these numbers are _less_ than the actual number of free --- regs, but they can't be more or the register conflict --- graph won't color. --- --- If the graph doesn't color then the allocator will panic, but it won't --- generate bad object code or anything nasty like that. --- --- There is an allocatableRegsInClass :: RegClass -> Int, but doing --- the unboxing is too slow for us here. --- TODO: Is that still true? Could we use allocatableRegsInClass --- without losing performance now? --- --- Look at includes/stg/MachRegs.h to get the numbers. --- - - --- Disjoint registers ---------------------------------------------------------- --- --- The definition has been unfolded into individual cases for speed. --- Each architecture has a different register setup, so we use a --- different regSqueeze function for each. --- -accSqueeze - :: Int - -> Int - -> (reg -> Int) - -> UniqSet reg - -> Int - -accSqueeze count maxCount squeeze us = acc count (nonDetEltsUniqSet us) - -- See Note [Unique Determinism and code generation] - where acc count [] = count - acc count _ | count >= maxCount = count - acc count (r:rs) = acc (count + squeeze r) rs - -{- Note [accSqueeze] -~~~~~~~~~~~~~~~~~~~~ -BL 2007/09 -Doing a nice fold over the UniqSet makes trivColorable use -32% of total compile time and 42% of total alloc when compiling SHA1.hs from darcs. -Therefore the UniqFM is made non-abstract and we use custom fold. - -MS 2010/04 -When converting UniqFM to use Data.IntMap, the fold cannot use UniqFM internal -representation any more. But it is imperative that the accSqueeze stops -the folding if the count gets greater or equal to maxCount. We thus convert -UniqFM to a (lazy) list, do the fold and stops if necessary, which was -the most efficient variant tried. Benchmark compiling 10-times SHA1.hs follows. -(original = previous implementation, folding = fold of the whole UFM, - lazyFold = the current implementation, - hackFold = using internal representation of Data.IntMap) - - original folding hackFold lazyFold - -O -fasm (used everywhere) 31.509s 30.387s 30.791s 30.603s - 100.00% 96.44% 97.72% 97.12% - -fregs-graph 67.938s 74.875s 62.673s 64.679s - 100.00% 110.21% 92.25% 95.20% - -fregs-iterative 89.761s 143.913s 81.075s 86.912s - 100.00% 160.33% 90.32% 96.83% - -fnew-codegen 38.225s 37.142s 37.551s 37.119s - 100.00% 97.17% 98.24% 97.11% - -fnew-codegen -fregs-graph 91.786s 91.51s 87.368s 86.88s - 100.00% 99.70% 95.19% 94.65% - -fnew-codegen -fregs-iterative 206.72s 343.632s 194.694s 208.677s - 100.00% 166.23% 94.18% 100.95% --} - -trivColorable - :: Platform - -> (RegClass -> VirtualReg -> Int) - -> (RegClass -> RealReg -> Int) - -> Triv VirtualReg RegClass RealReg - -trivColorable platform virtualRegSqueeze realRegSqueeze RcInteger conflicts exclusions - | let cALLOCATABLE_REGS_INTEGER - = (case platformArch platform of - ArchX86 -> 3 - ArchX86_64 -> 5 - ArchPPC -> 16 - ArchSPARC -> 14 - ArchSPARC64 -> panic "trivColorable ArchSPARC64" - ArchPPC_64 _ -> 15 - ArchARM _ _ _ -> panic "trivColorable ArchARM" - ArchARM64 -> panic "trivColorable ArchARM64" - ArchAlpha -> panic "trivColorable ArchAlpha" - ArchMipseb -> panic "trivColorable ArchMipseb" - ArchMipsel -> panic "trivColorable ArchMipsel" - ArchS390X -> panic "trivColorable ArchS390X" - ArchJavaScript-> panic "trivColorable ArchJavaScript" - ArchUnknown -> panic "trivColorable ArchUnknown") - , count2 <- accSqueeze 0 cALLOCATABLE_REGS_INTEGER - (virtualRegSqueeze RcInteger) - conflicts - - , count3 <- accSqueeze count2 cALLOCATABLE_REGS_INTEGER - (realRegSqueeze RcInteger) - exclusions - - = count3 < cALLOCATABLE_REGS_INTEGER - -trivColorable platform virtualRegSqueeze realRegSqueeze RcFloat conflicts exclusions - | let cALLOCATABLE_REGS_FLOAT - = (case platformArch platform of - -- On x86_64 and x86, Float and RcDouble - -- use the same registers, - -- so we only use RcDouble to represent the - -- register allocation problem on those types. - ArchX86 -> 0 - ArchX86_64 -> 0 - ArchPPC -> 0 - ArchSPARC -> 22 - ArchSPARC64 -> panic "trivColorable ArchSPARC64" - ArchPPC_64 _ -> 0 - ArchARM _ _ _ -> panic "trivColorable ArchARM" - ArchARM64 -> panic "trivColorable ArchARM64" - ArchAlpha -> panic "trivColorable ArchAlpha" - ArchMipseb -> panic "trivColorable ArchMipseb" - ArchMipsel -> panic "trivColorable ArchMipsel" - ArchS390X -> panic "trivColorable ArchS390X" - ArchJavaScript-> panic "trivColorable ArchJavaScript" - ArchUnknown -> panic "trivColorable ArchUnknown") - , count2 <- accSqueeze 0 cALLOCATABLE_REGS_FLOAT - (virtualRegSqueeze RcFloat) - conflicts - - , count3 <- accSqueeze count2 cALLOCATABLE_REGS_FLOAT - (realRegSqueeze RcFloat) - exclusions - - = count3 < cALLOCATABLE_REGS_FLOAT - -trivColorable platform virtualRegSqueeze realRegSqueeze RcDouble conflicts exclusions - | let cALLOCATABLE_REGS_DOUBLE - = (case platformArch platform of - ArchX86 -> 8 - -- in x86 32bit mode sse2 there are only - -- 8 XMM registers xmm0 ... xmm7 - ArchX86_64 -> 10 - -- in x86_64 there are 16 XMM registers - -- xmm0 .. xmm15, here 10 is a - -- "dont need to solve conflicts" count that - -- was chosen at some point in the past. - ArchPPC -> 26 - ArchSPARC -> 11 - ArchSPARC64 -> panic "trivColorable ArchSPARC64" - ArchPPC_64 _ -> 20 - ArchARM _ _ _ -> panic "trivColorable ArchARM" - ArchARM64 -> panic "trivColorable ArchARM64" - ArchAlpha -> panic "trivColorable ArchAlpha" - ArchMipseb -> panic "trivColorable ArchMipseb" - ArchMipsel -> panic "trivColorable ArchMipsel" - ArchS390X -> panic "trivColorable ArchS390X" - ArchJavaScript-> panic "trivColorable ArchJavaScript" - ArchUnknown -> panic "trivColorable ArchUnknown") - , count2 <- accSqueeze 0 cALLOCATABLE_REGS_DOUBLE - (virtualRegSqueeze RcDouble) - conflicts - - , count3 <- accSqueeze count2 cALLOCATABLE_REGS_DOUBLE - (realRegSqueeze RcDouble) - exclusions - - = count3 < cALLOCATABLE_REGS_DOUBLE - - - - --- Specification Code ---------------------------------------------------------- --- --- The trivColorable function for each particular architecture should --- implement the following function, but faster. --- - -{- -trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool -trivColorable classN conflicts exclusions - = let - - acc :: Reg -> (Int, Int) -> (Int, Int) - acc r (cd, cf) - = case regClass r of - RcInteger -> (cd+1, cf) - RcFloat -> (cd, cf+1) - _ -> panic "Regs.trivColorable: reg class not handled" - - tmp = nonDetFoldUFM acc (0, 0) conflicts - (countInt, countFloat) = nonDetFoldUFM acc tmp exclusions - - squeese = worst countInt classN RcInteger - + worst countFloat classN RcFloat - - in squeese < allocatableRegsInClass classN - --- | Worst case displacement --- node N of classN has n neighbors of class C. --- --- We currently only have RcInteger and RcDouble, which don't conflict at all. --- This is a bit boring compared to what's in RegArchX86. --- -worst :: Int -> RegClass -> RegClass -> Int -worst n classN classC - = case classN of - RcInteger - -> case classC of - RcInteger -> min n (allocatableRegsInClass RcInteger) - RcFloat -> 0 - - RcDouble - -> case classC of - RcFloat -> min n (allocatableRegsInClass RcFloat) - RcInteger -> 0 - --- allocatableRegs is allMachRegNos with the fixed-use regs removed. --- i.e., these are the regs for which we are prepared to allow the --- register allocator to attempt to map VRegs to. -allocatableRegs :: [RegNo] -allocatableRegs - = let isFree i = freeReg i - in filter isFree allMachRegNos - - --- | The number of regs in each class. --- We go via top level CAFs to ensure that we're not recomputing --- the length of these lists each time the fn is called. -allocatableRegsInClass :: RegClass -> Int -allocatableRegsInClass cls - = case cls of - RcInteger -> allocatableRegsInteger - RcFloat -> allocatableRegsDouble - -allocatableRegsInteger :: Int -allocatableRegsInteger - = length $ filter (\r -> regClass r == RcInteger) - $ map RealReg allocatableRegs - -allocatableRegsFloat :: Int -allocatableRegsFloat - = length $ filter (\r -> regClass r == RcFloat - $ map RealReg allocatableRegs --} diff --git a/compiler/nativeGen/RegAlloc/Linear/Base.hs b/compiler/nativeGen/RegAlloc/Linear/Base.hs deleted file mode 100644 index 552f14929d..0000000000 --- a/compiler/nativeGen/RegAlloc/Linear/Base.hs +++ /dev/null @@ -1,141 +0,0 @@ - --- | Put common type definitions here to break recursive module dependencies. - -module RegAlloc.Linear.Base ( - BlockAssignment, - - Loc(..), - regsOfLoc, - - -- for stats - SpillReason(..), - RegAllocStats(..), - - -- the allocator monad - RA_State(..), -) - -where - -import GhcPrelude - -import RegAlloc.Linear.StackMap -import RegAlloc.Liveness -import Reg - -import GHC.Driver.Session -import Outputable -import Unique -import UniqFM -import UniqSupply -import GHC.Cmm.BlockId - - --- | Used to store the register assignment on entry to a basic block. --- We use this to handle join points, where multiple branch instructions --- target a particular label. We have to insert fixup code to make --- the register assignments from the different sources match up. --- -type BlockAssignment freeRegs - = BlockMap (freeRegs, RegMap Loc) - - --- | Where a vreg is currently stored --- A temporary can be marked as living in both a register and memory --- (InBoth), for example if it was recently loaded from a spill location. --- This makes it cheap to spill (no save instruction required), but we --- have to be careful to turn this into InReg if the value in the --- register is changed. - --- This is also useful when a temporary is about to be clobbered. We --- save it in a spill location, but mark it as InBoth because the current --- instruction might still want to read it. --- -data Loc - -- | vreg is in a register - = InReg !RealReg - - -- | vreg is held in a stack slot - | InMem {-# UNPACK #-} !StackSlot - - - -- | vreg is held in both a register and a stack slot - | InBoth !RealReg - {-# UNPACK #-} !StackSlot - deriving (Eq, Show, Ord) - -instance Outputable Loc where - ppr l = text (show l) - - --- | Get the reg numbers stored in this Loc. -regsOfLoc :: Loc -> [RealReg] -regsOfLoc (InReg r) = [r] -regsOfLoc (InBoth r _) = [r] -regsOfLoc (InMem _) = [] - - --- | Reasons why instructions might be inserted by the spiller. --- Used when generating stats for -ddrop-asm-stats. --- -data SpillReason - -- | vreg was spilled to a slot so we could use its - -- current hreg for another vreg - = SpillAlloc !Unique - - -- | vreg was moved because its hreg was clobbered - | SpillClobber !Unique - - -- | vreg was loaded from a spill slot - | SpillLoad !Unique - - -- | reg-reg move inserted during join to targets - | SpillJoinRR !Unique - - -- | reg-mem move inserted during join to targets - | SpillJoinRM !Unique - - --- | Used to carry interesting stats out of the register allocator. -data RegAllocStats - = RegAllocStats - { ra_spillInstrs :: UniqFM [Int] - , ra_fixupList :: [(BlockId,BlockId,BlockId)] - -- ^ (from,fixup,to) : We inserted fixup code between from and to - } - - --- | The register allocator state -data RA_State freeRegs - = RA_State - - { - -- | the current mapping from basic blocks to - -- the register assignments at the beginning of that block. - ra_blockassig :: BlockAssignment freeRegs - - -- | free machine registers - , ra_freeregs :: !freeRegs - - -- | assignment of temps to locations - , ra_assig :: RegMap Loc - - -- | current stack delta - , ra_delta :: Int - - -- | free stack slots for spilling - , ra_stack :: StackMap - - -- | unique supply for generating names for join point fixup blocks. - , ra_us :: UniqSupply - - -- | Record why things were spilled, for -ddrop-asm-stats. - -- Just keep a list here instead of a map of regs -> reasons. - -- We don't want to slow down the allocator if we're not going to emit the stats. - , ra_spills :: [SpillReason] - , ra_DynFlags :: DynFlags - - -- | (from,fixup,to) : We inserted fixup code between from and to - , ra_fixups :: [(BlockId,BlockId,BlockId)] } - - diff --git a/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs deleted file mode 100644 index b2b9cff5bb..0000000000 --- a/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs +++ /dev/null @@ -1,89 +0,0 @@ -{-# LANGUAGE CPP #-} - -module RegAlloc.Linear.FreeRegs ( - FR(..), - maxSpillSlots -) - -#include "HsVersions.h" - -where - -import GhcPrelude - -import Reg -import RegClass - -import GHC.Driver.Session -import Panic -import GHC.Platform - --- ----------------------------------------------------------------------------- --- The free register set --- This needs to be *efficient* --- Here's an inefficient 'executable specification' of the FreeRegs data type: --- --- type FreeRegs = [RegNo] --- noFreeRegs = 0 --- releaseReg n f = if n `elem` f then f else (n : f) --- initFreeRegs = allocatableRegs --- getFreeRegs cls f = filter ( (==cls) . regClass . RealReg ) f --- allocateReg f r = filter (/= r) f - -import qualified RegAlloc.Linear.PPC.FreeRegs as PPC -import qualified RegAlloc.Linear.SPARC.FreeRegs as SPARC -import qualified RegAlloc.Linear.X86.FreeRegs as X86 -import qualified RegAlloc.Linear.X86_64.FreeRegs as X86_64 - -import qualified PPC.Instr -import qualified SPARC.Instr -import qualified X86.Instr - -class Show freeRegs => FR freeRegs where - frAllocateReg :: Platform -> RealReg -> freeRegs -> freeRegs - frGetFreeRegs :: Platform -> RegClass -> freeRegs -> [RealReg] - frInitFreeRegs :: Platform -> freeRegs - frReleaseReg :: Platform -> RealReg -> freeRegs -> freeRegs - -instance FR X86.FreeRegs where - frAllocateReg = \_ -> X86.allocateReg - frGetFreeRegs = X86.getFreeRegs - frInitFreeRegs = X86.initFreeRegs - frReleaseReg = \_ -> X86.releaseReg - -instance FR X86_64.FreeRegs where - frAllocateReg = \_ -> X86_64.allocateReg - frGetFreeRegs = X86_64.getFreeRegs - frInitFreeRegs = X86_64.initFreeRegs - frReleaseReg = \_ -> X86_64.releaseReg - -instance FR PPC.FreeRegs where - frAllocateReg = \_ -> PPC.allocateReg - frGetFreeRegs = \_ -> PPC.getFreeRegs - frInitFreeRegs = PPC.initFreeRegs - frReleaseReg = \_ -> PPC.releaseReg - -instance FR SPARC.FreeRegs where - frAllocateReg = SPARC.allocateReg - frGetFreeRegs = \_ -> SPARC.getFreeRegs - frInitFreeRegs = SPARC.initFreeRegs - frReleaseReg = SPARC.releaseReg - -maxSpillSlots :: DynFlags -> Int -maxSpillSlots dflags - = case platformArch (targetPlatform dflags) of - ArchX86 -> X86.Instr.maxSpillSlots dflags - ArchX86_64 -> X86.Instr.maxSpillSlots dflags - ArchPPC -> PPC.Instr.maxSpillSlots dflags - ArchS390X -> panic "maxSpillSlots ArchS390X" - ArchSPARC -> SPARC.Instr.maxSpillSlots dflags - ArchSPARC64 -> panic "maxSpillSlots ArchSPARC64" - ArchARM _ _ _ -> panic "maxSpillSlots ArchARM" - ArchARM64 -> panic "maxSpillSlots ArchARM64" - ArchPPC_64 _ -> PPC.Instr.maxSpillSlots dflags - ArchAlpha -> panic "maxSpillSlots ArchAlpha" - ArchMipseb -> panic "maxSpillSlots ArchMipseb" - ArchMipsel -> panic "maxSpillSlots ArchMipsel" - ArchJavaScript-> panic "maxSpillSlots ArchJavaScript" - ArchUnknown -> panic "maxSpillSlots ArchUnknown" - diff --git a/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs b/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs deleted file mode 100644 index 4362ca8a17..0000000000 --- a/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs +++ /dev/null @@ -1,378 +0,0 @@ -{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} - --- | Handles joining of a jump instruction to its targets. - --- The first time we encounter a jump to a particular basic block, we --- record the assignment of temporaries. The next time we encounter a --- jump to the same block, we compare our current assignment to the --- stored one. They might be different if spilling has occurred in one --- branch; so some fixup code will be required to match up the assignments. --- -module RegAlloc.Linear.JoinToTargets (joinToTargets) where - -import GhcPrelude - -import RegAlloc.Linear.State -import RegAlloc.Linear.Base -import RegAlloc.Linear.FreeRegs -import RegAlloc.Liveness -import Instruction -import Reg - -import GHC.Cmm.BlockId -import GHC.Cmm.Dataflow.Collections -import Digraph -import GHC.Driver.Session -import Outputable -import Unique -import UniqFM -import UniqSet - --- | For a jump instruction at the end of a block, generate fixup code so its --- vregs are in the correct regs for its destination. --- -joinToTargets - :: (FR freeRegs, Instruction instr, Outputable instr) - => BlockMap RegSet -- ^ maps the unique of the blockid to the set of vregs - -- that are known to be live on the entry to each block. - - -> BlockId -- ^ id of the current block - -> instr -- ^ branch instr on the end of the source block. - - -> RegM freeRegs ([NatBasicBlock instr] -- fresh blocks of fixup code. - , instr) -- the original branch - -- instruction, but maybe - -- patched to jump - -- to a fixup block first. - -joinToTargets block_live id instr - - -- we only need to worry about jump instructions. - | not $ isJumpishInstr instr - = return ([], instr) - - | otherwise - = joinToTargets' block_live [] id instr (jumpDestsOfInstr instr) - ------ -joinToTargets' - :: (FR freeRegs, Instruction instr, Outputable instr) - => BlockMap RegSet -- ^ maps the unique of the blockid to the set of vregs - -- that are known to be live on the entry to each block. - - -> [NatBasicBlock instr] -- ^ acc blocks of fixup code. - - -> BlockId -- ^ id of the current block - -> instr -- ^ branch instr on the end of the source block. - - -> [BlockId] -- ^ branch destinations still to consider. - - -> RegM freeRegs ([NatBasicBlock instr], instr) - --- no more targets to consider. all done. -joinToTargets' _ new_blocks _ instr [] - = return (new_blocks, instr) - --- handle a branch target. -joinToTargets' block_live new_blocks block_id instr (dest:dests) - = do - -- get the map of where the vregs are stored on entry to each basic block. - block_assig <- getBlockAssigR - - -- get the assignment on entry to the branch instruction. - assig <- getAssigR - - -- adjust the current assignment to remove any vregs that are not live - -- on entry to the destination block. - let Just live_set = mapLookup dest block_live - let still_live uniq _ = uniq `elemUniqSet_Directly` live_set - let adjusted_assig = filterUFM_Directly still_live assig - - -- and free up those registers which are now free. - let to_free = - [ r | (reg, loc) <- nonDetUFMToList assig - -- This is non-deterministic but we do not - -- currently support deterministic code-generation. - -- See Note [Unique Determinism and code generation] - , not (elemUniqSet_Directly reg live_set) - , r <- regsOfLoc loc ] - - case mapLookup dest block_assig of - Nothing - -> joinToTargets_first - block_live new_blocks block_id instr dest dests - block_assig adjusted_assig to_free - - Just (_, dest_assig) - -> joinToTargets_again - block_live new_blocks block_id instr dest dests - adjusted_assig dest_assig - - --- this is the first time we jumped to this block. -joinToTargets_first :: (FR freeRegs, Instruction instr, Outputable instr) - => BlockMap RegSet - -> [NatBasicBlock instr] - -> BlockId - -> instr - -> BlockId - -> [BlockId] - -> BlockAssignment freeRegs - -> RegMap Loc - -> [RealReg] - -> RegM freeRegs ([NatBasicBlock instr], instr) -joinToTargets_first block_live new_blocks block_id instr dest dests - block_assig src_assig - to_free - - = do dflags <- getDynFlags - let platform = targetPlatform dflags - - -- free up the regs that are not live on entry to this block. - freeregs <- getFreeRegsR - let freeregs' = foldl' (flip $ frReleaseReg platform) freeregs to_free - - -- remember the current assignment on entry to this block. - setBlockAssigR (mapInsert dest (freeregs', src_assig) block_assig) - - joinToTargets' block_live new_blocks block_id instr dests - - --- we've jumped to this block before -joinToTargets_again :: (Instruction instr, FR freeRegs, Outputable instr) - => BlockMap RegSet - -> [NatBasicBlock instr] - -> BlockId - -> instr - -> BlockId - -> [BlockId] - -> UniqFM Loc - -> UniqFM Loc - -> RegM freeRegs ([NatBasicBlock instr], instr) -joinToTargets_again - block_live new_blocks block_id instr dest dests - src_assig dest_assig - - -- the assignments already match, no problem. - | nonDetUFMToList dest_assig == nonDetUFMToList src_assig - -- This is non-deterministic but we do not - -- currently support deterministic code-generation. - -- See Note [Unique Determinism and code generation] - = joinToTargets' block_live new_blocks block_id instr dests - - -- assignments don't match, need fixup code - | otherwise - = do - - -- make a graph of what things need to be moved where. - let graph = makeRegMovementGraph src_assig dest_assig - - -- look for cycles in the graph. This can happen if regs need to be swapped. - -- Note that we depend on the fact that this function does a - -- bottom up traversal of the tree-like portions of the graph. - -- - -- eg, if we have - -- R1 -> R2 -> R3 - -- - -- ie move value in R1 to R2 and value in R2 to R3. - -- - -- We need to do the R2 -> R3 move before R1 -> R2. - -- - let sccs = stronglyConnCompFromEdgedVerticesOrdR graph - - -- debugging - {- - pprTrace - ("joinToTargets: making fixup code") - (vcat [ text " in block: " <> ppr block_id - , text " jmp instruction: " <> ppr instr - , text " src assignment: " <> ppr src_assig - , text " dest assignment: " <> ppr dest_assig - , text " movement graph: " <> ppr graph - , text " sccs of graph: " <> ppr sccs - , text ""]) - (return ()) - -} - delta <- getDeltaR - fixUpInstrs_ <- mapM (handleComponent delta instr) sccs - let fixUpInstrs = concat fixUpInstrs_ - - -- make a new basic block containing the fixup code. - -- A the end of the current block we will jump to the fixup one, - -- then that will jump to our original destination. - fixup_block_id <- mkBlockId <$> getUniqueR - let block = BasicBlock fixup_block_id - $ fixUpInstrs ++ mkJumpInstr dest - - -- if we didn't need any fixups, then don't include the block - case fixUpInstrs of - [] -> joinToTargets' block_live new_blocks block_id instr dests - - -- patch the original branch instruction so it goes to our - -- fixup block instead. - _ -> let instr' = patchJumpInstr instr - (\bid -> if bid == dest - then fixup_block_id - else bid) -- no change! - - in do - {- --debugging - pprTrace "FixUpEdge info:" - ( - text "inBlock:" <> ppr block_id $$ - text "instr:" <> ppr instr $$ - text "instr':" <> ppr instr' $$ - text "fixup_block_id':" <> - ppr fixup_block_id $$ - text "dest:" <> ppr dest - ) (return ()) - -} - recordFixupBlock block_id fixup_block_id dest - joinToTargets' block_live (block : new_blocks) - block_id instr' dests - - --- | Construct a graph of register\/spill movements. --- --- Cyclic components seem to occur only very rarely. --- --- We cut some corners by not handling memory-to-memory moves. --- This shouldn't happen because every temporary gets its own stack slot. --- -makeRegMovementGraph :: RegMap Loc -> RegMap Loc -> [Node Loc Unique] -makeRegMovementGraph adjusted_assig dest_assig - = [ node | (vreg, src) <- nonDetUFMToList adjusted_assig - -- This is non-deterministic but we do not - -- currently support deterministic code-generation. - -- See Note [Unique Determinism and code generation] - -- source reg might not be needed at the dest: - , Just loc <- [lookupUFM_Directly dest_assig vreg] - , node <- expandNode vreg src loc ] - - --- | Expand out the destination, so InBoth destinations turn into --- a combination of InReg and InMem. - --- The InBoth handling is a little tricky here. If the destination is --- InBoth, then we must ensure that the value ends up in both locations. --- An InBoth destination must conflict with an InReg or InMem source, so --- we expand an InBoth destination as necessary. --- --- An InBoth source is slightly different: we only care about the register --- that the source value is in, so that we can move it to the destinations. --- -expandNode - :: a - -> Loc -- ^ source of move - -> Loc -- ^ destination of move - -> [Node Loc a ] - -expandNode vreg loc@(InReg src) (InBoth dst mem) - | src == dst = [DigraphNode vreg loc [InMem mem]] - | otherwise = [DigraphNode vreg loc [InReg dst, InMem mem]] - -expandNode vreg loc@(InMem src) (InBoth dst mem) - | src == mem = [DigraphNode vreg loc [InReg dst]] - | otherwise = [DigraphNode vreg loc [InReg dst, InMem mem]] - -expandNode _ (InBoth _ src) (InMem dst) - | src == dst = [] -- guaranteed to be true - -expandNode _ (InBoth src _) (InReg dst) - | src == dst = [] - -expandNode vreg (InBoth src _) dst - = expandNode vreg (InReg src) dst - -expandNode vreg src dst - | src == dst = [] - | otherwise = [DigraphNode vreg src [dst]] - - --- | Generate fixup code for a particular component in the move graph --- This component tells us what values need to be moved to what --- destinations. We have eliminated any possibility of single-node --- cycles in expandNode above. --- -handleComponent - :: Instruction instr - => Int -> instr -> SCC (Node Loc Unique) - -> RegM freeRegs [instr] - --- If the graph is acyclic then we won't get the swapping problem below. --- In this case we can just do the moves directly, and avoid having to --- go via a spill slot. --- -handleComponent delta _ (AcyclicSCC (DigraphNode vreg src dsts)) - = mapM (makeMove delta vreg src) dsts - - --- Handle some cyclic moves. --- This can happen if we have two regs that need to be swapped. --- eg: --- vreg source loc dest loc --- (vreg1, InReg r1, [InReg r2]) --- (vreg2, InReg r2, [InReg r1]) --- --- To avoid needing temp register, we just spill all the source regs, then --- reaload them into their destination regs. --- --- Note that we can not have cycles that involve memory locations as --- sources as single destination because memory locations (stack slots) --- are allocated exclusively for a virtual register and therefore can not --- require a fixup. --- -handleComponent delta instr - (CyclicSCC ((DigraphNode vreg (InReg sreg) ((InReg dreg: _))) : rest)) - -- dest list may have more than one element, if the reg is also InMem. - = do - -- spill the source into its slot - (instrSpill, slot) - <- spillR (RegReal sreg) vreg - - -- reload into destination reg - instrLoad <- loadR (RegReal dreg) slot - - remainingFixUps <- mapM (handleComponent delta instr) - (stronglyConnCompFromEdgedVerticesOrdR rest) - - -- make sure to do all the reloads after all the spills, - -- so we don't end up clobbering the source values. - return ([instrSpill] ++ concat remainingFixUps ++ [instrLoad]) - -handleComponent _ _ (CyclicSCC _) - = panic "Register Allocator: handleComponent cyclic" - - --- | Move a vreg between these two locations. --- -makeMove - :: Instruction instr - => Int -- ^ current C stack delta. - -> Unique -- ^ unique of the vreg that we're moving. - -> Loc -- ^ source location. - -> Loc -- ^ destination location. - -> RegM freeRegs instr -- ^ move instruction. - -makeMove delta vreg src dst - = do dflags <- getDynFlags - let platform = targetPlatform dflags - - case (src, dst) of - (InReg s, InReg d) -> - do recordSpill (SpillJoinRR vreg) - return $ mkRegRegMoveInstr platform (RegReal s) (RegReal d) - (InMem s, InReg d) -> - do recordSpill (SpillJoinRM vreg) - return $ mkLoadInstr dflags (RegReal d) delta s - (InReg s, InMem d) -> - do recordSpill (SpillJoinRM vreg) - return $ mkSpillInstr dflags (RegReal s) delta d - _ -> - -- we don't handle memory to memory moves. - -- they shouldn't happen because we don't share - -- stack slots between vregs. - panic ("makeMove " ++ show vreg ++ " (" ++ show src ++ ") (" - ++ show dst ++ ")" - ++ " we don't handle mem->mem moves.") - diff --git a/compiler/nativeGen/RegAlloc/Linear/Main.hs b/compiler/nativeGen/RegAlloc/Linear/Main.hs deleted file mode 100644 index 076b63a4ed..0000000000 --- a/compiler/nativeGen/RegAlloc/Linear/Main.hs +++ /dev/null @@ -1,920 +0,0 @@ -{-# LANGUAGE BangPatterns, CPP, ScopedTypeVariables #-} - -{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} - ------------------------------------------------------------------------------ --- --- The register allocator --- --- (c) The University of Glasgow 2004 --- ------------------------------------------------------------------------------ - -{- -The algorithm is roughly: - - 1) Compute strongly connected components of the basic block list. - - 2) Compute liveness (mapping from pseudo register to - point(s) of death?). - - 3) Walk instructions in each basic block. We keep track of - (a) Free real registers (a bitmap?) - (b) Current assignment of temporaries to machine registers and/or - spill slots (call this the "assignment"). - (c) Partial mapping from basic block ids to a virt-to-loc mapping. - When we first encounter a branch to a basic block, - we fill in its entry in this table with the current mapping. - - For each instruction: - (a) For each temporary *read* by the instruction: - If the temporary does not have a real register allocation: - - Allocate a real register from the free list. If - the list is empty: - - Find a temporary to spill. Pick one that is - not used in this instruction (ToDo: not - used for a while...) - - generate a spill instruction - - If the temporary was previously spilled, - generate an instruction to read the temp from its spill loc. - (optimisation: if we can see that a real register is going to - be used soon, then don't use it for allocation). - - (b) For each real register clobbered by this instruction: - If a temporary resides in it, - If the temporary is live after this instruction, - Move the temporary to another (non-clobbered & free) reg, - or spill it to memory. Mark the temporary as residing - in both memory and a register if it was spilled (it might - need to be read by this instruction). - - (ToDo: this is wrong for jump instructions?) - - We do this after step (a), because if we start with - movq v1, %rsi - which is an instruction that clobbers %rsi, if v1 currently resides - in %rsi we want to get - movq %rsi, %freereg - movq %rsi, %rsi -- will disappear - instead of - movq %rsi, %freereg - movq %freereg, %rsi - - (c) Update the current assignment - - (d) If the instruction is a branch: - if the destination block already has a register assignment, - Generate a new block with fixup code and redirect the - jump to the new block. - else, - Update the block id->assignment mapping with the current - assignment. - - (e) Delete all register assignments for temps which are read - (only) and die here. Update the free register list. - - (f) Mark all registers clobbered by this instruction as not free, - and mark temporaries which have been spilled due to clobbering - as in memory (step (a) marks then as in both mem & reg). - - (g) For each temporary *written* by this instruction: - Allocate a real register as for (b), spilling something - else if necessary. - - except when updating the assignment, drop any memory - locations that the temporary was previously in, since - they will be no longer valid after this instruction. - - (h) Delete all register assignments for temps which are - written and die here (there should rarely be any). Update - the free register list. - - (i) Rewrite the instruction with the new mapping. - - (j) For each spilled reg known to be now dead, re-add its stack slot - to the free list. - --} - -module RegAlloc.Linear.Main ( - regAlloc, - module RegAlloc.Linear.Base, - module RegAlloc.Linear.Stats - ) where - -#include "HsVersions.h" - - -import GhcPrelude - -import RegAlloc.Linear.State -import RegAlloc.Linear.Base -import RegAlloc.Linear.StackMap -import RegAlloc.Linear.FreeRegs -import RegAlloc.Linear.Stats -import RegAlloc.Linear.JoinToTargets -import qualified RegAlloc.Linear.PPC.FreeRegs as PPC -import qualified RegAlloc.Linear.SPARC.FreeRegs as SPARC -import qualified RegAlloc.Linear.X86.FreeRegs as X86 -import qualified RegAlloc.Linear.X86_64.FreeRegs as X86_64 -import TargetReg -import RegAlloc.Liveness -import Instruction -import Reg - -import GHC.Cmm.BlockId -import GHC.Cmm.Dataflow.Collections -import GHC.Cmm hiding (RegSet) - -import Digraph -import GHC.Driver.Session -import Unique -import UniqSet -import UniqFM -import UniqSupply -import Outputable -import GHC.Platform - -import Data.Maybe -import Data.List -import Control.Monad - --- ----------------------------------------------------------------------------- --- Top level of the register allocator - --- Allocate registers -regAlloc - :: (Outputable instr, Instruction instr) - => DynFlags - -> LiveCmmDecl statics instr - -> UniqSM ( NatCmmDecl statics instr - , Maybe Int -- number of extra stack slots required, - -- beyond maxSpillSlots - , Maybe RegAllocStats - ) - -regAlloc _ (CmmData sec d) - = return - ( CmmData sec d - , Nothing - , Nothing ) - -regAlloc _ (CmmProc (LiveInfo info _ _ _) lbl live []) - = return ( CmmProc info lbl live (ListGraph []) - , Nothing - , Nothing ) - -regAlloc dflags (CmmProc static lbl live sccs) - | LiveInfo info entry_ids@(first_id:_) block_live _ <- static - = do - -- do register allocation on each component. - (final_blocks, stats, stack_use) - <- linearRegAlloc dflags entry_ids block_live sccs - - -- make sure the block that was first in the input list - -- stays at the front of the output - let ((first':_), rest') - = partition ((== first_id) . blockId) final_blocks - - let max_spill_slots = maxSpillSlots dflags - extra_stack - | stack_use > max_spill_slots - = Just (stack_use - max_spill_slots) - | otherwise - = Nothing - - return ( CmmProc info lbl live (ListGraph (first' : rest')) - , extra_stack - , Just stats) - --- bogus. to make non-exhaustive match warning go away. -regAlloc _ (CmmProc _ _ _ _) - = panic "RegAllocLinear.regAlloc: no match" - - --- ----------------------------------------------------------------------------- --- Linear sweep to allocate registers - - --- | Do register allocation on some basic blocks. --- But be careful to allocate a block in an SCC only if it has --- an entry in the block map or it is the first block. --- -linearRegAlloc - :: (Outputable instr, Instruction instr) - => DynFlags - -> [BlockId] -- ^ entry points - -> BlockMap RegSet - -- ^ live regs on entry to each basic block - -> [SCC (LiveBasicBlock instr)] - -- ^ instructions annotated with "deaths" - -> UniqSM ([NatBasicBlock instr], RegAllocStats, Int) - -linearRegAlloc dflags entry_ids block_live sccs - = case platformArch platform of - ArchX86 -> go $ (frInitFreeRegs platform :: X86.FreeRegs) - ArchX86_64 -> go $ (frInitFreeRegs platform :: X86_64.FreeRegs) - ArchS390X -> panic "linearRegAlloc ArchS390X" - ArchSPARC -> go $ (frInitFreeRegs platform :: SPARC.FreeRegs) - ArchSPARC64 -> panic "linearRegAlloc ArchSPARC64" - ArchPPC -> go $ (frInitFreeRegs platform :: PPC.FreeRegs) - ArchARM _ _ _ -> panic "linearRegAlloc ArchARM" - ArchARM64 -> panic "linearRegAlloc ArchARM64" - ArchPPC_64 _ -> go $ (frInitFreeRegs platform :: PPC.FreeRegs) - ArchAlpha -> panic "linearRegAlloc ArchAlpha" - ArchMipseb -> panic "linearRegAlloc ArchMipseb" - ArchMipsel -> panic "linearRegAlloc ArchMipsel" - ArchJavaScript -> panic "linearRegAlloc ArchJavaScript" - ArchUnknown -> panic "linearRegAlloc ArchUnknown" - where - go f = linearRegAlloc' dflags f entry_ids block_live sccs - platform = targetPlatform dflags - -linearRegAlloc' - :: (FR freeRegs, Outputable instr, Instruction instr) - => DynFlags - -> freeRegs - -> [BlockId] -- ^ entry points - -> BlockMap RegSet -- ^ live regs on entry to each basic block - -> [SCC (LiveBasicBlock instr)] -- ^ instructions annotated with "deaths" - -> UniqSM ([NatBasicBlock instr], RegAllocStats, Int) - -linearRegAlloc' dflags initFreeRegs entry_ids block_live sccs - = do us <- getUniqueSupplyM - let (_, stack, stats, blocks) = - runR dflags mapEmpty initFreeRegs emptyRegMap (emptyStackMap dflags) us - $ linearRA_SCCs entry_ids block_live [] sccs - return (blocks, stats, getStackUse stack) - - -linearRA_SCCs :: (FR freeRegs, Instruction instr, Outputable instr) - => [BlockId] - -> BlockMap RegSet - -> [NatBasicBlock instr] - -> [SCC (LiveBasicBlock instr)] - -> RegM freeRegs [NatBasicBlock instr] - -linearRA_SCCs _ _ blocksAcc [] - = return $ reverse blocksAcc - -linearRA_SCCs entry_ids block_live blocksAcc (AcyclicSCC block : sccs) - = do blocks' <- processBlock block_live block - linearRA_SCCs entry_ids block_live - ((reverse blocks') ++ blocksAcc) - sccs - -linearRA_SCCs entry_ids block_live blocksAcc (CyclicSCC blocks : sccs) - = do - blockss' <- process entry_ids block_live blocks [] (return []) False - linearRA_SCCs entry_ids block_live - (reverse (concat blockss') ++ blocksAcc) - sccs - -{- from John Dias's patch 2008/10/16: - The linear-scan allocator sometimes allocates a block - before allocating one of its predecessors, which could lead to - inconsistent allocations. Make it so a block is only allocated - if a predecessor has set the "incoming" assignments for the block, or - if it's the procedure's entry block. - - BL 2009/02: Careful. If the assignment for a block doesn't get set for - some reason then this function will loop. We should probably do some - more sanity checking to guard against this eventuality. --} - -process :: (FR freeRegs, Instruction instr, Outputable instr) - => [BlockId] - -> BlockMap RegSet - -> [GenBasicBlock (LiveInstr instr)] - -> [GenBasicBlock (LiveInstr instr)] - -> [[NatBasicBlock instr]] - -> Bool - -> RegM freeRegs [[NatBasicBlock instr]] - -process _ _ [] [] accum _ - = return $ reverse accum - -process entry_ids block_live [] next_round accum madeProgress - | not madeProgress - - {- BUGS: There are so many unreachable blocks in the code the warnings are overwhelming. - pprTrace "RegAlloc.Linear.Main.process: no progress made, bailing out." - ( text "Unreachable blocks:" - $$ vcat (map ppr next_round)) -} - = return $ reverse accum - - | otherwise - = process entry_ids block_live - next_round [] accum False - -process entry_ids block_live (b@(BasicBlock id _) : blocks) - next_round accum madeProgress - = do - block_assig <- getBlockAssigR - - if isJust (mapLookup id block_assig) - || id `elem` entry_ids - then do - b' <- processBlock block_live b - process entry_ids block_live blocks - next_round (b' : accum) True - - else process entry_ids block_live blocks - (b : next_round) accum madeProgress - - --- | Do register allocation on this basic block --- -processBlock - :: (FR freeRegs, Outputable instr, Instruction instr) - => BlockMap RegSet -- ^ live regs on entry to each basic block - -> LiveBasicBlock instr -- ^ block to do register allocation on - -> RegM freeRegs [NatBasicBlock instr] -- ^ block with registers allocated - -processBlock block_live (BasicBlock id instrs) - = do initBlock id block_live - (instrs', fixups) - <- linearRA block_live [] [] id instrs - return $ BasicBlock id instrs' : fixups - - --- | Load the freeregs and current reg assignment into the RegM state --- for the basic block with this BlockId. -initBlock :: FR freeRegs - => BlockId -> BlockMap RegSet -> RegM freeRegs () -initBlock id block_live - = do dflags <- getDynFlags - let platform = targetPlatform dflags - block_assig <- getBlockAssigR - case mapLookup id block_assig of - -- no prior info about this block: we must consider - -- any fixed regs to be allocated, but we can ignore - -- virtual regs (presumably this is part of a loop, - -- and we'll iterate again). The assignment begins - -- empty. - Nothing - -> do -- pprTrace "initFreeRegs" (text $ show initFreeRegs) (return ()) - case mapLookup id block_live of - Nothing -> - setFreeRegsR (frInitFreeRegs platform) - Just live -> - setFreeRegsR $ foldl' (flip $ frAllocateReg platform) (frInitFreeRegs platform) - [ r | RegReal r <- nonDetEltsUniqSet live ] - -- See Note [Unique Determinism and code generation] - setAssigR emptyRegMap - - -- load info about register assignments leading into this block. - Just (freeregs, assig) - -> do setFreeRegsR freeregs - setAssigR assig - - --- | Do allocation for a sequence of instructions. -linearRA - :: (FR freeRegs, Outputable instr, Instruction instr) - => BlockMap RegSet -- ^ map of what vregs are live on entry to each block. - -> [instr] -- ^ accumulator for instructions already processed. - -> [NatBasicBlock instr] -- ^ accumulator for blocks of fixup code. - -> BlockId -- ^ id of the current block, for debugging. - -> [LiveInstr instr] -- ^ liveness annotated instructions in this block. - - -> RegM freeRegs - ( [instr] -- instructions after register allocation - , [NatBasicBlock instr]) -- fresh blocks of fixup code. - - -linearRA _ accInstr accFixup _ [] - = return - ( reverse accInstr -- instrs need to be returned in the correct order. - , accFixup) -- it doesn't matter what order the fixup blocks are returned in. - - -linearRA block_live accInstr accFixups id (instr:instrs) - = do - (accInstr', new_fixups) <- raInsn block_live accInstr id instr - - linearRA block_live accInstr' (new_fixups ++ accFixups) id instrs - - --- | Do allocation for a single instruction. -raInsn - :: (FR freeRegs, Outputable instr, Instruction instr) - => BlockMap RegSet -- ^ map of what vregs are love on entry to each block. - -> [instr] -- ^ accumulator for instructions already processed. - -> BlockId -- ^ the id of the current block, for debugging - -> LiveInstr instr -- ^ the instr to have its regs allocated, with liveness info. - -> RegM freeRegs - ( [instr] -- new instructions - , [NatBasicBlock instr]) -- extra fixup blocks - -raInsn _ new_instrs _ (LiveInstr ii Nothing) - | Just n <- takeDeltaInstr ii - = do setDeltaR n - return (new_instrs, []) - -raInsn _ new_instrs _ (LiveInstr ii@(Instr i) Nothing) - | isMetaInstr ii - = return (i : new_instrs, []) - - -raInsn block_live new_instrs id (LiveInstr (Instr instr) (Just live)) - = do - assig <- getAssigR - - -- If we have a reg->reg move between virtual registers, where the - -- src register is not live after this instruction, and the dst - -- register does not already have an assignment, - -- and the source register is assigned to a register, not to a spill slot, - -- then we can eliminate the instruction. - -- (we can't eliminate it if the source register is on the stack, because - -- we do not want to use one spill slot for different virtual registers) - case takeRegRegMoveInstr instr of - Just (src,dst) | src `elementOfUniqSet` (liveDieRead live), - isVirtualReg dst, - not (dst `elemUFM` assig), - isRealReg src || isInReg src assig -> do - case src of - (RegReal rr) -> setAssigR (addToUFM assig dst (InReg rr)) - -- if src is a fixed reg, then we just map dest to this - -- reg in the assignment. src must be an allocatable reg, - -- otherwise it wouldn't be in r_dying. - _virt -> case lookupUFM assig src of - Nothing -> panic "raInsn" - Just loc -> - setAssigR (addToUFM (delFromUFM assig src) dst loc) - - -- we have eliminated this instruction - {- - freeregs <- getFreeRegsR - assig <- getAssigR - pprTrace "raInsn" (text "ELIMINATED: " <> docToSDoc (pprInstr instr) - $$ ppr r_dying <+> ppr w_dying $$ text (show freeregs) $$ ppr assig) $ do - -} - return (new_instrs, []) - - _ -> genRaInsn block_live new_instrs id instr - (nonDetEltsUniqSet $ liveDieRead live) - (nonDetEltsUniqSet $ liveDieWrite live) - -- See Note [Unique Determinism and code generation] - -raInsn _ _ _ instr - = pprPanic "raInsn" (text "no match for:" <> ppr instr) - --- ToDo: what can we do about --- --- R1 = x --- jump I64[x] // [R1] --- --- where x is mapped to the same reg as R1. We want to coalesce x and --- R1, but the register allocator doesn't know whether x will be --- assigned to again later, in which case x and R1 should be in --- different registers. Right now we assume the worst, and the --- assignment to R1 will clobber x, so we'll spill x into another reg, --- generating another reg->reg move. - - -isInReg :: Reg -> RegMap Loc -> Bool -isInReg src assig | Just (InReg _) <- lookupUFM assig src = True - | otherwise = False - - -genRaInsn :: (FR freeRegs, Instruction instr, Outputable instr) - => BlockMap RegSet - -> [instr] - -> BlockId - -> instr - -> [Reg] - -> [Reg] - -> RegM freeRegs ([instr], [NatBasicBlock instr]) - -genRaInsn block_live new_instrs block_id instr r_dying w_dying = do - dflags <- getDynFlags - let platform = targetPlatform dflags - case regUsageOfInstr platform instr of { RU read written -> - do - let real_written = [ rr | (RegReal rr) <- written ] - let virt_written = [ vr | (RegVirtual vr) <- written ] - - -- we don't need to do anything with real registers that are - -- only read by this instr. (the list is typically ~2 elements, - -- so using nub isn't a problem). - let virt_read = nub [ vr | (RegVirtual vr) <- read ] - - -- debugging -{- freeregs <- getFreeRegsR - assig <- getAssigR - pprDebugAndThen (defaultDynFlags Settings{ sTargetPlatform=platform } undefined) trace "genRaInsn" - (ppr instr - $$ text "r_dying = " <+> ppr r_dying - $$ text "w_dying = " <+> ppr w_dying - $$ text "virt_read = " <+> ppr virt_read - $$ text "virt_written = " <+> ppr virt_written - $$ text "freeregs = " <+> text (show freeregs) - $$ text "assig = " <+> ppr assig) - $ do --} - - -- (a), (b) allocate real regs for all regs read by this instruction. - (r_spills, r_allocd) <- - allocateRegsAndSpill True{-reading-} virt_read [] [] virt_read - - -- (c) save any temporaries which will be clobbered by this instruction - clobber_saves <- saveClobberedTemps real_written r_dying - - -- (d) Update block map for new destinations - -- NB. do this before removing dead regs from the assignment, because - -- these dead regs might in fact be live in the jump targets (they're - -- only dead in the code that follows in the current basic block). - (fixup_blocks, adjusted_instr) - <- joinToTargets block_live block_id instr - - -- Debugging - show places where the reg alloc inserted - -- assignment fixup blocks. - -- when (not $ null fixup_blocks) $ - -- pprTrace "fixup_blocks" (ppr fixup_blocks) (return ()) - - -- (e) Delete all register assignments for temps which are read - -- (only) and die here. Update the free register list. - releaseRegs r_dying - - -- (f) Mark regs which are clobbered as unallocatable - clobberRegs real_written - - -- (g) Allocate registers for temporaries *written* (only) - (w_spills, w_allocd) <- - allocateRegsAndSpill False{-writing-} virt_written [] [] virt_written - - -- (h) Release registers for temps which are written here and not - -- used again. - releaseRegs w_dying - - let - -- (i) Patch the instruction - patch_map - = listToUFM - [ (t, RegReal r) - | (t, r) <- zip virt_read r_allocd - ++ zip virt_written w_allocd ] - - patched_instr - = patchRegsOfInstr adjusted_instr patchLookup - - patchLookup x - = case lookupUFM patch_map x of - Nothing -> x - Just y -> y - - - -- (j) free up stack slots for dead spilled regs - -- TODO (can't be bothered right now) - - -- erase reg->reg moves where the source and destination are the same. - -- If the src temp didn't die in this instr but happened to be allocated - -- to the same real reg as the destination, then we can erase the move anyway. - let squashed_instr = case takeRegRegMoveInstr patched_instr of - Just (src, dst) - | src == dst -> [] - _ -> [patched_instr] - - let code = squashed_instr ++ w_spills ++ reverse r_spills - ++ clobber_saves ++ new_instrs - --- pprTrace "patched-code" ((vcat $ map (docToSDoc . pprInstr) code)) $ do --- pprTrace "pached-fixup" ((ppr fixup_blocks)) $ do - - return (code, fixup_blocks) - - } - --- ----------------------------------------------------------------------------- --- releaseRegs - -releaseRegs :: FR freeRegs => [Reg] -> RegM freeRegs () -releaseRegs regs = do - dflags <- getDynFlags - let platform = targetPlatform dflags - assig <- getAssigR - free <- getFreeRegsR - let loop assig !free [] = do setAssigR assig; setFreeRegsR free; return () - loop assig !free (RegReal rr : rs) = loop assig (frReleaseReg platform rr free) rs - loop assig !free (r:rs) = - case lookupUFM assig r of - Just (InBoth real _) -> loop (delFromUFM assig r) - (frReleaseReg platform real free) rs - Just (InReg real) -> loop (delFromUFM assig r) - (frReleaseReg platform real free) rs - _ -> loop (delFromUFM assig r) free rs - loop assig free regs - - --- ----------------------------------------------------------------------------- --- Clobber real registers - --- For each temp in a register that is going to be clobbered: --- - if the temp dies after this instruction, do nothing --- - otherwise, put it somewhere safe (another reg if possible, --- otherwise spill and record InBoth in the assignment). --- - for allocateRegs on the temps *read*, --- - clobbered regs are allocatable. --- --- for allocateRegs on the temps *written*, --- - clobbered regs are not allocatable. --- - -saveClobberedTemps - :: (Instruction instr, FR freeRegs) - => [RealReg] -- real registers clobbered by this instruction - -> [Reg] -- registers which are no longer live after this insn - -> RegM freeRegs [instr] -- return: instructions to spill any temps that will - -- be clobbered. - -saveClobberedTemps [] _ - = return [] - -saveClobberedTemps clobbered dying - = do - assig <- getAssigR - let to_spill - = [ (temp,reg) - | (temp, InReg reg) <- nonDetUFMToList assig - -- This is non-deterministic but we do not - -- currently support deterministic code-generation. - -- See Note [Unique Determinism and code generation] - , any (realRegsAlias reg) clobbered - , temp `notElem` map getUnique dying ] - - (instrs,assig') <- clobber assig [] to_spill - setAssigR assig' - return instrs - - where - clobber assig instrs [] - = return (instrs, assig) - - clobber assig instrs ((temp, reg) : rest) - = do dflags <- getDynFlags - let platform = targetPlatform dflags - - freeRegs <- getFreeRegsR - let regclass = targetClassOfRealReg platform reg - freeRegs_thisClass = frGetFreeRegs platform regclass freeRegs - - case filter (`notElem` clobbered) freeRegs_thisClass of - - -- (1) we have a free reg of the right class that isn't - -- clobbered by this instruction; use it to save the - -- clobbered value. - (my_reg : _) -> do - setFreeRegsR (frAllocateReg platform my_reg freeRegs) - - let new_assign = addToUFM assig temp (InReg my_reg) - let instr = mkRegRegMoveInstr platform - (RegReal reg) (RegReal my_reg) - - clobber new_assign (instr : instrs) rest - - -- (2) no free registers: spill the value - [] -> do - (spill, slot) <- spillR (RegReal reg) temp - - -- record why this reg was spilled for profiling - recordSpill (SpillClobber temp) - - let new_assign = addToUFM assig temp (InBoth reg slot) - - clobber new_assign (spill : instrs) rest - - - --- | Mark all these real regs as allocated, --- and kick out their vreg assignments. --- -clobberRegs :: FR freeRegs => [RealReg] -> RegM freeRegs () -clobberRegs [] - = return () - -clobberRegs clobbered - = do dflags <- getDynFlags - let platform = targetPlatform dflags - - freeregs <- getFreeRegsR - setFreeRegsR $! foldl' (flip $ frAllocateReg platform) freeregs clobbered - - assig <- getAssigR - setAssigR $! clobber assig (nonDetUFMToList assig) - -- This is non-deterministic but we do not - -- currently support deterministic code-generation. - -- See Note [Unique Determinism and code generation] - - where - -- if the temp was InReg and clobbered, then we will have - -- saved it in saveClobberedTemps above. So the only case - -- we have to worry about here is InBoth. Note that this - -- also catches temps which were loaded up during allocation - -- of read registers, not just those saved in saveClobberedTemps. - - clobber assig [] - = assig - - clobber assig ((temp, InBoth reg slot) : rest) - | any (realRegsAlias reg) clobbered - = clobber (addToUFM assig temp (InMem slot)) rest - - clobber assig (_:rest) - = clobber assig rest - --- ----------------------------------------------------------------------------- --- allocateRegsAndSpill - --- Why are we performing a spill? -data SpillLoc = ReadMem StackSlot -- reading from register only in memory - | WriteNew -- writing to a new variable - | WriteMem -- writing to register only in memory --- Note that ReadNew is not valid, since you don't want to be reading --- from an uninitialized register. We also don't need the location of --- the register in memory, since that will be invalidated by the write. --- Technically, we could coalesce WriteNew and WriteMem into a single --- entry as well. -- EZY - --- This function does several things: --- For each temporary referred to by this instruction, --- we allocate a real register (spilling another temporary if necessary). --- We load the temporary up from memory if necessary. --- We also update the register assignment in the process, and --- the list of free registers and free stack slots. - -allocateRegsAndSpill - :: (FR freeRegs, Outputable instr, Instruction instr) - => Bool -- True <=> reading (load up spilled regs) - -> [VirtualReg] -- don't push these out - -> [instr] -- spill insns - -> [RealReg] -- real registers allocated (accum.) - -> [VirtualReg] -- temps to allocate - -> RegM freeRegs ( [instr] , [RealReg]) - -allocateRegsAndSpill _ _ spills alloc [] - = return (spills, reverse alloc) - -allocateRegsAndSpill reading keep spills alloc (r:rs) - = do assig <- getAssigR - let doSpill = allocRegsAndSpill_spill reading keep spills alloc r rs assig - case lookupUFM assig r of - -- case (1a): already in a register - Just (InReg my_reg) -> - allocateRegsAndSpill reading keep spills (my_reg:alloc) rs - - -- case (1b): already in a register (and memory) - -- NB1. if we're writing this register, update its assignment to be - -- InReg, because the memory value is no longer valid. - -- NB2. This is why we must process written registers here, even if they - -- are also read by the same instruction. - Just (InBoth my_reg _) - -> do when (not reading) (setAssigR (addToUFM assig r (InReg my_reg))) - allocateRegsAndSpill reading keep spills (my_reg:alloc) rs - - -- Not already in a register, so we need to find a free one... - Just (InMem slot) | reading -> doSpill (ReadMem slot) - | otherwise -> doSpill WriteMem - Nothing | reading -> - pprPanic "allocateRegsAndSpill: Cannot read from uninitialized register" (ppr r) - -- NOTE: if the input to the NCG contains some - -- unreachable blocks with junk code, this panic - -- might be triggered. Make sure you only feed - -- sensible code into the NCG. In GHC.Cmm.Pipeline we - -- call removeUnreachableBlocks at the end for this - -- reason. - - | otherwise -> doSpill WriteNew - - --- reading is redundant with reason, but we keep it around because it's --- convenient and it maintains the recursive structure of the allocator. -- EZY -allocRegsAndSpill_spill :: (FR freeRegs, Instruction instr, Outputable instr) - => Bool - -> [VirtualReg] - -> [instr] - -> [RealReg] - -> VirtualReg - -> [VirtualReg] - -> UniqFM Loc - -> SpillLoc - -> RegM freeRegs ([instr], [RealReg]) -allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc - = do dflags <- getDynFlags - let platform = targetPlatform dflags - freeRegs <- getFreeRegsR - let freeRegs_thisClass = frGetFreeRegs platform (classOfVirtualReg r) freeRegs - - case freeRegs_thisClass of - - -- case (2): we have a free register - (my_reg : _) -> - do spills' <- loadTemp r spill_loc my_reg spills - - setAssigR (addToUFM assig r $! newLocation spill_loc my_reg) - setFreeRegsR $ frAllocateReg platform my_reg freeRegs - - allocateRegsAndSpill reading keep spills' (my_reg : alloc) rs - - - -- case (3): we need to push something out to free up a register - [] -> - do let inRegOrBoth (InReg _) = True - inRegOrBoth (InBoth _ _) = True - inRegOrBoth _ = False - let candidates' = - flip delListFromUFM keep $ - filterUFM inRegOrBoth $ - assig - -- This is non-deterministic but we do not - -- currently support deterministic code-generation. - -- See Note [Unique Determinism and code generation] - let candidates = nonDetUFMToList candidates' - - -- the vregs we could kick out that are already in a slot - let candidates_inBoth - = [ (temp, reg, mem) - | (temp, InBoth reg mem) <- candidates - , targetClassOfRealReg platform reg == classOfVirtualReg r ] - - -- the vregs we could kick out that are only in a reg - -- this would require writing the reg to a new slot before using it. - let candidates_inReg - = [ (temp, reg) - | (temp, InReg reg) <- candidates - , targetClassOfRealReg platform reg == classOfVirtualReg r ] - - let result - - -- we have a temporary that is in both register and mem, - -- just free up its register for use. - | (temp, my_reg, slot) : _ <- candidates_inBoth - = do spills' <- loadTemp r spill_loc my_reg spills - let assig1 = addToUFM assig temp (InMem slot) - let assig2 = addToUFM assig1 r $! newLocation spill_loc my_reg - - setAssigR assig2 - allocateRegsAndSpill reading keep spills' (my_reg:alloc) rs - - -- otherwise, we need to spill a temporary that currently - -- resides in a register. - | (temp_to_push_out, (my_reg :: RealReg)) : _ - <- candidates_inReg - = do - (spill_insn, slot) <- spillR (RegReal my_reg) temp_to_push_out - let spill_store = (if reading then id else reverse) - [ -- COMMENT (fsLit "spill alloc") - spill_insn ] - - -- record that this temp was spilled - recordSpill (SpillAlloc temp_to_push_out) - - -- update the register assignment - let assig1 = addToUFM assig temp_to_push_out (InMem slot) - let assig2 = addToUFM assig1 r $! newLocation spill_loc my_reg - setAssigR assig2 - - -- if need be, load up a spilled temp into the reg we've just freed up. - spills' <- loadTemp r spill_loc my_reg spills - - allocateRegsAndSpill reading keep - (spill_store ++ spills') - (my_reg:alloc) rs - - - -- there wasn't anything to spill, so we're screwed. - | otherwise - = pprPanic ("RegAllocLinear.allocRegsAndSpill: no spill candidates\n") - $ vcat - [ text "allocating vreg: " <> text (show r) - , text "assignment: " <> ppr assig - , text "freeRegs: " <> text (show freeRegs) - , text "initFreeRegs: " <> text (show (frInitFreeRegs platform `asTypeOf` freeRegs)) ] - - result - - --- | Calculate a new location after a register has been loaded. -newLocation :: SpillLoc -> RealReg -> Loc --- if the tmp was read from a slot, then now its in a reg as well -newLocation (ReadMem slot) my_reg = InBoth my_reg slot --- writes will always result in only the register being available -newLocation _ my_reg = InReg my_reg - --- | Load up a spilled temporary if we need to (read from memory). -loadTemp - :: (Instruction instr) - => VirtualReg -- the temp being loaded - -> SpillLoc -- the current location of this temp - -> RealReg -- the hreg to load the temp into - -> [instr] - -> RegM freeRegs [instr] - -loadTemp vreg (ReadMem slot) hreg spills - = do - insn <- loadR (RegReal hreg) slot - recordSpill (SpillLoad $ getUnique vreg) - return $ {- COMMENT (fsLit "spill load") : -} insn : spills - -loadTemp _ _ _ spills = - return spills - diff --git a/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs deleted file mode 100644 index 1239380ba2..0000000000 --- a/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs +++ /dev/null @@ -1,61 +0,0 @@ --- | Free regs map for PowerPC -module RegAlloc.Linear.PPC.FreeRegs -where - -import GhcPrelude - -import PPC.Regs -import RegClass -import Reg - -import Outputable -import GHC.Platform - -import Data.Word -import Data.Bits - --- The PowerPC has 32 integer and 32 floating point registers. --- This is 32bit PowerPC, so Word64 is inefficient - two Word32s are much --- better. --- Note that when getFreeRegs scans for free registers, it starts at register --- 31 and counts down. This is a hack for the PowerPC - the higher-numbered --- registers are callee-saves, while the lower regs are caller-saves, so it --- makes sense to start at the high end. --- Apart from that, the code does nothing PowerPC-specific, so feel free to --- add your favourite platform to the #if (if you have 64 registers but only --- 32-bit words). - -data FreeRegs = FreeRegs !Word32 !Word32 - deriving( Show ) -- The Show is used in an ASSERT - -noFreeRegs :: FreeRegs -noFreeRegs = FreeRegs 0 0 - -releaseReg :: RealReg -> FreeRegs -> FreeRegs -releaseReg (RealRegSingle r) (FreeRegs g f) - | r > 31 = FreeRegs g (f .|. (1 `shiftL` (r - 32))) - | otherwise = FreeRegs (g .|. (1 `shiftL` r)) f - -releaseReg _ _ - = panic "RegAlloc.Linear.PPC.releaseReg: bad reg" - -initFreeRegs :: Platform -> FreeRegs -initFreeRegs platform = foldl' (flip releaseReg) noFreeRegs (allocatableRegs platform) - -getFreeRegs :: RegClass -> FreeRegs -> [RealReg] -- lazily -getFreeRegs cls (FreeRegs g f) - | RcDouble <- cls = go f (0x80000000) 63 - | RcInteger <- cls = go g (0x80000000) 31 - | otherwise = pprPanic "RegAllocLinear.getFreeRegs: Bad register class" (ppr cls) - where - go _ 0 _ = [] - go x m i | x .&. m /= 0 = RealRegSingle i : (go x (m `shiftR` 1) $! i-1) - | otherwise = go x (m `shiftR` 1) $! i-1 - -allocateReg :: RealReg -> FreeRegs -> FreeRegs -allocateReg (RealRegSingle r) (FreeRegs g f) - | r > 31 = FreeRegs g (f .&. complement (1 `shiftL` (r - 32))) - | otherwise = FreeRegs (g .&. complement (1 `shiftL` r)) f - -allocateReg _ _ - = panic "RegAlloc.Linear.PPC.allocateReg: bad reg" diff --git a/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs deleted file mode 100644 index fc67159f0f..0000000000 --- a/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs +++ /dev/null @@ -1,189 +0,0 @@ -{-# LANGUAGE CPP #-} - --- | Free regs map for SPARC -module RegAlloc.Linear.SPARC.FreeRegs -where - -import GhcPrelude - -import SPARC.Regs -import RegClass -import Reg - -import GHC.Platform.Regs -import Outputable -import GHC.Platform - -import Data.Word -import Data.Bits - - --------------------------------------------------------------------------------- --- SPARC is like PPC, except for twinning of floating point regs. --- When we allocate a double reg we must take an even numbered --- float reg, as well as the one after it. - - --- Holds bitmaps showing what registers are currently allocated. --- The float and double reg bitmaps overlap, but we only alloc --- float regs into the float map, and double regs into the double map. --- --- Free regs have a bit set in the corresponding bitmap. --- -data FreeRegs - = FreeRegs - !Word32 -- int reg bitmap regs 0..31 - !Word32 -- float reg bitmap regs 32..63 - !Word32 -- double reg bitmap regs 32..63 - -instance Show FreeRegs where - show = showFreeRegs - --- | A reg map where no regs are free to be allocated. -noFreeRegs :: FreeRegs -noFreeRegs = FreeRegs 0 0 0 - - --- | The initial set of free regs. -initFreeRegs :: Platform -> FreeRegs -initFreeRegs platform - = foldl' (flip $ releaseReg platform) noFreeRegs allocatableRegs - - --- | Get all the free registers of this class. -getFreeRegs :: RegClass -> FreeRegs -> [RealReg] -- lazily -getFreeRegs cls (FreeRegs g f d) - | RcInteger <- cls = map RealRegSingle $ go 1 g 1 0 - | RcFloat <- cls = map RealRegSingle $ go 1 f 1 32 - | RcDouble <- cls = map (\i -> RealRegPair i (i+1)) $ go 2 d 1 32 -#if __GLASGOW_HASKELL__ <= 810 - | otherwise = pprPanic "RegAllocLinear.getFreeRegs: Bad register class " (ppr cls) -#endif - where - go _ _ 0 _ - = [] - - go step bitmap mask ix - | bitmap .&. mask /= 0 - = ix : (go step bitmap (mask `shiftL` step) $! ix + step) - - | otherwise - = go step bitmap (mask `shiftL` step) $! ix + step - - --- | Grab a register. -allocateReg :: Platform -> RealReg -> FreeRegs -> FreeRegs -allocateReg platform - reg@(RealRegSingle r) - (FreeRegs g f d) - - -- can't allocate free regs - | not $ freeReg platform r - = pprPanic "SPARC.FreeRegs.allocateReg: not allocating pinned reg" (ppr reg) - - -- a general purpose reg - | r <= 31 - = let mask = complement (bitMask r) - in FreeRegs - (g .&. mask) - f - d - - -- a float reg - | r >= 32, r <= 63 - = let mask = complement (bitMask (r - 32)) - - -- the mask of the double this FP reg aliases - maskLow = if r `mod` 2 == 0 - then complement (bitMask (r - 32)) - else complement (bitMask (r - 32 - 1)) - in FreeRegs - g - (f .&. mask) - (d .&. maskLow) - - | otherwise - = pprPanic "SPARC.FreeRegs.releaseReg: not allocating bad reg" (ppr reg) - -allocateReg _ - reg@(RealRegPair r1 r2) - (FreeRegs g f d) - - | r1 >= 32, r1 <= 63, r1 `mod` 2 == 0 - , r2 >= 32, r2 <= 63 - = let mask1 = complement (bitMask (r1 - 32)) - mask2 = complement (bitMask (r2 - 32)) - in - FreeRegs - g - ((f .&. mask1) .&. mask2) - (d .&. mask1) - - | otherwise - = pprPanic "SPARC.FreeRegs.releaseReg: not allocating bad reg" (ppr reg) - - - --- | Release a register from allocation. --- The register liveness information says that most regs die after a C call, --- but we still don't want to allocate to some of them. --- -releaseReg :: Platform -> RealReg -> FreeRegs -> FreeRegs -releaseReg platform - reg@(RealRegSingle r) - regs@(FreeRegs g f d) - - -- don't release pinned reg - | not $ freeReg platform r - = regs - - -- a general purpose reg - | r <= 31 - = let mask = bitMask r - in FreeRegs (g .|. mask) f d - - -- a float reg - | r >= 32, r <= 63 - = let mask = bitMask (r - 32) - - -- the mask of the double this FP reg aliases - maskLow = if r `mod` 2 == 0 - then bitMask (r - 32) - else bitMask (r - 32 - 1) - in FreeRegs - g - (f .|. mask) - (d .|. maskLow) - - | otherwise - = pprPanic "SPARC.FreeRegs.releaseReg: not releasing bad reg" (ppr reg) - -releaseReg _ - reg@(RealRegPair r1 r2) - (FreeRegs g f d) - - | r1 >= 32, r1 <= 63, r1 `mod` 2 == 0 - , r2 >= 32, r2 <= 63 - = let mask1 = bitMask (r1 - 32) - mask2 = bitMask (r2 - 32) - in - FreeRegs - g - ((f .|. mask1) .|. mask2) - (d .|. mask1) - - | otherwise - = pprPanic "SPARC.FreeRegs.releaseReg: not releasing bad reg" (ppr reg) - - - -bitMask :: Int -> Word32 -bitMask n = 1 `shiftL` n - - -showFreeRegs :: FreeRegs -> String -showFreeRegs regs - = "FreeRegs\n" - ++ " integer: " ++ (show $ getFreeRegs RcInteger regs) ++ "\n" - ++ " float: " ++ (show $ getFreeRegs RcFloat regs) ++ "\n" - ++ " double: " ++ (show $ getFreeRegs RcDouble regs) ++ "\n" diff --git a/compiler/nativeGen/RegAlloc/Linear/StackMap.hs b/compiler/nativeGen/RegAlloc/Linear/StackMap.hs deleted file mode 100644 index 79496c6e43..0000000000 --- a/compiler/nativeGen/RegAlloc/Linear/StackMap.hs +++ /dev/null @@ -1,61 +0,0 @@ - --- | The assignment of virtual registers to stack slots - --- We have lots of stack slots. Memory-to-memory moves are a pain on most --- architectures. Therefore, we avoid having to generate memory-to-memory moves --- by simply giving every virtual register its own stack slot. - --- The StackMap stack map keeps track of virtual register - stack slot --- associations and of which stack slots are still free. Once it has been --- associated, a stack slot is never "freed" or removed from the StackMap again, --- it remains associated until we are done with the current CmmProc. --- -module RegAlloc.Linear.StackMap ( - StackSlot, - StackMap(..), - emptyStackMap, - getStackSlotFor, - getStackUse -) - -where - -import GhcPrelude - -import GHC.Driver.Session -import UniqFM -import Unique - - --- | Identifier for a stack slot. -type StackSlot = Int - -data StackMap - = StackMap - { -- | The slots that are still available to be allocated. - stackMapNextFreeSlot :: !Int - - -- | Assignment of vregs to stack slots. - , stackMapAssignment :: UniqFM StackSlot } - - --- | An empty stack map, with all slots available. -emptyStackMap :: DynFlags -> StackMap -emptyStackMap _ = StackMap 0 emptyUFM - - --- | If this vreg unique already has a stack assignment then return the slot number, --- otherwise allocate a new slot, and update the map. --- -getStackSlotFor :: StackMap -> Unique -> (StackMap, Int) - -getStackSlotFor fs@(StackMap _ reserved) reg - | Just slot <- lookupUFM reserved reg = (fs, slot) - -getStackSlotFor (StackMap freeSlot reserved) reg = - (StackMap (freeSlot+1) (addToUFM reserved reg freeSlot), freeSlot) - --- | Return the number of stack slots that were allocated -getStackUse :: StackMap -> Int -getStackUse (StackMap freeSlot _) = freeSlot - diff --git a/compiler/nativeGen/RegAlloc/Linear/State.hs b/compiler/nativeGen/RegAlloc/Linear/State.hs deleted file mode 100644 index 9e5efa5f7f..0000000000 --- a/compiler/nativeGen/RegAlloc/Linear/State.hs +++ /dev/null @@ -1,184 +0,0 @@ -{-# LANGUAGE CPP, PatternSynonyms, DeriveFunctor #-} - -#if !defined(GHC_LOADED_INTO_GHCI) -{-# LANGUAGE UnboxedTuples #-} -#endif - --- | State monad for the linear register allocator. - --- Here we keep all the state that the register allocator keeps track --- of as it walks the instructions in a basic block. - -module RegAlloc.Linear.State ( - RA_State(..), - RegM, - runR, - - spillR, - loadR, - - getFreeRegsR, - setFreeRegsR, - - getAssigR, - setAssigR, - - getBlockAssigR, - setBlockAssigR, - - setDeltaR, - getDeltaR, - - getUniqueR, - - recordSpill, - recordFixupBlock -) -where - -import GhcPrelude - -import RegAlloc.Linear.Stats -import RegAlloc.Linear.StackMap -import RegAlloc.Linear.Base -import RegAlloc.Liveness -import Instruction -import Reg -import GHC.Cmm.BlockId - -import GHC.Driver.Session -import Unique -import UniqSupply - -import Control.Monad (ap) - --- Avoids using unboxed tuples when loading into GHCi -#if !defined(GHC_LOADED_INTO_GHCI) - -type RA_Result freeRegs a = (# RA_State freeRegs, a #) - -pattern RA_Result :: a -> b -> (# a, b #) -pattern RA_Result a b = (# a, b #) -{-# COMPLETE RA_Result #-} -#else - -data RA_Result freeRegs a = RA_Result {-# UNPACK #-} !(RA_State freeRegs) !a - deriving (Functor) - -#endif - --- | The register allocator monad type. -newtype RegM freeRegs a - = RegM { unReg :: RA_State freeRegs -> RA_Result freeRegs a } - deriving (Functor) - -instance Applicative (RegM freeRegs) where - pure a = RegM $ \s -> RA_Result s a - (<*>) = ap - -instance Monad (RegM freeRegs) where - m >>= k = RegM $ \s -> case unReg m s of { RA_Result s a -> unReg (k a) s } - -instance HasDynFlags (RegM a) where - getDynFlags = RegM $ \s -> RA_Result s (ra_DynFlags s) - - --- | Run a computation in the RegM register allocator monad. -runR :: DynFlags - -> BlockAssignment freeRegs - -> freeRegs - -> RegMap Loc - -> StackMap - -> UniqSupply - -> RegM freeRegs a - -> (BlockAssignment freeRegs, StackMap, RegAllocStats, a) - -runR dflags block_assig freeregs assig stack us thing = - case unReg thing - (RA_State - { ra_blockassig = block_assig - , ra_freeregs = freeregs - , ra_assig = assig - , ra_delta = 0{-???-} - , ra_stack = stack - , ra_us = us - , ra_spills = [] - , ra_DynFlags = dflags - , ra_fixups = [] }) - of - RA_Result state returned_thing - -> (ra_blockassig state, ra_stack state, makeRAStats state, returned_thing) - - --- | Make register allocator stats from its final state. -makeRAStats :: RA_State freeRegs -> RegAllocStats -makeRAStats state - = RegAllocStats - { ra_spillInstrs = binSpillReasons (ra_spills state) - , ra_fixupList = ra_fixups state } - - -spillR :: Instruction instr - => Reg -> Unique -> RegM freeRegs (instr, Int) - -spillR reg temp = RegM $ \ s@RA_State{ra_delta=delta, ra_stack=stack0} -> - let dflags = ra_DynFlags s - (stack1,slot) = getStackSlotFor stack0 temp - instr = mkSpillInstr dflags reg delta slot - in - RA_Result s{ra_stack=stack1} (instr,slot) - - -loadR :: Instruction instr - => Reg -> Int -> RegM freeRegs instr - -loadR reg slot = RegM $ \ s@RA_State{ra_delta=delta} -> - let dflags = ra_DynFlags s - in RA_Result s (mkLoadInstr dflags reg delta slot) - -getFreeRegsR :: RegM freeRegs freeRegs -getFreeRegsR = RegM $ \ s@RA_State{ra_freeregs = freeregs} -> - RA_Result s freeregs - -setFreeRegsR :: freeRegs -> RegM freeRegs () -setFreeRegsR regs = RegM $ \ s -> - RA_Result s{ra_freeregs = regs} () - -getAssigR :: RegM freeRegs (RegMap Loc) -getAssigR = RegM $ \ s@RA_State{ra_assig = assig} -> - RA_Result s assig - -setAssigR :: RegMap Loc -> RegM freeRegs () -setAssigR assig = RegM $ \ s -> - RA_Result s{ra_assig=assig} () - -getBlockAssigR :: RegM freeRegs (BlockAssignment freeRegs) -getBlockAssigR = RegM $ \ s@RA_State{ra_blockassig = assig} -> - RA_Result s assig - -setBlockAssigR :: BlockAssignment freeRegs -> RegM freeRegs () -setBlockAssigR assig = RegM $ \ s -> - RA_Result s{ra_blockassig = assig} () - -setDeltaR :: Int -> RegM freeRegs () -setDeltaR n = RegM $ \ s -> - RA_Result s{ra_delta = n} () - -getDeltaR :: RegM freeRegs Int -getDeltaR = RegM $ \s -> RA_Result s (ra_delta s) - -getUniqueR :: RegM freeRegs Unique -getUniqueR = RegM $ \s -> - case takeUniqFromSupply (ra_us s) of - (uniq, us) -> RA_Result s{ra_us = us} uniq - - --- | Record that a spill instruction was inserted, for profiling. -recordSpill :: SpillReason -> RegM freeRegs () -recordSpill spill - = RegM $ \s -> RA_Result (s { ra_spills = spill : ra_spills s }) () - --- | Record a created fixup block -recordFixupBlock :: BlockId -> BlockId -> BlockId -> RegM freeRegs () -recordFixupBlock from between to - = RegM $ \s -> RA_Result (s { ra_fixups = (from,between,to) : ra_fixups s }) () diff --git a/compiler/nativeGen/RegAlloc/Linear/Stats.hs b/compiler/nativeGen/RegAlloc/Linear/Stats.hs deleted file mode 100644 index 74f3c834d0..0000000000 --- a/compiler/nativeGen/RegAlloc/Linear/Stats.hs +++ /dev/null @@ -1,87 +0,0 @@ -module RegAlloc.Linear.Stats ( - binSpillReasons, - countRegRegMovesNat, - pprStats -) - -where - -import GhcPrelude - -import RegAlloc.Linear.Base -import RegAlloc.Liveness -import Instruction - -import UniqFM -import Outputable - -import State - --- | Build a map of how many times each reg was alloced, clobbered, loaded etc. -binSpillReasons - :: [SpillReason] -> UniqFM [Int] - -binSpillReasons reasons - = addListToUFM_C - (zipWith (+)) - emptyUFM - (map (\reason -> case reason of - SpillAlloc r -> (r, [1, 0, 0, 0, 0]) - SpillClobber r -> (r, [0, 1, 0, 0, 0]) - SpillLoad r -> (r, [0, 0, 1, 0, 0]) - SpillJoinRR r -> (r, [0, 0, 0, 1, 0]) - SpillJoinRM r -> (r, [0, 0, 0, 0, 1])) reasons) - - --- | Count reg-reg moves remaining in this code. -countRegRegMovesNat - :: Instruction instr - => NatCmmDecl statics instr -> Int - -countRegRegMovesNat cmm - = execState (mapGenBlockTopM countBlock cmm) 0 - where - countBlock b@(BasicBlock _ instrs) - = do mapM_ countInstr instrs - return b - - countInstr instr - | Just _ <- takeRegRegMoveInstr instr - = do modify (+ 1) - return instr - - | otherwise - = return instr - - --- | Pretty print some RegAllocStats -pprStats - :: Instruction instr - => [NatCmmDecl statics instr] -> [RegAllocStats] -> SDoc - -pprStats code statss - = let -- sum up all the instrs inserted by the spiller - spills = foldl' (plusUFM_C (zipWith (+))) - emptyUFM - $ map ra_spillInstrs statss - - spillTotals = foldl' (zipWith (+)) - [0, 0, 0, 0, 0] - $ nonDetEltsUFM spills - -- See Note [Unique Determinism and code generation] - - -- count how many reg-reg-moves remain in the code - moves = sum $ map countRegRegMovesNat code - - pprSpill (reg, spills) - = parens $ (hcat $ punctuate (text ", ") (doubleQuotes (ppr reg) : map ppr spills)) - - in ( text "-- spills-added-total" - $$ text "-- (allocs, clobbers, loads, joinRR, joinRM, reg_reg_moves_remaining)" - $$ (parens $ (hcat $ punctuate (text ", ") (map ppr spillTotals ++ [ppr moves]))) - $$ text "" - $$ text "-- spills-added" - $$ text "-- (reg_name, allocs, clobbers, loads, joinRR, joinRM)" - $$ (pprUFMWithKeys spills (vcat . map pprSpill)) - $$ text "") - diff --git a/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs deleted file mode 100644 index e7f8cb4a63..0000000000 --- a/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs +++ /dev/null @@ -1,53 +0,0 @@ - --- | Free regs map for i386 -module RegAlloc.Linear.X86.FreeRegs -where - -import GhcPrelude - -import X86.Regs -import RegClass -import Reg -import Panic -import GHC.Platform - -import Data.Word -import Data.Bits - -newtype FreeRegs = FreeRegs Word32 - deriving Show - -noFreeRegs :: FreeRegs -noFreeRegs = FreeRegs 0 - -releaseReg :: RealReg -> FreeRegs -> FreeRegs -releaseReg (RealRegSingle n) (FreeRegs f) - = FreeRegs (f .|. (1 `shiftL` n)) - -releaseReg _ _ - = panic "RegAlloc.Linear.X86.FreeRegs.releaseReg: no reg" - -initFreeRegs :: Platform -> FreeRegs -initFreeRegs platform - = foldl' (flip releaseReg) noFreeRegs (allocatableRegs platform) - -getFreeRegs :: Platform -> RegClass -> FreeRegs -> [RealReg] -- lazily -getFreeRegs platform cls (FreeRegs f) = go f 0 - - where go 0 _ = [] - go n m - | n .&. 1 /= 0 && classOfRealReg platform (RealRegSingle m) == cls - = RealRegSingle m : (go (n `shiftR` 1) $! (m+1)) - - | otherwise - = go (n `shiftR` 1) $! (m+1) - -- ToDo: there's no point looking through all the integer registers - -- in order to find a floating-point one. - -allocateReg :: RealReg -> FreeRegs -> FreeRegs -allocateReg (RealRegSingle r) (FreeRegs f) - = FreeRegs (f .&. complement (1 `shiftL` r)) - -allocateReg _ _ - = panic "RegAlloc.Linear.X86.FreeRegs.allocateReg: no reg" - diff --git a/compiler/nativeGen/RegAlloc/Linear/X86_64/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/X86_64/FreeRegs.hs deleted file mode 100644 index 44a3bbb306..0000000000 --- a/compiler/nativeGen/RegAlloc/Linear/X86_64/FreeRegs.hs +++ /dev/null @@ -1,54 +0,0 @@ - --- | Free regs map for x86_64 -module RegAlloc.Linear.X86_64.FreeRegs -where - -import GhcPrelude - -import X86.Regs -import RegClass -import Reg -import Panic -import GHC.Platform - -import Data.Word -import Data.Bits - -newtype FreeRegs = FreeRegs Word64 - deriving Show - -noFreeRegs :: FreeRegs -noFreeRegs = FreeRegs 0 - -releaseReg :: RealReg -> FreeRegs -> FreeRegs -releaseReg (RealRegSingle n) (FreeRegs f) - = FreeRegs (f .|. (1 `shiftL` n)) - -releaseReg _ _ - = panic "RegAlloc.Linear.X86_64.FreeRegs.releaseReg: no reg" - -initFreeRegs :: Platform -> FreeRegs -initFreeRegs platform - = foldl' (flip releaseReg) noFreeRegs (allocatableRegs platform) - -getFreeRegs :: Platform -> RegClass -> FreeRegs -> [RealReg] -- lazily -getFreeRegs platform cls (FreeRegs f) = go f 0 - - where go 0 _ = [] - go n m - | n .&. 1 /= 0 && classOfRealReg platform (RealRegSingle m) == cls - = RealRegSingle m : (go (n `shiftR` 1) $! (m+1)) - - | otherwise - = go (n `shiftR` 1) $! (m+1) - -- ToDo: there's no point looking through all the integer registers - -- in order to find a floating-point one. - -allocateReg :: RealReg -> FreeRegs -> FreeRegs -allocateReg (RealRegSingle r) (FreeRegs f) - = FreeRegs (f .&. complement (1 `shiftL` r)) - -allocateReg _ _ - = panic "RegAlloc.Linear.X86_64.FreeRegs.allocateReg: no reg" - - diff --git a/compiler/nativeGen/RegAlloc/Liveness.hs b/compiler/nativeGen/RegAlloc/Liveness.hs deleted file mode 100644 index b6fd3b3937..0000000000 --- a/compiler/nativeGen/RegAlloc/Liveness.hs +++ /dev/null @@ -1,1025 +0,0 @@ -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TypeFamilies #-} - -{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} - ------------------------------------------------------------------------------ --- --- The register liveness determinator --- --- (c) The University of Glasgow 2004-2013 --- ------------------------------------------------------------------------------ - -module RegAlloc.Liveness ( - RegSet, - RegMap, emptyRegMap, - BlockMap, mapEmpty, - LiveCmmDecl, - InstrSR (..), - LiveInstr (..), - Liveness (..), - LiveInfo (..), - LiveBasicBlock, - - mapBlockTop, mapBlockTopM, mapSCCM, - mapGenBlockTop, mapGenBlockTopM, - stripLive, - stripLiveBlock, - slurpConflicts, - slurpReloadCoalesce, - eraseDeltasLive, - patchEraseLive, - patchRegsLiveInstr, - reverseBlocksInTops, - regLiveness, - cmmTopLiveness - ) where -import GhcPrelude - -import Reg -import Instruction - -import GHC.Cmm.BlockId -import CFG -import GHC.Cmm.Dataflow.Collections -import GHC.Cmm.Dataflow.Label -import GHC.Cmm hiding (RegSet, emptyRegSet) - -import Digraph -import GHC.Driver.Session -import MonadUtils -import Outputable -import GHC.Platform -import UniqSet -import UniqFM -import UniqSupply -import Bag -import State - -import Data.List -import Data.Maybe -import Data.IntSet (IntSet) - ------------------------------------------------------------------------------ -type RegSet = UniqSet Reg - -type RegMap a = UniqFM a - -emptyRegMap :: UniqFM a -emptyRegMap = emptyUFM - -emptyRegSet :: RegSet -emptyRegSet = emptyUniqSet - -type BlockMap a = LabelMap a - - --- | A top level thing which carries liveness information. -type LiveCmmDecl statics instr - = GenCmmDecl - statics - LiveInfo - [SCC (LiveBasicBlock instr)] - - --- | The register allocator also wants to use SPILL/RELOAD meta instructions, --- so we'll keep those here. -data InstrSR instr - -- | A real machine instruction - = Instr instr - - -- | spill this reg to a stack slot - | SPILL Reg Int - - -- | reload this reg from a stack slot - | RELOAD Int Reg - -instance Instruction instr => Instruction (InstrSR instr) where - regUsageOfInstr platform i - = case i of - Instr instr -> regUsageOfInstr platform instr - SPILL reg _ -> RU [reg] [] - RELOAD _ reg -> RU [] [reg] - - patchRegsOfInstr i f - = case i of - Instr instr -> Instr (patchRegsOfInstr instr f) - SPILL reg slot -> SPILL (f reg) slot - RELOAD slot reg -> RELOAD slot (f reg) - - isJumpishInstr i - = case i of - Instr instr -> isJumpishInstr instr - _ -> False - - jumpDestsOfInstr i - = case i of - Instr instr -> jumpDestsOfInstr instr - _ -> [] - - patchJumpInstr i f - = case i of - Instr instr -> Instr (patchJumpInstr instr f) - _ -> i - - mkSpillInstr = error "mkSpillInstr[InstrSR]: Not making SPILL meta-instr" - mkLoadInstr = error "mkLoadInstr[InstrSR]: Not making LOAD meta-instr" - - takeDeltaInstr i - = case i of - Instr instr -> takeDeltaInstr instr - _ -> Nothing - - isMetaInstr i - = case i of - Instr instr -> isMetaInstr instr - _ -> False - - mkRegRegMoveInstr platform r1 r2 - = Instr (mkRegRegMoveInstr platform r1 r2) - - takeRegRegMoveInstr i - = case i of - Instr instr -> takeRegRegMoveInstr instr - _ -> Nothing - - mkJumpInstr target = map Instr (mkJumpInstr target) - - mkStackAllocInstr platform amount = - Instr <$> mkStackAllocInstr platform amount - - mkStackDeallocInstr platform amount = - Instr <$> mkStackDeallocInstr platform amount - - --- | An instruction with liveness information. -data LiveInstr instr - = LiveInstr (InstrSR instr) (Maybe Liveness) - --- | Liveness information. --- The regs which die are ones which are no longer live in the *next* instruction --- in this sequence. --- (NB. if the instruction is a jump, these registers might still be live --- at the jump target(s) - you have to check the liveness at the destination --- block to find out). - -data Liveness - = Liveness - { liveBorn :: RegSet -- ^ registers born in this instruction (written to for first time). - , liveDieRead :: RegSet -- ^ registers that died because they were read for the last time. - , liveDieWrite :: RegSet } -- ^ registers that died because they were clobbered by something. - - --- | Stash regs live on entry to each basic block in the info part of the cmm code. -data LiveInfo - = LiveInfo - (LabelMap RawCmmStatics) -- cmm info table static stuff - [BlockId] -- entry points (first one is the - -- entry point for the proc). - (BlockMap RegSet) -- argument locals live on entry to this block - (BlockMap IntSet) -- stack slots live on entry to this block - - --- | A basic block with liveness information. -type LiveBasicBlock instr - = GenBasicBlock (LiveInstr instr) - - -instance Outputable instr - => Outputable (InstrSR instr) where - - ppr (Instr realInstr) - = ppr realInstr - - ppr (SPILL reg slot) - = hcat [ - text "\tSPILL", - char ' ', - ppr reg, - comma, - text "SLOT" <> parens (int slot)] - - ppr (RELOAD slot reg) - = hcat [ - text "\tRELOAD", - char ' ', - text "SLOT" <> parens (int slot), - comma, - ppr reg] - -instance Outputable instr - => Outputable (LiveInstr instr) where - - ppr (LiveInstr instr Nothing) - = ppr instr - - ppr (LiveInstr instr (Just live)) - = ppr instr - $$ (nest 8 - $ vcat - [ pprRegs (text "# born: ") (liveBorn live) - , pprRegs (text "# r_dying: ") (liveDieRead live) - , pprRegs (text "# w_dying: ") (liveDieWrite live) ] - $+$ space) - - where pprRegs :: SDoc -> RegSet -> SDoc - pprRegs name regs - | isEmptyUniqSet regs = empty - | otherwise = name <> - (pprUFM (getUniqSet regs) (hcat . punctuate space . map ppr)) - -instance Outputable LiveInfo where - ppr (LiveInfo mb_static entryIds liveVRegsOnEntry liveSlotsOnEntry) - = (ppr mb_static) - $$ text "# entryIds = " <> ppr entryIds - $$ text "# liveVRegsOnEntry = " <> ppr liveVRegsOnEntry - $$ text "# liveSlotsOnEntry = " <> text (show liveSlotsOnEntry) - - - --- | map a function across all the basic blocks in this code --- -mapBlockTop - :: (LiveBasicBlock instr -> LiveBasicBlock instr) - -> LiveCmmDecl statics instr -> LiveCmmDecl statics instr - -mapBlockTop f cmm - = evalState (mapBlockTopM (\x -> return $ f x) cmm) () - - --- | map a function across all the basic blocks in this code (monadic version) --- -mapBlockTopM - :: Monad m - => (LiveBasicBlock instr -> m (LiveBasicBlock instr)) - -> LiveCmmDecl statics instr -> m (LiveCmmDecl statics instr) - -mapBlockTopM _ cmm@(CmmData{}) - = return cmm - -mapBlockTopM f (CmmProc header label live sccs) - = do sccs' <- mapM (mapSCCM f) sccs - return $ CmmProc header label live sccs' - -mapSCCM :: Monad m => (a -> m b) -> SCC a -> m (SCC b) -mapSCCM f (AcyclicSCC x) - = do x' <- f x - return $ AcyclicSCC x' - -mapSCCM f (CyclicSCC xs) - = do xs' <- mapM f xs - return $ CyclicSCC xs' - - --- map a function across all the basic blocks in this code -mapGenBlockTop - :: (GenBasicBlock i -> GenBasicBlock i) - -> (GenCmmDecl d h (ListGraph i) -> GenCmmDecl d h (ListGraph i)) - -mapGenBlockTop f cmm - = evalState (mapGenBlockTopM (\x -> return $ f x) cmm) () - - --- | map a function across all the basic blocks in this code (monadic version) -mapGenBlockTopM - :: Monad m - => (GenBasicBlock i -> m (GenBasicBlock i)) - -> (GenCmmDecl d h (ListGraph i) -> m (GenCmmDecl d h (ListGraph i))) - -mapGenBlockTopM _ cmm@(CmmData{}) - = return cmm - -mapGenBlockTopM f (CmmProc header label live (ListGraph blocks)) - = do blocks' <- mapM f blocks - return $ CmmProc header label live (ListGraph blocks') - - --- | Slurp out the list of register conflicts and reg-reg moves from this top level thing. --- Slurping of conflicts and moves is wrapped up together so we don't have --- to make two passes over the same code when we want to build the graph. --- -slurpConflicts - :: Instruction instr - => LiveCmmDecl statics instr - -> (Bag (UniqSet Reg), Bag (Reg, Reg)) - -slurpConflicts live - = slurpCmm (emptyBag, emptyBag) live - - where slurpCmm rs CmmData{} = rs - slurpCmm rs (CmmProc info _ _ sccs) - = foldl' (slurpSCC info) rs sccs - - slurpSCC info rs (AcyclicSCC b) - = slurpBlock info rs b - - slurpSCC info rs (CyclicSCC bs) - = foldl' (slurpBlock info) rs bs - - slurpBlock info rs (BasicBlock blockId instrs) - | LiveInfo _ _ blockLive _ <- info - , Just rsLiveEntry <- mapLookup blockId blockLive - , (conflicts, moves) <- slurpLIs rsLiveEntry rs instrs - = (consBag rsLiveEntry conflicts, moves) - - | otherwise - = panic "Liveness.slurpConflicts: bad block" - - slurpLIs rsLive (conflicts, moves) [] - = (consBag rsLive conflicts, moves) - - slurpLIs rsLive rs (LiveInstr _ Nothing : lis) - = slurpLIs rsLive rs lis - - slurpLIs rsLiveEntry (conflicts, moves) (LiveInstr instr (Just live) : lis) - = let - -- regs that die because they are read for the last time at the start of an instruction - -- are not live across it. - rsLiveAcross = rsLiveEntry `minusUniqSet` (liveDieRead live) - - -- regs live on entry to the next instruction. - -- be careful of orphans, make sure to delete dying regs _after_ unioning - -- in the ones that are born here. - rsLiveNext = (rsLiveAcross `unionUniqSets` (liveBorn live)) - `minusUniqSet` (liveDieWrite live) - - -- orphan vregs are the ones that die in the same instruction they are born in. - -- these are likely to be results that are never used, but we still - -- need to assign a hreg to them.. - rsOrphans = intersectUniqSets - (liveBorn live) - (unionUniqSets (liveDieWrite live) (liveDieRead live)) - - -- - rsConflicts = unionUniqSets rsLiveNext rsOrphans - - in case takeRegRegMoveInstr instr of - Just rr -> slurpLIs rsLiveNext - ( consBag rsConflicts conflicts - , consBag rr moves) lis - - Nothing -> slurpLIs rsLiveNext - ( consBag rsConflicts conflicts - , moves) lis - - --- | For spill\/reloads --- --- SPILL v1, slot1 --- ... --- RELOAD slot1, v2 --- --- If we can arrange that v1 and v2 are allocated to the same hreg it's more likely --- the spill\/reload instrs can be cleaned and replaced by a nop reg-reg move. --- --- -slurpReloadCoalesce - :: forall statics instr. Instruction instr - => LiveCmmDecl statics instr - -> Bag (Reg, Reg) - -slurpReloadCoalesce live - = slurpCmm emptyBag live - - where - slurpCmm :: Bag (Reg, Reg) - -> GenCmmDecl t t1 [SCC (LiveBasicBlock instr)] - -> Bag (Reg, Reg) - slurpCmm cs CmmData{} = cs - slurpCmm cs (CmmProc _ _ _ sccs) - = slurpComp cs (flattenSCCs sccs) - - slurpComp :: Bag (Reg, Reg) - -> [LiveBasicBlock instr] - -> Bag (Reg, Reg) - slurpComp cs blocks - = let (moveBags, _) = runState (slurpCompM blocks) emptyUFM - in unionManyBags (cs : moveBags) - - slurpCompM :: [LiveBasicBlock instr] - -> State (UniqFM [UniqFM Reg]) [Bag (Reg, Reg)] - slurpCompM blocks - = do -- run the analysis once to record the mapping across jumps. - mapM_ (slurpBlock False) blocks - - -- run it a second time while using the information from the last pass. - -- We /could/ run this many more times to deal with graphical control - -- flow and propagating info across multiple jumps, but it's probably - -- not worth the trouble. - mapM (slurpBlock True) blocks - - slurpBlock :: Bool -> LiveBasicBlock instr - -> State (UniqFM [UniqFM Reg]) (Bag (Reg, Reg)) - slurpBlock propagate (BasicBlock blockId instrs) - = do -- grab the slot map for entry to this block - slotMap <- if propagate - then getSlotMap blockId - else return emptyUFM - - (_, mMoves) <- mapAccumLM slurpLI slotMap instrs - return $ listToBag $ catMaybes mMoves - - slurpLI :: UniqFM Reg -- current slotMap - -> LiveInstr instr - -> State (UniqFM [UniqFM Reg]) -- blockId -> [slot -> reg] - -- for tracking slotMaps across jumps - - ( UniqFM Reg -- new slotMap - , Maybe (Reg, Reg)) -- maybe a new coalesce edge - - slurpLI slotMap li - - -- remember what reg was stored into the slot - | LiveInstr (SPILL reg slot) _ <- li - , slotMap' <- addToUFM slotMap slot reg - = return (slotMap', Nothing) - - -- add an edge between the this reg and the last one stored into the slot - | LiveInstr (RELOAD slot reg) _ <- li - = case lookupUFM slotMap slot of - Just reg2 - | reg /= reg2 -> return (slotMap, Just (reg, reg2)) - | otherwise -> return (slotMap, Nothing) - - Nothing -> return (slotMap, Nothing) - - -- if we hit a jump, remember the current slotMap - | LiveInstr (Instr instr) _ <- li - , targets <- jumpDestsOfInstr instr - , not $ null targets - = do mapM_ (accSlotMap slotMap) targets - return (slotMap, Nothing) - - | otherwise - = return (slotMap, Nothing) - - -- record a slotmap for an in edge to this block - accSlotMap slotMap blockId - = modify (\s -> addToUFM_C (++) s blockId [slotMap]) - - -- work out the slot map on entry to this block - -- if we have slot maps for multiple in-edges then we need to merge them. - getSlotMap blockId - = do map <- get - let slotMaps = fromMaybe [] (lookupUFM map blockId) - return $ foldr mergeSlotMaps emptyUFM slotMaps - - mergeSlotMaps :: UniqFM Reg -> UniqFM Reg -> UniqFM Reg - mergeSlotMaps map1 map2 - = listToUFM - $ [ (k, r1) - | (k, r1) <- nonDetUFMToList map1 - -- This is non-deterministic but we do not - -- currently support deterministic code-generation. - -- See Note [Unique Determinism and code generation] - , case lookupUFM map2 k of - Nothing -> False - Just r2 -> r1 == r2 ] - - --- | Strip away liveness information, yielding NatCmmDecl -stripLive - :: (Outputable statics, Outputable instr, Instruction instr) - => DynFlags - -> LiveCmmDecl statics instr - -> NatCmmDecl statics instr - -stripLive dflags live - = stripCmm live - - where stripCmm :: (Outputable statics, Outputable instr, Instruction instr) - => LiveCmmDecl statics instr -> NatCmmDecl statics instr - stripCmm (CmmData sec ds) = CmmData sec ds - stripCmm (CmmProc (LiveInfo info (first_id:_) _ _) label live sccs) - = let final_blocks = flattenSCCs sccs - - -- make sure the block that was first in the input list - -- stays at the front of the output. This is the entry point - -- of the proc, and it needs to come first. - ((first':_), rest') - = partition ((== first_id) . blockId) final_blocks - - in CmmProc info label live - (ListGraph $ map (stripLiveBlock dflags) $ first' : rest') - - -- If the proc has blocks but we don't know what the first one was, then we're dead. - stripCmm proc - = pprPanic "RegAlloc.Liveness.stripLive: no first_id on proc" (ppr proc) - --- | Strip away liveness information from a basic block, --- and make real spill instructions out of SPILL, RELOAD pseudos along the way. - -stripLiveBlock - :: Instruction instr - => DynFlags - -> LiveBasicBlock instr - -> NatBasicBlock instr - -stripLiveBlock dflags (BasicBlock i lis) - = BasicBlock i instrs' - - where (instrs', _) - = runState (spillNat [] lis) 0 - - spillNat acc [] - = return (reverse acc) - - spillNat acc (LiveInstr (SPILL reg slot) _ : instrs) - = do delta <- get - spillNat (mkSpillInstr dflags reg delta slot : acc) instrs - - spillNat acc (LiveInstr (RELOAD slot reg) _ : instrs) - = do delta <- get - spillNat (mkLoadInstr dflags reg delta slot : acc) instrs - - spillNat acc (LiveInstr (Instr instr) _ : instrs) - | Just i <- takeDeltaInstr instr - = do put i - spillNat acc instrs - - spillNat acc (LiveInstr (Instr instr) _ : instrs) - = spillNat (instr : acc) instrs - - --- | Erase Delta instructions. - -eraseDeltasLive - :: Instruction instr - => LiveCmmDecl statics instr - -> LiveCmmDecl statics instr - -eraseDeltasLive cmm - = mapBlockTop eraseBlock cmm - where - eraseBlock (BasicBlock id lis) - = BasicBlock id - $ filter (\(LiveInstr i _) -> not $ isJust $ takeDeltaInstr i) - $ lis - - --- | Patch the registers in this code according to this register mapping. --- also erase reg -> reg moves when the reg is the same. --- also erase reg -> reg moves when the destination dies in this instr. -patchEraseLive - :: Instruction instr - => (Reg -> Reg) - -> LiveCmmDecl statics instr -> LiveCmmDecl statics instr - -patchEraseLive patchF cmm - = patchCmm cmm - where - patchCmm cmm@CmmData{} = cmm - - patchCmm (CmmProc info label live sccs) - | LiveInfo static id blockMap mLiveSlots <- info - = let - patchRegSet set = mkUniqSet $ map patchF $ nonDetEltsUFM set - -- See Note [Unique Determinism and code generation] - blockMap' = mapMap (patchRegSet . getUniqSet) blockMap - - info' = LiveInfo static id blockMap' mLiveSlots - in CmmProc info' label live $ map patchSCC sccs - - patchSCC (AcyclicSCC b) = AcyclicSCC (patchBlock b) - patchSCC (CyclicSCC bs) = CyclicSCC (map patchBlock bs) - - patchBlock (BasicBlock id lis) - = BasicBlock id $ patchInstrs lis - - patchInstrs [] = [] - patchInstrs (li : lis) - - | LiveInstr i (Just live) <- li' - , Just (r1, r2) <- takeRegRegMoveInstr i - , eatMe r1 r2 live - = patchInstrs lis - - | otherwise - = li' : patchInstrs lis - - where li' = patchRegsLiveInstr patchF li - - eatMe r1 r2 live - -- source and destination regs are the same - | r1 == r2 = True - - -- destination reg is never used - | elementOfUniqSet r2 (liveBorn live) - , elementOfUniqSet r2 (liveDieRead live) || elementOfUniqSet r2 (liveDieWrite live) - = True - - | otherwise = False - - --- | Patch registers in this LiveInstr, including the liveness information. --- -patchRegsLiveInstr - :: Instruction instr - => (Reg -> Reg) - -> LiveInstr instr -> LiveInstr instr - -patchRegsLiveInstr patchF li - = case li of - LiveInstr instr Nothing - -> LiveInstr (patchRegsOfInstr instr patchF) Nothing - - LiveInstr instr (Just live) - -> LiveInstr - (patchRegsOfInstr instr patchF) - (Just live - { -- WARNING: have to go via lists here because patchF changes the uniq in the Reg - liveBorn = mapUniqSet patchF $ liveBorn live - , liveDieRead = mapUniqSet patchF $ liveDieRead live - , liveDieWrite = mapUniqSet patchF $ liveDieWrite live }) - -- See Note [Unique Determinism and code generation] - - --------------------------------------------------------------------------------- --- | Convert a NatCmmDecl to a LiveCmmDecl, with liveness information - -cmmTopLiveness - :: (Outputable instr, Instruction instr) - => Maybe CFG -> Platform - -> NatCmmDecl statics instr - -> UniqSM (LiveCmmDecl statics instr) -cmmTopLiveness cfg platform cmm - = regLiveness platform $ natCmmTopToLive cfg cmm - -natCmmTopToLive - :: (Instruction instr, Outputable instr) - => Maybe CFG -> NatCmmDecl statics instr - -> LiveCmmDecl statics instr - -natCmmTopToLive _ (CmmData i d) - = CmmData i d - -natCmmTopToLive _ (CmmProc info lbl live (ListGraph [])) - = CmmProc (LiveInfo info [] mapEmpty mapEmpty) lbl live [] - -natCmmTopToLive mCfg proc@(CmmProc info lbl live (ListGraph blocks@(first : _))) - = CmmProc (LiveInfo info' (first_id : entry_ids) mapEmpty mapEmpty) - lbl live sccsLive - where - first_id = blockId first - all_entry_ids = entryBlocks proc - sccs = sccBlocks blocks all_entry_ids mCfg - sccsLive = map (fmap (\(BasicBlock l instrs) -> - BasicBlock l (map (\i -> LiveInstr (Instr i) Nothing) instrs))) - $ sccs - - entry_ids = filter (reachable_node) . - filter (/= first_id) $ all_entry_ids - info' = mapFilterWithKey (\node _ -> reachable_node node) info - reachable_node - | Just cfg <- mCfg - = hasNode cfg - | otherwise - = const True - --- --- Compute the liveness graph of the set of basic blocks. Important: --- we also discard any unreachable code here, starting from the entry --- points (the first block in the list, and any blocks with info --- tables). Unreachable code arises when code blocks are orphaned in --- earlier optimisation passes, and may confuse the register allocator --- by referring to registers that are not initialised. It's easy to --- discard the unreachable code as part of the SCC pass, so that's --- exactly what we do. (#7574) --- -sccBlocks - :: forall instr . Instruction instr - => [NatBasicBlock instr] - -> [BlockId] - -> Maybe CFG - -> [SCC (NatBasicBlock instr)] - -sccBlocks blocks entries mcfg = map (fmap node_payload) sccs - where - nodes :: [ Node BlockId (NatBasicBlock instr) ] - nodes = [ DigraphNode block id (getOutEdges instrs) - | block@(BasicBlock id instrs) <- blocks ] - - g1 = graphFromEdgedVerticesUniq nodes - - reachable :: LabelSet - reachable - | Just cfg <- mcfg - -- Our CFG only contains reachable nodes by construction at this point. - = setFromList $ getCfgNodes cfg - | otherwise - = setFromList $ [ node_key node | node <- reachablesG g1 roots ] - - g2 = graphFromEdgedVerticesUniq [ node | node <- nodes - , node_key node - `setMember` reachable ] - - sccs = stronglyConnCompG g2 - - getOutEdges :: Instruction instr => [instr] -> [BlockId] - getOutEdges instrs = concatMap jumpDestsOfInstr instrs - - -- This is truly ugly, but I don't see a good alternative. - -- Digraph just has the wrong API. We want to identify nodes - -- by their keys (BlockId), but Digraph requires the whole - -- node: (NatBasicBlock, BlockId, [BlockId]). This takes - -- advantage of the fact that Digraph only looks at the key, - -- even though it asks for the whole triple. - roots = [DigraphNode (panic "sccBlocks") b (panic "sccBlocks") - | b <- entries ] - --------------------------------------------------------------------------------- --- Annotate code with register liveness information --- - -regLiveness - :: (Outputable instr, Instruction instr) - => Platform - -> LiveCmmDecl statics instr - -> UniqSM (LiveCmmDecl statics instr) - -regLiveness _ (CmmData i d) - = return $ CmmData i d - -regLiveness _ (CmmProc info lbl live []) - | LiveInfo static mFirst _ _ <- info - = return $ CmmProc - (LiveInfo static mFirst mapEmpty mapEmpty) - lbl live [] - -regLiveness platform (CmmProc info lbl live sccs) - | LiveInfo static mFirst _ liveSlotsOnEntry <- info - = let (ann_sccs, block_live) = computeLiveness platform sccs - - in return $ CmmProc (LiveInfo static mFirst block_live liveSlotsOnEntry) - lbl live ann_sccs - - --- ----------------------------------------------------------------------------- --- | Check ordering of Blocks --- The computeLiveness function requires SCCs to be in reverse --- dependent order. If they're not the liveness information will be --- wrong, and we'll get a bad allocation. Better to check for this --- precondition explicitly or some other poor sucker will waste a --- day staring at bad assembly code.. --- -checkIsReverseDependent - :: Instruction instr - => [SCC (LiveBasicBlock instr)] -- ^ SCCs of blocks that we're about to run the liveness determinator on. - -> Maybe BlockId -- ^ BlockIds that fail the test (if any) - -checkIsReverseDependent sccs' - = go emptyUniqSet sccs' - - where go _ [] - = Nothing - - go blocksSeen (AcyclicSCC block : sccs) - = let dests = slurpJumpDestsOfBlock block - blocksSeen' = unionUniqSets blocksSeen $ mkUniqSet [blockId block] - badDests = dests `minusUniqSet` blocksSeen' - in case nonDetEltsUniqSet badDests of - -- See Note [Unique Determinism and code generation] - [] -> go blocksSeen' sccs - bad : _ -> Just bad - - go blocksSeen (CyclicSCC blocks : sccs) - = let dests = unionManyUniqSets $ map slurpJumpDestsOfBlock blocks - blocksSeen' = unionUniqSets blocksSeen $ mkUniqSet $ map blockId blocks - badDests = dests `minusUniqSet` blocksSeen' - in case nonDetEltsUniqSet badDests of - -- See Note [Unique Determinism and code generation] - [] -> go blocksSeen' sccs - bad : _ -> Just bad - - slurpJumpDestsOfBlock (BasicBlock _ instrs) - = unionManyUniqSets - $ map (mkUniqSet . jumpDestsOfInstr) - [ i | LiveInstr i _ <- instrs] - - --- | If we've compute liveness info for this code already we have to reverse --- the SCCs in each top to get them back to the right order so we can do it again. -reverseBlocksInTops :: LiveCmmDecl statics instr -> LiveCmmDecl statics instr -reverseBlocksInTops top - = case top of - CmmData{} -> top - CmmProc info lbl live sccs -> CmmProc info lbl live (reverse sccs) - - --- | Computing liveness --- --- On entry, the SCCs must be in "reverse" order: later blocks may transfer --- control to earlier ones only, else `panic`. --- --- The SCCs returned are in the *opposite* order, which is exactly what we --- want for the next pass. --- -computeLiveness - :: (Outputable instr, Instruction instr) - => Platform - -> [SCC (LiveBasicBlock instr)] - -> ([SCC (LiveBasicBlock instr)], -- instructions annotated with list of registers - -- which are "dead after this instruction". - BlockMap RegSet) -- blocks annotated with set of live registers - -- on entry to the block. - -computeLiveness platform sccs - = case checkIsReverseDependent sccs of - Nothing -> livenessSCCs platform mapEmpty [] sccs - Just bad -> pprPanic "RegAlloc.Liveness.computeLiveness" - (vcat [ text "SCCs aren't in reverse dependent order" - , text "bad blockId" <+> ppr bad - , ppr sccs]) - -livenessSCCs - :: Instruction instr - => Platform - -> BlockMap RegSet - -> [SCC (LiveBasicBlock instr)] -- accum - -> [SCC (LiveBasicBlock instr)] - -> ( [SCC (LiveBasicBlock instr)] - , BlockMap RegSet) - -livenessSCCs _ blockmap done [] - = (done, blockmap) - -livenessSCCs platform blockmap done (AcyclicSCC block : sccs) - = let (blockmap', block') = livenessBlock platform blockmap block - in livenessSCCs platform blockmap' (AcyclicSCC block' : done) sccs - -livenessSCCs platform blockmap done - (CyclicSCC blocks : sccs) = - livenessSCCs platform blockmap' (CyclicSCC blocks':done) sccs - where (blockmap', blocks') - = iterateUntilUnchanged linearLiveness equalBlockMaps - blockmap blocks - - iterateUntilUnchanged - :: (a -> b -> (a,c)) -> (a -> a -> Bool) - -> a -> b - -> (a,c) - - iterateUntilUnchanged f eq a b - = head $ - concatMap tail $ - groupBy (\(a1, _) (a2, _) -> eq a1 a2) $ - iterate (\(a, _) -> f a b) $ - (a, panic "RegLiveness.livenessSCCs") - - - linearLiveness - :: Instruction instr - => BlockMap RegSet -> [LiveBasicBlock instr] - -> (BlockMap RegSet, [LiveBasicBlock instr]) - - linearLiveness = mapAccumL (livenessBlock platform) - - -- probably the least efficient way to compare two - -- BlockMaps for equality. - equalBlockMaps a b - = a' == b' - where a' = map f $ mapToList a - b' = map f $ mapToList b - f (key,elt) = (key, nonDetEltsUniqSet elt) - -- See Note [Unique Determinism and code generation] - - - --- | Annotate a basic block with register liveness information. --- -livenessBlock - :: Instruction instr - => Platform - -> BlockMap RegSet - -> LiveBasicBlock instr - -> (BlockMap RegSet, LiveBasicBlock instr) - -livenessBlock platform blockmap (BasicBlock block_id instrs) - = let - (regsLiveOnEntry, instrs1) - = livenessBack platform emptyUniqSet blockmap [] (reverse instrs) - blockmap' = mapInsert block_id regsLiveOnEntry blockmap - - instrs2 = livenessForward platform regsLiveOnEntry instrs1 - - output = BasicBlock block_id instrs2 - - in ( blockmap', output) - --- | Calculate liveness going forwards, --- filling in when regs are born - -livenessForward - :: Instruction instr - => Platform - -> RegSet -- regs live on this instr - -> [LiveInstr instr] -> [LiveInstr instr] - -livenessForward _ _ [] = [] -livenessForward platform rsLiveEntry (li@(LiveInstr instr mLive) : lis) - | Just live <- mLive - = let - RU _ written = regUsageOfInstr platform instr - -- Regs that are written to but weren't live on entry to this instruction - -- are recorded as being born here. - rsBorn = mkUniqSet - $ filter (\r -> not $ elementOfUniqSet r rsLiveEntry) written - - rsLiveNext = (rsLiveEntry `unionUniqSets` rsBorn) - `minusUniqSet` (liveDieRead live) - `minusUniqSet` (liveDieWrite live) - - in LiveInstr instr (Just live { liveBorn = rsBorn }) - : livenessForward platform rsLiveNext lis - - | otherwise - = li : livenessForward platform rsLiveEntry lis - - --- | Calculate liveness going backwards, --- filling in when regs die, and what regs are live across each instruction - -livenessBack - :: Instruction instr - => Platform - -> RegSet -- regs live on this instr - -> BlockMap RegSet -- regs live on entry to other BBs - -> [LiveInstr instr] -- instructions (accum) - -> [LiveInstr instr] -- instructions - -> (RegSet, [LiveInstr instr]) - -livenessBack _ liveregs _ done [] = (liveregs, done) - -livenessBack platform liveregs blockmap acc (instr : instrs) - = let (liveregs', instr') = liveness1 platform liveregs blockmap instr - in livenessBack platform liveregs' blockmap (instr' : acc) instrs - - --- don't bother tagging comments or deltas with liveness -liveness1 - :: Instruction instr - => Platform - -> RegSet - -> BlockMap RegSet - -> LiveInstr instr - -> (RegSet, LiveInstr instr) - -liveness1 _ liveregs _ (LiveInstr instr _) - | isMetaInstr instr - = (liveregs, LiveInstr instr Nothing) - -liveness1 platform liveregs blockmap (LiveInstr instr _) - - | not_a_branch - = (liveregs1, LiveInstr instr - (Just $ Liveness - { liveBorn = emptyUniqSet - , liveDieRead = mkUniqSet r_dying - , liveDieWrite = mkUniqSet w_dying })) - - | otherwise - = (liveregs_br, LiveInstr instr - (Just $ Liveness - { liveBorn = emptyUniqSet - , liveDieRead = mkUniqSet r_dying_br - , liveDieWrite = mkUniqSet w_dying })) - - where - !(RU read written) = regUsageOfInstr platform instr - - -- registers that were written here are dead going backwards. - -- registers that were read here are live going backwards. - liveregs1 = (liveregs `delListFromUniqSet` written) - `addListToUniqSet` read - - -- registers that are not live beyond this point, are recorded - -- as dying here. - r_dying = [ reg | reg <- read, reg `notElem` written, - not (elementOfUniqSet reg liveregs) ] - - w_dying = [ reg | reg <- written, - not (elementOfUniqSet reg liveregs) ] - - -- union in the live regs from all the jump destinations of this - -- instruction. - targets = jumpDestsOfInstr instr -- where we go from here - not_a_branch = null targets - - targetLiveRegs target - = case mapLookup target blockmap of - Just ra -> ra - Nothing -> emptyRegSet - - live_from_branch = unionManyUniqSets (map targetLiveRegs targets) - - liveregs_br = liveregs1 `unionUniqSets` live_from_branch - - -- registers that are live only in the branch targets should - -- be listed as dying here. - live_branch_only = live_from_branch `minusUniqSet` liveregs - r_dying_br = nonDetEltsUniqSet (mkUniqSet r_dying `unionUniqSets` - live_branch_only) - -- See Note [Unique Determinism and code generation] diff --git a/compiler/nativeGen/RegClass.hs b/compiler/nativeGen/RegClass.hs deleted file mode 100644 index f4170cca94..0000000000 --- a/compiler/nativeGen/RegClass.hs +++ /dev/null @@ -1,32 +0,0 @@ --- | An architecture independent description of a register's class. -module RegClass - ( RegClass (..) ) - -where - -import GhcPrelude - -import Outputable -import Unique - - --- | The class of a register. --- Used in the register allocator. --- We treat all registers in a class as being interchangeable. --- -data RegClass - = RcInteger - | RcFloat - | RcDouble - deriving Eq - - -instance Uniquable RegClass where - getUnique RcInteger = mkRegClassUnique 0 - getUnique RcFloat = mkRegClassUnique 1 - getUnique RcDouble = mkRegClassUnique 2 - -instance Outputable RegClass where - ppr RcInteger = Outputable.text "I" - ppr RcFloat = Outputable.text "F" - ppr RcDouble = Outputable.text "D" diff --git a/compiler/nativeGen/SPARC/AddrMode.hs b/compiler/nativeGen/SPARC/AddrMode.hs deleted file mode 100644 index ee40843351..0000000000 --- a/compiler/nativeGen/SPARC/AddrMode.hs +++ /dev/null @@ -1,44 +0,0 @@ - -module SPARC.AddrMode ( - AddrMode(..), - addrOffset -) - -where - -import GhcPrelude - -import SPARC.Imm -import SPARC.Base -import Reg - --- addressing modes ------------------------------------------------------------ - --- | Represents a memory address in an instruction. --- Being a RISC machine, the SPARC addressing modes are very regular. --- -data AddrMode - = AddrRegReg Reg Reg -- addr = r1 + r2 - | AddrRegImm Reg Imm -- addr = r1 + imm - - --- | Add an integer offset to the address in an AddrMode. --- -addrOffset :: AddrMode -> Int -> Maybe AddrMode -addrOffset addr off - = case addr of - AddrRegImm r (ImmInt n) - | fits13Bits n2 -> Just (AddrRegImm r (ImmInt n2)) - | otherwise -> Nothing - where n2 = n + off - - AddrRegImm r (ImmInteger n) - | fits13Bits n2 -> Just (AddrRegImm r (ImmInt (fromInteger n2))) - | otherwise -> Nothing - where n2 = n + toInteger off - - AddrRegReg r (RegReal (RealRegSingle 0)) - | fits13Bits off -> Just (AddrRegImm r (ImmInt off)) - | otherwise -> Nothing - - _ -> Nothing diff --git a/compiler/nativeGen/SPARC/Base.hs b/compiler/nativeGen/SPARC/Base.hs deleted file mode 100644 index 4b3b7c984c..0000000000 --- a/compiler/nativeGen/SPARC/Base.hs +++ /dev/null @@ -1,77 +0,0 @@ - --- | Bits and pieces on the bottom of the module dependency tree. --- Also import the required constants, so we know what we're using. --- --- In the interests of cross-compilation, we want to free ourselves --- from the autoconf generated modules like main/Constants - -module SPARC.Base ( - wordLength, - wordLengthInBits, - spillAreaLength, - spillSlotSize, - extraStackArgsHere, - fits13Bits, - is32BitInteger, - largeOffsetError -) - -where - -import GhcPrelude - -import GHC.Driver.Session -import Panic - -import Data.Int - - --- On 32 bit SPARC, pointers are 32 bits. -wordLength :: Int -wordLength = 4 - -wordLengthInBits :: Int -wordLengthInBits - = wordLength * 8 - --- Size of the available spill area -spillAreaLength :: DynFlags -> Int -spillAreaLength - = rESERVED_C_STACK_BYTES - --- | We need 8 bytes because our largest registers are 64 bit. -spillSlotSize :: Int -spillSlotSize = 8 - - --- | We (allegedly) put the first six C-call arguments in registers; --- where do we start putting the rest of them? -extraStackArgsHere :: Int -extraStackArgsHere = 23 - - -{-# SPECIALIZE fits13Bits :: Int -> Bool, Integer -> Bool #-} --- | Check whether an offset is representable with 13 bits. -fits13Bits :: Integral a => a -> Bool -fits13Bits x = x >= -4096 && x < 4096 - --- | Check whether an integer will fit in 32 bits. --- A CmmInt is intended to be truncated to the appropriate --- number of bits, so here we truncate it to Int64. This is --- important because e.g. -1 as a CmmInt might be either --- -1 or 18446744073709551615. --- -is32BitInteger :: Integer -> Bool -is32BitInteger i - = i64 <= 0x7fffffff && i64 >= -0x80000000 - where i64 = fromIntegral i :: Int64 - - --- | Sadness. -largeOffsetError :: (Show a) => a -> b -largeOffsetError i - = panic ("ERROR: SPARC native-code generator cannot handle large offset (" - ++ show i ++ ");\nprobably because of large constant data structures;" ++ - "\nworkaround: use -fllvm on this module.\n") - - diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs deleted file mode 100644 index b9f79a6b9f..0000000000 --- a/compiler/nativeGen/SPARC/CodeGen.hs +++ /dev/null @@ -1,700 +0,0 @@ -{-# LANGUAGE CPP #-} - ------------------------------------------------------------------------------ --- --- Generating machine code (instruction selection) --- --- (c) The University of Glasgow 1996-2013 --- ------------------------------------------------------------------------------ - -{-# LANGUAGE GADTs #-} -module SPARC.CodeGen ( - cmmTopCodeGen, - generateJumpTableForInstr, - InstrBlock -) - -where - -#include "HsVersions.h" - --- NCG stuff: -import GhcPrelude - -import SPARC.Base -import SPARC.CodeGen.Sanity -import SPARC.CodeGen.Amode -import SPARC.CodeGen.CondCode -import SPARC.CodeGen.Gen64 -import SPARC.CodeGen.Gen32 -import SPARC.CodeGen.Base -import SPARC.Instr -import SPARC.Imm -import SPARC.AddrMode -import SPARC.Regs -import SPARC.Stack -import Instruction -import Format -import NCGMonad ( NatM, getNewRegNat, getNewLabelNat ) - --- Our intermediate code: -import GHC.Cmm.BlockId -import GHC.Cmm -import GHC.Cmm.Utils -import GHC.Cmm.Switch -import GHC.Cmm.Dataflow.Block -import GHC.Cmm.Dataflow.Graph -import PIC -import Reg -import GHC.Cmm.CLabel -import CPrim - --- The rest: -import BasicTypes -import GHC.Driver.Session -import FastString -import OrdList -import Outputable -import GHC.Platform - -import Control.Monad ( mapAndUnzipM ) - --- | Top level code generation -cmmTopCodeGen :: RawCmmDecl - -> NatM [NatCmmDecl RawCmmStatics Instr] - -cmmTopCodeGen (CmmProc info lab live graph) - = do let blocks = toBlockListEntryFirst graph - (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks - - let proc = CmmProc info lab live (ListGraph $ concat nat_blocks) - let tops = proc : concat statics - - return tops - -cmmTopCodeGen (CmmData sec dat) = do - return [CmmData sec dat] -- no translation, we just use CmmStatic - - --- | Do code generation on a single block of CMM code. --- code generation may introduce new basic block boundaries, which --- are indicated by the NEWBLOCK instruction. We must split up the --- instruction stream into basic blocks again. Also, we extract --- LDATAs here too. -basicBlockCodeGen :: CmmBlock - -> NatM ( [NatBasicBlock Instr] - , [NatCmmDecl RawCmmStatics Instr]) - -basicBlockCodeGen block = do - let (_, nodes, tail) = blockSplit block - id = entryLabel block - stmts = blockToList nodes - mid_instrs <- stmtsToInstrs stmts - tail_instrs <- stmtToInstrs tail - let instrs = mid_instrs `appOL` tail_instrs - let - (top,other_blocks,statics) - = foldrOL mkBlocks ([],[],[]) instrs - - mkBlocks (NEWBLOCK id) (instrs,blocks,statics) - = ([], BasicBlock id instrs : blocks, statics) - - mkBlocks (LDATA sec dat) (instrs,blocks,statics) - = (instrs, blocks, CmmData sec dat:statics) - - mkBlocks instr (instrs,blocks,statics) - = (instr:instrs, blocks, statics) - - -- do intra-block sanity checking - blocksChecked - = map (checkBlock block) - $ BasicBlock id top : other_blocks - - return (blocksChecked, statics) - - --- | Convert some Cmm statements to SPARC instructions. -stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock -stmtsToInstrs stmts - = do instrss <- mapM stmtToInstrs stmts - return (concatOL instrss) - - -stmtToInstrs :: CmmNode e x -> NatM InstrBlock -stmtToInstrs stmt = do - dflags <- getDynFlags - case stmt of - CmmComment s -> return (unitOL (COMMENT s)) - CmmTick {} -> return nilOL - CmmUnwind {} -> return nilOL - - CmmAssign reg src - | isFloatType ty -> assignReg_FltCode format reg src - | isWord64 ty -> assignReg_I64Code reg src - | otherwise -> assignReg_IntCode format reg src - where ty = cmmRegType dflags reg - format = cmmTypeFormat ty - - CmmStore addr src - | isFloatType ty -> assignMem_FltCode format addr src - | isWord64 ty -> assignMem_I64Code addr src - | otherwise -> assignMem_IntCode format addr src - where ty = cmmExprType dflags src - format = cmmTypeFormat ty - - CmmUnsafeForeignCall target result_regs args - -> genCCall target result_regs args - - CmmBranch id -> genBranch id - CmmCondBranch arg true false _ -> do - b1 <- genCondJump true arg - b2 <- genBranch false - return (b1 `appOL` b2) - CmmSwitch arg ids -> do dflags <- getDynFlags - genSwitch dflags arg ids - CmmCall { cml_target = arg } -> genJump arg - - _ - -> panic "stmtToInstrs: statement should have been cps'd away" - - -{- -Now, given a tree (the argument to a CmmLoad) that references memory, -produce a suitable addressing mode. - -A Rule of the Game (tm) for Amodes: use of the addr bit must -immediately follow use of the code part, since the code part puts -values in registers which the addr then refers to. So you can't put -anything in between, lest it overwrite some of those registers. If -you need to do some other computation between the code part and use of -the addr bit, first store the effective address from the amode in a -temporary, then do the other computation, and then use the temporary: - - code - LEA amode, tmp - ... other computation ... - ... (tmp) ... --} - - - --- | Convert a BlockId to some CmmStatic data -jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic -jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags)) -jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel) - where blockLabel = blockLbl blockid - - - --- ----------------------------------------------------------------------------- --- Generating assignments - --- Assignments are really at the heart of the whole code generation --- business. Almost all top-level nodes of any real importance are --- assignments, which correspond to loads, stores, or register --- transfers. If we're really lucky, some of the register transfers --- will go away, because we can use the destination register to --- complete the code generation for the right hand side. This only --- fails when the right hand side is forced into a fixed register --- (e.g. the result of a call). - -assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock -assignMem_IntCode pk addr src = do - (srcReg, code) <- getSomeReg src - Amode dstAddr addr_code <- getAmode addr - return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr - - -assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock -assignReg_IntCode _ reg src = do - dflags <- getDynFlags - r <- getRegister src - let dst = getRegisterReg (targetPlatform dflags) reg - return $ case r of - Any _ code -> code dst - Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst - - - --- Floating point assignment to memory -assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock -assignMem_FltCode pk addr src = do - dflags <- getDynFlags - Amode dst__2 code1 <- getAmode addr - (src__2, code2) <- getSomeReg src - tmp1 <- getNewRegNat pk - let - pk__2 = cmmExprType dflags src - code__2 = code1 `appOL` code2 `appOL` - if formatToWidth pk == typeWidth pk__2 - then unitOL (ST pk src__2 dst__2) - else toOL [ FxTOy (cmmTypeFormat pk__2) pk src__2 tmp1 - , ST pk tmp1 dst__2] - return code__2 - --- Floating point assignment to a register/temporary -assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock -assignReg_FltCode pk dstCmmReg srcCmmExpr = do - dflags <- getDynFlags - let platform = targetPlatform dflags - srcRegister <- getRegister srcCmmExpr - let dstReg = getRegisterReg platform dstCmmReg - - return $ case srcRegister of - Any _ code -> code dstReg - Fixed _ srcFixedReg srcCode -> srcCode `snocOL` FMOV pk srcFixedReg dstReg - - - - -genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock - -genJump (CmmLit (CmmLabel lbl)) - = return (toOL [CALL (Left target) 0 True, NOP]) - where - target = ImmCLbl lbl - -genJump tree - = do - (target, code) <- getSomeReg tree - return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP) - --- ----------------------------------------------------------------------------- --- Unconditional branches - -genBranch :: BlockId -> NatM InstrBlock -genBranch = return . toOL . mkJumpInstr - - --- ----------------------------------------------------------------------------- --- Conditional jumps - -{- -Conditional jumps are always to local labels, so we can use branch -instructions. We peek at the arguments to decide what kind of -comparison to do. - -SPARC: First, we have to ensure that the condition codes are set -according to the supplied comparison operation. We generate slightly -different code for floating point comparisons, because a floating -point operation cannot directly precede a @BF@. We assume the worst -and fill that slot with a @NOP@. - -SPARC: Do not fill the delay slots here; you will confuse the register -allocator. --} - - -genCondJump - :: BlockId -- the branch target - -> CmmExpr -- the condition on which to branch - -> NatM InstrBlock - - - -genCondJump bid bool = do - CondCode is_float cond code <- getCondCode bool - return ( - code `appOL` - toOL ( - if is_float - then [NOP, BF cond False bid, NOP] - else [BI cond False bid, NOP] - ) - ) - - - --- ----------------------------------------------------------------------------- --- Generating a table-branch - -genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock -genSwitch dflags expr targets - | positionIndependent dflags - = error "MachCodeGen: sparc genSwitch PIC not finished\n" - - | otherwise - = do (e_reg, e_code) <- getSomeReg (cmmOffset dflags expr offset) - - base_reg <- getNewRegNat II32 - offset_reg <- getNewRegNat II32 - dst <- getNewRegNat II32 - - label <- getNewLabelNat - - return $ e_code `appOL` - toOL - [ -- load base of jump table - SETHI (HI (ImmCLbl label)) base_reg - , OR False base_reg (RIImm $ LO $ ImmCLbl label) base_reg - - -- the addrs in the table are 32 bits wide.. - , SLL e_reg (RIImm $ ImmInt 2) offset_reg - - -- load and jump to the destination - , LD II32 (AddrRegReg base_reg offset_reg) dst - , JMP_TBL (AddrRegImm dst (ImmInt 0)) ids label - , NOP ] - where (offset, ids) = switchTargetsToTable targets - -generateJumpTableForInstr :: DynFlags -> Instr - -> Maybe (NatCmmDecl RawCmmStatics Instr) -generateJumpTableForInstr dflags (JMP_TBL _ ids label) = - let jumpTable = map (jumpTableEntry dflags) ids - in Just (CmmData (Section ReadOnlyData label) (RawCmmStatics label jumpTable)) -generateJumpTableForInstr _ _ = Nothing - - - --- ----------------------------------------------------------------------------- --- Generating C calls - -{- - Now the biggest nightmare---calls. Most of the nastiness is buried in - @get_arg@, which moves the arguments to the correct registers/stack - locations. Apart from that, the code is easy. - - The SPARC calling convention is an absolute - nightmare. The first 6x32 bits of arguments are mapped into - %o0 through %o5, and the remaining arguments are dumped to the - stack, beginning at [%sp+92]. (Note that %o6 == %sp.) - - If we have to put args on the stack, move %o6==%sp down by - the number of words to go on the stack, to ensure there's enough space. - - According to Fraser and Hanson's lcc book, page 478, fig 17.2, - 16 words above the stack pointer is a word for the address of - a structure return value. I use this as a temporary location - for moving values from float to int regs. Certainly it isn't - safe to put anything in the 16 words starting at %sp, since - this area can get trashed at any time due to window overflows - caused by signal handlers. - - A final complication (if the above isn't enough) is that - we can't blithely calculate the arguments one by one into - %o0 .. %o5. Consider the following nested calls: - - fff a (fff b c) - - Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately - the inner call will itself use %o0, which trashes the value put there - in preparation for the outer call. Upshot: we need to calculate the - args into temporary regs, and move those to arg regs or onto the - stack only immediately prior to the call proper. Sigh. --} - -genCCall - :: ForeignTarget -- function to call - -> [CmmFormal] -- where to put the result - -> [CmmActual] -- arguments (of mixed type) - -> NatM InstrBlock - - - --- On SPARC under TSO (Total Store Ordering), writes earlier in the instruction stream --- are guaranteed to take place before writes afterwards (unlike on PowerPC). --- Ref: Section 8.4 of the SPARC V9 Architecture manual. --- --- In the SPARC case we don't need a barrier. --- -genCCall (PrimTarget MO_ReadBarrier) _ _ - = return $ nilOL -genCCall (PrimTarget MO_WriteBarrier) _ _ - = return $ nilOL - -genCCall (PrimTarget (MO_Prefetch_Data _)) _ _ - = return $ nilOL - -genCCall target dest_regs args - = do -- work out the arguments, and assign them to integer regs - argcode_and_vregs <- mapM arg_to_int_vregs args - let (argcodes, vregss) = unzip argcode_and_vregs - let vregs = concat vregss - - let n_argRegs = length allArgRegs - let n_argRegs_used = min (length vregs) n_argRegs - - - -- deal with static vs dynamic call targets - callinsns <- case target of - ForeignTarget (CmmLit (CmmLabel lbl)) _ -> - return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False)) - - ForeignTarget expr _ - -> do (dyn_c, dyn_rs) <- arg_to_int_vregs expr - let dyn_r = case dyn_rs of - [dyn_r'] -> dyn_r' - _ -> panic "SPARC.CodeGen.genCCall: arg_to_int" - return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False) - - PrimTarget mop - -> do res <- outOfLineMachOp mop - lblOrMopExpr <- case res of - Left lbl -> do - return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False)) - - Right mopExpr -> do - (dyn_c, dyn_rs) <- arg_to_int_vregs mopExpr - let dyn_r = case dyn_rs of - [dyn_r'] -> dyn_r' - _ -> panic "SPARC.CodeGen.genCCall: arg_to_int" - return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False) - - return lblOrMopExpr - - let argcode = concatOL argcodes - - let (move_sp_down, move_sp_up) - = let diff = length vregs - n_argRegs - nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment - in if nn <= 0 - then (nilOL, nilOL) - else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn))) - - let transfer_code - = toOL (move_final vregs allArgRegs extraStackArgsHere) - - dflags <- getDynFlags - return - $ argcode `appOL` - move_sp_down `appOL` - transfer_code `appOL` - callinsns `appOL` - unitOL NOP `appOL` - move_sp_up `appOL` - assign_code (targetPlatform dflags) dest_regs - - --- | Generate code to calculate an argument, and move it into one --- or two integer vregs. -arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg]) -arg_to_int_vregs arg = do dflags <- getDynFlags - arg_to_int_vregs' dflags arg - -arg_to_int_vregs' :: DynFlags -> CmmExpr -> NatM (OrdList Instr, [Reg]) -arg_to_int_vregs' dflags arg - - -- If the expr produces a 64 bit int, then we can just use iselExpr64 - | isWord64 (cmmExprType dflags arg) - = do (ChildCode64 code r_lo) <- iselExpr64 arg - let r_hi = getHiVRegFromLo r_lo - return (code, [r_hi, r_lo]) - - | otherwise - = do (src, code) <- getSomeReg arg - let pk = cmmExprType dflags arg - - case cmmTypeFormat pk of - - -- Load a 64 bit float return value into two integer regs. - FF64 -> do - v1 <- getNewRegNat II32 - v2 <- getNewRegNat II32 - - let code2 = - code `snocOL` - FMOV FF64 src f0 `snocOL` - ST FF32 f0 (spRel 16) `snocOL` - LD II32 (spRel 16) v1 `snocOL` - ST FF32 f1 (spRel 16) `snocOL` - LD II32 (spRel 16) v2 - - return (code2, [v1,v2]) - - -- Load a 32 bit float return value into an integer reg - FF32 -> do - v1 <- getNewRegNat II32 - - let code2 = - code `snocOL` - ST FF32 src (spRel 16) `snocOL` - LD II32 (spRel 16) v1 - - return (code2, [v1]) - - -- Move an integer return value into its destination reg. - _ -> do - v1 <- getNewRegNat II32 - - let code2 = - code `snocOL` - OR False g0 (RIReg src) v1 - - return (code2, [v1]) - - --- | Move args from the integer vregs into which they have been --- marshalled, into %o0 .. %o5, and the rest onto the stack. --- -move_final :: [Reg] -> [Reg] -> Int -> [Instr] - --- all args done -move_final [] _ _ - = [] - --- out of aregs; move to stack -move_final (v:vs) [] offset - = ST II32 v (spRel offset) - : move_final vs [] (offset+1) - --- move into an arg (%o[0..5]) reg -move_final (v:vs) (a:az) offset - = OR False g0 (RIReg v) a - : move_final vs az offset - - --- | Assign results returned from the call into their --- destination regs. --- -assign_code :: Platform -> [LocalReg] -> OrdList Instr - -assign_code _ [] = nilOL - -assign_code platform [dest] - = let rep = localRegType dest - width = typeWidth rep - r_dest = getRegisterReg platform (CmmLocal dest) - - result - | isFloatType rep - , W32 <- width - = unitOL $ FMOV FF32 (regSingle $ fReg 0) r_dest - - | isFloatType rep - , W64 <- width - = unitOL $ FMOV FF64 (regSingle $ fReg 0) r_dest - - | not $ isFloatType rep - , W32 <- width - = unitOL $ mkRegRegMoveInstr platform (regSingle $ oReg 0) r_dest - - | not $ isFloatType rep - , W64 <- width - , r_dest_hi <- getHiVRegFromLo r_dest - = toOL [ mkRegRegMoveInstr platform (regSingle $ oReg 0) r_dest_hi - , mkRegRegMoveInstr platform (regSingle $ oReg 1) r_dest] - - | otherwise - = panic "SPARC.CodeGen.GenCCall: no match" - - in result - -assign_code _ _ - = panic "SPARC.CodeGen.GenCCall: no match" - - - --- | Generate a call to implement an out-of-line floating point operation -outOfLineMachOp - :: CallishMachOp - -> NatM (Either CLabel CmmExpr) - -outOfLineMachOp mop - = do let functionName - = outOfLineMachOp_table mop - - dflags <- getDynFlags - mopExpr <- cmmMakeDynamicReference dflags CallReference - $ mkForeignLabel functionName Nothing ForeignLabelInExternalPackage IsFunction - - let mopLabelOrExpr - = case mopExpr of - CmmLit (CmmLabel lbl) -> Left lbl - _ -> Right mopExpr - - return mopLabelOrExpr - - --- | Decide what C function to use to implement a CallishMachOp --- -outOfLineMachOp_table - :: CallishMachOp - -> FastString - -outOfLineMachOp_table mop - = case mop of - MO_F32_Exp -> fsLit "expf" - MO_F32_ExpM1 -> fsLit "expm1f" - MO_F32_Log -> fsLit "logf" - MO_F32_Log1P -> fsLit "log1pf" - MO_F32_Sqrt -> fsLit "sqrtf" - MO_F32_Fabs -> unsupported - MO_F32_Pwr -> fsLit "powf" - - MO_F32_Sin -> fsLit "sinf" - MO_F32_Cos -> fsLit "cosf" - MO_F32_Tan -> fsLit "tanf" - - MO_F32_Asin -> fsLit "asinf" - MO_F32_Acos -> fsLit "acosf" - MO_F32_Atan -> fsLit "atanf" - - MO_F32_Sinh -> fsLit "sinhf" - MO_F32_Cosh -> fsLit "coshf" - MO_F32_Tanh -> fsLit "tanhf" - - MO_F32_Asinh -> fsLit "asinhf" - MO_F32_Acosh -> fsLit "acoshf" - MO_F32_Atanh -> fsLit "atanhf" - - MO_F64_Exp -> fsLit "exp" - MO_F64_ExpM1 -> fsLit "expm1" - MO_F64_Log -> fsLit "log" - MO_F64_Log1P -> fsLit "log1p" - MO_F64_Sqrt -> fsLit "sqrt" - MO_F64_Fabs -> unsupported - MO_F64_Pwr -> fsLit "pow" - - MO_F64_Sin -> fsLit "sin" - MO_F64_Cos -> fsLit "cos" - MO_F64_Tan -> fsLit "tan" - - MO_F64_Asin -> fsLit "asin" - MO_F64_Acos -> fsLit "acos" - MO_F64_Atan -> fsLit "atan" - - MO_F64_Sinh -> fsLit "sinh" - MO_F64_Cosh -> fsLit "cosh" - MO_F64_Tanh -> fsLit "tanh" - - MO_F64_Asinh -> fsLit "asinh" - MO_F64_Acosh -> fsLit "acosh" - MO_F64_Atanh -> fsLit "atanh" - - MO_UF_Conv w -> fsLit $ word2FloatLabel w - - MO_Memcpy _ -> fsLit "memcpy" - MO_Memset _ -> fsLit "memset" - MO_Memmove _ -> fsLit "memmove" - MO_Memcmp _ -> fsLit "memcmp" - - MO_BSwap w -> fsLit $ bSwapLabel w - MO_BRev w -> fsLit $ bRevLabel w - MO_PopCnt w -> fsLit $ popCntLabel w - MO_Pdep w -> fsLit $ pdepLabel w - MO_Pext w -> fsLit $ pextLabel w - MO_Clz w -> fsLit $ clzLabel w - MO_Ctz w -> fsLit $ ctzLabel w - MO_AtomicRMW w amop -> fsLit $ atomicRMWLabel w amop - MO_Cmpxchg w -> fsLit $ cmpxchgLabel w - MO_AtomicRead w -> fsLit $ atomicReadLabel w - MO_AtomicWrite w -> fsLit $ atomicWriteLabel w - - MO_S_Mul2 {} -> unsupported - MO_S_QuotRem {} -> unsupported - MO_U_QuotRem {} -> unsupported - MO_U_QuotRem2 {} -> unsupported - MO_Add2 {} -> unsupported - MO_AddWordC {} -> unsupported - MO_SubWordC {} -> unsupported - MO_AddIntC {} -> unsupported - MO_SubIntC {} -> unsupported - MO_U_Mul2 {} -> unsupported - MO_ReadBarrier -> unsupported - MO_WriteBarrier -> unsupported - MO_Touch -> unsupported - (MO_Prefetch_Data _) -> unsupported - where unsupported = panic ("outOfLineCmmOp: " ++ show mop - ++ " not supported here") - diff --git a/compiler/nativeGen/SPARC/CodeGen/Amode.hs b/compiler/nativeGen/SPARC/CodeGen/Amode.hs deleted file mode 100644 index 5351fc054b..0000000000 --- a/compiler/nativeGen/SPARC/CodeGen/Amode.hs +++ /dev/null @@ -1,74 +0,0 @@ -module SPARC.CodeGen.Amode ( - getAmode -) - -where - -import GhcPrelude - -import {-# SOURCE #-} SPARC.CodeGen.Gen32 -import SPARC.CodeGen.Base -import SPARC.AddrMode -import SPARC.Imm -import SPARC.Instr -import SPARC.Regs -import SPARC.Base -import NCGMonad -import Format - -import GHC.Cmm - -import OrdList - - --- | Generate code to reference a memory address. -getAmode - :: CmmExpr -- ^ expr producing an address - -> NatM Amode - -getAmode tree@(CmmRegOff _ _) - = do dflags <- getDynFlags - getAmode (mangleIndexTree dflags tree) - -getAmode (CmmMachOp (MO_Sub _) [x, CmmLit (CmmInt i _)]) - | fits13Bits (-i) - = do - (reg, code) <- getSomeReg x - let - off = ImmInt (-(fromInteger i)) - return (Amode (AddrRegImm reg off) code) - - -getAmode (CmmMachOp (MO_Add _) [x, CmmLit (CmmInt i _)]) - | fits13Bits i - = do - (reg, code) <- getSomeReg x - let - off = ImmInt (fromInteger i) - return (Amode (AddrRegImm reg off) code) - -getAmode (CmmMachOp (MO_Add _) [x, y]) - = do - (regX, codeX) <- getSomeReg x - (regY, codeY) <- getSomeReg y - let - code = codeX `appOL` codeY - return (Amode (AddrRegReg regX regY) code) - -getAmode (CmmLit lit) - = do - let imm__2 = litToImm lit - tmp1 <- getNewRegNat II32 - tmp2 <- getNewRegNat II32 - - let code = toOL [ SETHI (HI imm__2) tmp1 - , OR False tmp1 (RIImm (LO imm__2)) tmp2] - - return (Amode (AddrRegReg tmp2 g0) code) - -getAmode other - = do - (reg, code) <- getSomeReg other - let - off = ImmInt 0 - return (Amode (AddrRegImm reg off) code) diff --git a/compiler/nativeGen/SPARC/CodeGen/Base.hs b/compiler/nativeGen/SPARC/CodeGen/Base.hs deleted file mode 100644 index ba07f2311d..0000000000 --- a/compiler/nativeGen/SPARC/CodeGen/Base.hs +++ /dev/null @@ -1,119 +0,0 @@ -module SPARC.CodeGen.Base ( - InstrBlock, - CondCode(..), - ChildCode64(..), - Amode(..), - - Register(..), - setFormatOfRegister, - - getRegisterReg, - mangleIndexTree -) - -where - -import GhcPrelude - -import SPARC.Instr -import SPARC.Cond -import SPARC.AddrMode -import SPARC.Regs -import Format -import Reg - -import GHC.Platform.Regs -import GHC.Driver.Session -import GHC.Cmm -import GHC.Cmm.Ppr.Expr () -- For Outputable instances -import GHC.Platform - -import Outputable -import OrdList - --------------------------------------------------------------------------------- --- | 'InstrBlock's are the insn sequences generated by the insn selectors. --- They are really trees of insns to facilitate fast appending, where a --- left-to-right traversal yields the insns in the correct order. --- -type InstrBlock - = OrdList Instr - - --- | Condition codes passed up the tree. --- -data CondCode - = CondCode Bool Cond InstrBlock - - --- | a.k.a "Register64" --- Reg is the lower 32-bit temporary which contains the result. --- Use getHiVRegFromLo to find the other VRegUnique. --- --- Rules of this simplified insn selection game are therefore that --- the returned Reg may be modified --- -data ChildCode64 - = ChildCode64 - InstrBlock - Reg - - --- | Holds code that references a memory address. -data Amode - = Amode - -- the AddrMode we can use in the instruction - -- that does the real load\/store. - AddrMode - - -- other setup code we have to run first before we can use the - -- above AddrMode. - InstrBlock - - - --------------------------------------------------------------------------------- --- | Code to produce a result into a register. --- If the result must go in a specific register, it comes out as Fixed. --- Otherwise, the parent can decide which register to put it in. --- -data Register - = Fixed Format Reg InstrBlock - | Any Format (Reg -> InstrBlock) - - --- | Change the format field in a Register. -setFormatOfRegister - :: Register -> Format -> Register - -setFormatOfRegister reg format - = case reg of - Fixed _ reg code -> Fixed format reg code - Any _ codefn -> Any format codefn - - --------------------------------------------------------------------------------- --- | Grab the Reg for a CmmReg -getRegisterReg :: Platform -> CmmReg -> Reg - -getRegisterReg _ (CmmLocal (LocalReg u pk)) - = RegVirtual $ mkVirtualReg u (cmmTypeFormat pk) - -getRegisterReg platform (CmmGlobal mid) - = case globalRegMaybe platform mid of - Just reg -> RegReal reg - Nothing -> pprPanic - "SPARC.CodeGen.Base.getRegisterReg: global is in memory" - (ppr $ CmmGlobal mid) - - --- Expand CmmRegOff. ToDo: should we do it this way around, or convert --- CmmExprs into CmmRegOff? -mangleIndexTree :: DynFlags -> CmmExpr -> CmmExpr - -mangleIndexTree dflags (CmmRegOff reg off) - = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)] - where width = typeWidth (cmmRegType dflags reg) - -mangleIndexTree _ _ - = panic "SPARC.CodeGen.Base.mangleIndexTree: no match" diff --git a/compiler/nativeGen/SPARC/CodeGen/CondCode.hs b/compiler/nativeGen/SPARC/CodeGen/CondCode.hs deleted file mode 100644 index 892cbb1a8f..0000000000 --- a/compiler/nativeGen/SPARC/CodeGen/CondCode.hs +++ /dev/null @@ -1,110 +0,0 @@ -module SPARC.CodeGen.CondCode ( - getCondCode, - condIntCode, - condFltCode -) - -where - -import GhcPrelude - -import {-# SOURCE #-} SPARC.CodeGen.Gen32 -import SPARC.CodeGen.Base -import SPARC.Instr -import SPARC.Regs -import SPARC.Cond -import SPARC.Imm -import SPARC.Base -import NCGMonad -import Format - -import GHC.Cmm - -import OrdList -import Outputable - - -getCondCode :: CmmExpr -> NatM CondCode -getCondCode (CmmMachOp mop [x, y]) - = - case mop of - MO_F_Eq W32 -> condFltCode EQQ x y - MO_F_Ne W32 -> condFltCode NE x y - MO_F_Gt W32 -> condFltCode GTT x y - MO_F_Ge W32 -> condFltCode GE x y - MO_F_Lt W32 -> condFltCode LTT x y - MO_F_Le W32 -> condFltCode LE x y - - MO_F_Eq W64 -> condFltCode EQQ x y - MO_F_Ne W64 -> condFltCode NE x y - MO_F_Gt W64 -> condFltCode GTT x y - MO_F_Ge W64 -> condFltCode GE x y - MO_F_Lt W64 -> condFltCode LTT x y - MO_F_Le W64 -> condFltCode LE x y - - MO_Eq _ -> condIntCode EQQ x y - MO_Ne _ -> condIntCode NE x y - - MO_S_Gt _ -> condIntCode GTT x y - MO_S_Ge _ -> condIntCode GE x y - MO_S_Lt _ -> condIntCode LTT x y - MO_S_Le _ -> condIntCode LE x y - - MO_U_Gt _ -> condIntCode GU x y - MO_U_Ge _ -> condIntCode GEU x y - MO_U_Lt _ -> condIntCode LU x y - MO_U_Le _ -> condIntCode LEU x y - - _ -> pprPanic "SPARC.CodeGen.CondCode.getCondCode" (ppr (CmmMachOp mop [x,y])) - -getCondCode other = pprPanic "SPARC.CodeGen.CondCode.getCondCode" (ppr other) - - - - - --- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be --- passed back up the tree. - -condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode -condIntCode cond x (CmmLit (CmmInt y _)) - | fits13Bits y - = do - (src1, code) <- getSomeReg x - let - src2 = ImmInt (fromInteger y) - code' = code `snocOL` SUB False True src1 (RIImm src2) g0 - return (CondCode False cond code') - -condIntCode cond x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - let - code__2 = code1 `appOL` code2 `snocOL` - SUB False True src1 (RIReg src2) g0 - return (CondCode False cond code__2) - - -condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode -condFltCode cond x y = do - dflags <- getDynFlags - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - tmp <- getNewRegNat FF64 - let - promote x = FxTOy FF32 FF64 x tmp - - pk1 = cmmExprType dflags x - pk2 = cmmExprType dflags y - - code__2 = - if pk1 `cmmEqType` pk2 then - code1 `appOL` code2 `snocOL` - FCMP True (cmmTypeFormat pk1) src1 src2 - else if typeWidth pk1 == W32 then - code1 `snocOL` promote src1 `appOL` code2 `snocOL` - FCMP True FF64 tmp src2 - else - code1 `appOL` code2 `snocOL` promote src2 `snocOL` - FCMP True FF64 src1 tmp - return (CondCode True cond code__2) diff --git a/compiler/nativeGen/SPARC/CodeGen/Expand.hs b/compiler/nativeGen/SPARC/CodeGen/Expand.hs deleted file mode 100644 index b6d78a9f79..0000000000 --- a/compiler/nativeGen/SPARC/CodeGen/Expand.hs +++ /dev/null @@ -1,156 +0,0 @@ -{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} - --- | Expand out synthetic instructions into single machine instrs. -module SPARC.CodeGen.Expand ( - expandTop -) - -where - -import GhcPrelude - -import SPARC.Instr -import SPARC.Imm -import SPARC.AddrMode -import SPARC.Regs -import Instruction -import Reg -import Format -import GHC.Cmm - - -import Outputable -import OrdList - --- | Expand out synthetic instructions in this top level thing -expandTop :: NatCmmDecl RawCmmStatics Instr -> NatCmmDecl RawCmmStatics Instr -expandTop top@(CmmData{}) - = top - -expandTop (CmmProc info lbl live (ListGraph blocks)) - = CmmProc info lbl live (ListGraph $ map expandBlock blocks) - - --- | Expand out synthetic instructions in this block -expandBlock :: NatBasicBlock Instr -> NatBasicBlock Instr - -expandBlock (BasicBlock label instrs) - = let instrs_ol = expandBlockInstrs instrs - instrs' = fromOL instrs_ol - in BasicBlock label instrs' - - --- | Expand out some instructions -expandBlockInstrs :: [Instr] -> OrdList Instr -expandBlockInstrs [] = nilOL - -expandBlockInstrs (ii:is) - = let ii_doubleRegs = remapRegPair ii - is_misaligned = expandMisalignedDoubles ii_doubleRegs - - in is_misaligned `appOL` expandBlockInstrs is - - - --- | In the SPARC instruction set the FP register pairs that are used --- to hold 64 bit floats are referred to by just the first reg --- of the pair. Remap our internal reg pairs to the appropriate reg. --- --- For example: --- ldd [%l1], (%f0 | %f1) --- --- gets mapped to --- ldd [$l1], %f0 --- -remapRegPair :: Instr -> Instr -remapRegPair instr - = let patchF reg - = case reg of - RegReal (RealRegSingle _) - -> reg - - RegReal (RealRegPair r1 r2) - - -- sanity checking - | r1 >= 32 - , r1 <= 63 - , r1 `mod` 2 == 0 - , r2 == r1 + 1 - -> RegReal (RealRegSingle r1) - - | otherwise - -> pprPanic "SPARC.CodeGen.Expand: not remapping dodgy looking reg pair " (ppr reg) - - RegVirtual _ - -> pprPanic "SPARC.CodeGen.Expand: not remapping virtual reg " (ppr reg) - - in patchRegsOfInstr instr patchF - - - - --- Expand out 64 bit load/stores into individual instructions to handle --- possible double alignment problems. --- --- TODO: It'd be better to use a scratch reg instead of the add/sub thing. --- We might be able to do this faster if we use the UA2007 instr set --- instead of restricting ourselves to SPARC V9. --- -expandMisalignedDoubles :: Instr -> OrdList Instr -expandMisalignedDoubles instr - - -- Translate to: - -- add g1,g2,g1 - -- ld [g1],%fn - -- ld [g1+4],%f(n+1) - -- sub g1,g2,g1 -- to restore g1 - | LD FF64 (AddrRegReg r1 r2) fReg <- instr - = toOL [ ADD False False r1 (RIReg r2) r1 - , LD FF32 (AddrRegReg r1 g0) fReg - , LD FF32 (AddrRegImm r1 (ImmInt 4)) (fRegHi fReg) - , SUB False False r1 (RIReg r2) r1 ] - - -- Translate to - -- ld [addr],%fn - -- ld [addr+4],%f(n+1) - | LD FF64 addr fReg <- instr - = let Just addr' = addrOffset addr 4 - in toOL [ LD FF32 addr fReg - , LD FF32 addr' (fRegHi fReg) ] - - -- Translate to: - -- add g1,g2,g1 - -- st %fn,[g1] - -- st %f(n+1),[g1+4] - -- sub g1,g2,g1 -- to restore g1 - | ST FF64 fReg (AddrRegReg r1 r2) <- instr - = toOL [ ADD False False r1 (RIReg r2) r1 - , ST FF32 fReg (AddrRegReg r1 g0) - , ST FF32 (fRegHi fReg) (AddrRegImm r1 (ImmInt 4)) - , SUB False False r1 (RIReg r2) r1 ] - - -- Translate to - -- ld [addr],%fn - -- ld [addr+4],%f(n+1) - | ST FF64 fReg addr <- instr - = let Just addr' = addrOffset addr 4 - in toOL [ ST FF32 fReg addr - , ST FF32 (fRegHi fReg) addr' ] - - -- some other instr - | otherwise - = unitOL instr - - - --- | The high partner for this float reg. -fRegHi :: Reg -> Reg -fRegHi (RegReal (RealRegSingle r1)) - | r1 >= 32 - , r1 <= 63 - , r1 `mod` 2 == 0 - = (RegReal $ RealRegSingle (r1 + 1)) - --- Can't take high partner for non-low reg. -fRegHi reg - = pprPanic "SPARC.CodeGen.Expand: can't take fRegHi from " (ppr reg) diff --git a/compiler/nativeGen/SPARC/CodeGen/Gen32.hs b/compiler/nativeGen/SPARC/CodeGen/Gen32.hs deleted file mode 100644 index 4f3409ebc5..0000000000 --- a/compiler/nativeGen/SPARC/CodeGen/Gen32.hs +++ /dev/null @@ -1,692 +0,0 @@ --- | Evaluation of 32 bit values. -module SPARC.CodeGen.Gen32 ( - getSomeReg, - getRegister -) - -where - -import GhcPrelude - -import SPARC.CodeGen.CondCode -import SPARC.CodeGen.Amode -import SPARC.CodeGen.Gen64 -import SPARC.CodeGen.Base -import SPARC.Stack -import SPARC.Instr -import SPARC.Cond -import SPARC.AddrMode -import SPARC.Imm -import SPARC.Regs -import SPARC.Base -import NCGMonad -import Format -import Reg - -import GHC.Cmm - -import Control.Monad (liftM) -import GHC.Driver.Session -import OrdList -import Outputable - --- | The dual to getAnyReg: compute an expression into a register, but --- we don't mind which one it is. -getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock) -getSomeReg expr = do - r <- getRegister expr - case r of - Any rep code -> do - tmp <- getNewRegNat rep - return (tmp, code tmp) - Fixed _ reg code -> - return (reg, code) - - - --- | Make code to evaluate a 32 bit expression. --- -getRegister :: CmmExpr -> NatM Register - -getRegister (CmmReg reg) - = do dflags <- getDynFlags - let platform = targetPlatform dflags - return (Fixed (cmmTypeFormat (cmmRegType dflags reg)) - (getRegisterReg platform reg) nilOL) - -getRegister tree@(CmmRegOff _ _) - = do dflags <- getDynFlags - getRegister (mangleIndexTree dflags tree) - -getRegister (CmmMachOp (MO_UU_Conv W64 W32) - [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do - ChildCode64 code rlo <- iselExpr64 x - return $ Fixed II32 (getHiVRegFromLo rlo) code - -getRegister (CmmMachOp (MO_SS_Conv W64 W32) - [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do - ChildCode64 code rlo <- iselExpr64 x - return $ Fixed II32 (getHiVRegFromLo rlo) code - -getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do - ChildCode64 code rlo <- iselExpr64 x - return $ Fixed II32 rlo code - -getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do - ChildCode64 code rlo <- iselExpr64 x - return $ Fixed II32 rlo code - - --- Load a literal float into a float register. --- The actual literal is stored in a new data area, and we load it --- at runtime. -getRegister (CmmLit (CmmFloat f W32)) = do - - -- a label for the new data area - lbl <- getNewLabelNat - tmp <- getNewRegNat II32 - - let code dst = toOL [ - -- the data area - LDATA (Section ReadOnlyData lbl) $ RawCmmStatics lbl - [CmmStaticLit (CmmFloat f W32)], - - -- load the literal - SETHI (HI (ImmCLbl lbl)) tmp, - LD II32 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst] - - return (Any FF32 code) - -getRegister (CmmLit (CmmFloat d W64)) = do - lbl <- getNewLabelNat - tmp <- getNewRegNat II32 - let code dst = toOL [ - LDATA (Section ReadOnlyData lbl) $ RawCmmStatics lbl - [CmmStaticLit (CmmFloat d W64)], - SETHI (HI (ImmCLbl lbl)) tmp, - LD II64 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst] - return (Any FF64 code) - - --- Unary machine ops -getRegister (CmmMachOp mop [x]) - = case mop of - -- Floating point negation ------------------------- - MO_F_Neg W32 -> trivialUFCode FF32 (FNEG FF32) x - MO_F_Neg W64 -> trivialUFCode FF64 (FNEG FF64) x - - - -- Integer negation -------------------------------- - MO_S_Neg rep -> trivialUCode (intFormat rep) (SUB False False g0) x - MO_Not rep -> trivialUCode (intFormat rep) (XNOR False g0) x - - - -- Float word size conversion ---------------------- - MO_FF_Conv W64 W32 -> coerceDbl2Flt x - MO_FF_Conv W32 W64 -> coerceFlt2Dbl x - - - -- Float <-> Signed Int conversion ----------------- - MO_FS_Conv from to -> coerceFP2Int from to x - MO_SF_Conv from to -> coerceInt2FP from to x - - - -- Unsigned integer word size conversions ---------- - - -- If it's the same size, then nothing needs to be done. - MO_UU_Conv from to - | from == to -> conversionNop (intFormat to) x - - -- To narrow an unsigned word, mask out the high bits to simulate what would - -- happen if we copied the value into a smaller register. - MO_UU_Conv W16 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8)) - MO_UU_Conv W32 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8)) - - -- for narrowing 32 bit to 16 bit, don't use a literal mask value like the W16->W8 - -- case because the only way we can load it is via SETHI, which needs 2 ops. - -- Do some shifts to chop out the high bits instead. - MO_UU_Conv W32 W16 - -> do tmpReg <- getNewRegNat II32 - (xReg, xCode) <- getSomeReg x - let code dst - = xCode - `appOL` toOL - [ SLL xReg (RIImm $ ImmInt 16) tmpReg - , SRL tmpReg (RIImm $ ImmInt 16) dst] - - return $ Any II32 code - - -- trivialCode W16 (AND False) x (CmmLit (CmmInt 65535 W16)) - - -- To widen an unsigned word we don't have to do anything. - -- Just leave it in the same register and mark the result as the new size. - MO_UU_Conv W8 W16 -> conversionNop (intFormat W16) x - MO_UU_Conv W8 W32 -> conversionNop (intFormat W32) x - MO_UU_Conv W16 W32 -> conversionNop (intFormat W32) x - - - -- Signed integer word size conversions ------------ - - -- Mask out high bits when narrowing them - MO_SS_Conv W16 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8)) - MO_SS_Conv W32 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8)) - MO_SS_Conv W32 W16 -> trivialCode W16 (AND False) x (CmmLit (CmmInt 65535 W16)) - - -- Sign extend signed words when widening them. - MO_SS_Conv W8 W16 -> integerExtend W8 W16 x - MO_SS_Conv W8 W32 -> integerExtend W8 W32 x - MO_SS_Conv W16 W32 -> integerExtend W16 W32 x - - _ -> panic ("Unknown unary mach op: " ++ show mop) - - --- Binary machine ops -getRegister (CmmMachOp mop [x, y]) - = case mop of - MO_Eq _ -> condIntReg EQQ x y - MO_Ne _ -> condIntReg NE x y - - MO_S_Gt _ -> condIntReg GTT x y - MO_S_Ge _ -> condIntReg GE x y - MO_S_Lt _ -> condIntReg LTT x y - MO_S_Le _ -> condIntReg LE x y - - MO_U_Gt W32 -> condIntReg GU x y - MO_U_Ge W32 -> condIntReg GEU x y - MO_U_Lt W32 -> condIntReg LU x y - MO_U_Le W32 -> condIntReg LEU x y - - MO_U_Gt W16 -> condIntReg GU x y - MO_U_Ge W16 -> condIntReg GEU x y - MO_U_Lt W16 -> condIntReg LU x y - MO_U_Le W16 -> condIntReg LEU x y - - MO_Add W32 -> trivialCode W32 (ADD False False) x y - MO_Sub W32 -> trivialCode W32 (SUB False False) x y - - MO_S_MulMayOflo rep -> imulMayOflo rep x y - - MO_S_Quot W32 -> idiv True False x y - MO_U_Quot W32 -> idiv False False x y - - MO_S_Rem W32 -> irem True x y - MO_U_Rem W32 -> irem False x y - - MO_F_Eq _ -> condFltReg EQQ x y - MO_F_Ne _ -> condFltReg NE x y - - MO_F_Gt _ -> condFltReg GTT x y - MO_F_Ge _ -> condFltReg GE x y - MO_F_Lt _ -> condFltReg LTT x y - MO_F_Le _ -> condFltReg LE x y - - MO_F_Add w -> trivialFCode w FADD x y - MO_F_Sub w -> trivialFCode w FSUB x y - MO_F_Mul w -> trivialFCode w FMUL x y - MO_F_Quot w -> trivialFCode w FDIV x y - - MO_And rep -> trivialCode rep (AND False) x y - MO_Or rep -> trivialCode rep (OR False) x y - MO_Xor rep -> trivialCode rep (XOR False) x y - - MO_Mul rep -> trivialCode rep (SMUL False) x y - - MO_Shl rep -> trivialCode rep SLL x y - MO_U_Shr rep -> trivialCode rep SRL x y - MO_S_Shr rep -> trivialCode rep SRA x y - - _ -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop) - -getRegister (CmmLoad mem pk) = do - Amode src code <- getAmode mem - let - code__2 dst = code `snocOL` LD (cmmTypeFormat pk) src dst - return (Any (cmmTypeFormat pk) code__2) - -getRegister (CmmLit (CmmInt i _)) - | fits13Bits i - = let - src = ImmInt (fromInteger i) - code dst = unitOL (OR False g0 (RIImm src) dst) - in - return (Any II32 code) - -getRegister (CmmLit lit) - = let imm = litToImm lit - code dst = toOL [ - SETHI (HI imm) dst, - OR False dst (RIImm (LO imm)) dst] - in return (Any II32 code) - - -getRegister _ - = panic "SPARC.CodeGen.Gen32.getRegister: no match" - - --- | sign extend and widen -integerExtend - :: Width -- ^ width of source expression - -> Width -- ^ width of result - -> CmmExpr -- ^ source expression - -> NatM Register - -integerExtend from to expr - = do -- load the expr into some register - (reg, e_code) <- getSomeReg expr - tmp <- getNewRegNat II32 - let bitCount - = case (from, to) of - (W8, W32) -> 24 - (W16, W32) -> 16 - (W8, W16) -> 24 - _ -> panic "SPARC.CodeGen.Gen32: no match" - let code dst - = e_code - - -- local shift word left to load the sign bit - `snocOL` SLL reg (RIImm (ImmInt bitCount)) tmp - - -- arithmetic shift right to sign extend - `snocOL` SRA tmp (RIImm (ImmInt bitCount)) dst - - return (Any (intFormat to) code) - - --- | For nop word format conversions we set the resulting value to have the --- required size, but don't need to generate any actual code. --- -conversionNop - :: Format -> CmmExpr -> NatM Register - -conversionNop new_rep expr - = do e_code <- getRegister expr - return (setFormatOfRegister e_code new_rep) - - - --- | Generate an integer division instruction. -idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register - --- For unsigned division with a 32 bit numerator, --- we can just clear the Y register. -idiv False cc x y - = do - (a_reg, a_code) <- getSomeReg x - (b_reg, b_code) <- getSomeReg y - - let code dst - = a_code - `appOL` b_code - `appOL` toOL - [ WRY g0 g0 - , UDIV cc a_reg (RIReg b_reg) dst] - - return (Any II32 code) - - --- For _signed_ division with a 32 bit numerator, --- we have to sign extend the numerator into the Y register. -idiv True cc x y - = do - (a_reg, a_code) <- getSomeReg x - (b_reg, b_code) <- getSomeReg y - - tmp <- getNewRegNat II32 - - let code dst - = a_code - `appOL` b_code - `appOL` toOL - [ SRA a_reg (RIImm (ImmInt 16)) tmp -- sign extend - , SRA tmp (RIImm (ImmInt 16)) tmp - - , WRY tmp g0 - , SDIV cc a_reg (RIReg b_reg) dst] - - return (Any II32 code) - - --- | Do an integer remainder. --- --- NOTE: The SPARC v8 architecture manual says that integer division --- instructions _may_ generate a remainder, depending on the implementation. --- If so it is _recommended_ that the remainder is placed in the Y register. --- --- The UltraSparc 2007 manual says Y is _undefined_ after division. --- --- The SPARC T2 doesn't store the remainder, not sure about the others. --- It's probably best not to worry about it, and just generate our own --- remainders. --- -irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register - --- For unsigned operands: --- Division is between a 64 bit numerator and a 32 bit denominator, --- so we still have to clear the Y register. -irem False x y - = do - (a_reg, a_code) <- getSomeReg x - (b_reg, b_code) <- getSomeReg y - - tmp_reg <- getNewRegNat II32 - - let code dst - = a_code - `appOL` b_code - `appOL` toOL - [ WRY g0 g0 - , UDIV False a_reg (RIReg b_reg) tmp_reg - , UMUL False tmp_reg (RIReg b_reg) tmp_reg - , SUB False False a_reg (RIReg tmp_reg) dst] - - return (Any II32 code) - - - --- For signed operands: --- Make sure to sign extend into the Y register, or the remainder --- will have the wrong sign when the numerator is negative. --- --- TODO: When sign extending, GCC only shifts the a_reg right by 17 bits, --- not the full 32. Not sure why this is, something to do with overflow? --- If anyone cares enough about the speed of signed remainder they --- can work it out themselves (then tell me). -- BL 2009/01/20 -irem True x y - = do - (a_reg, a_code) <- getSomeReg x - (b_reg, b_code) <- getSomeReg y - - tmp1_reg <- getNewRegNat II32 - tmp2_reg <- getNewRegNat II32 - - let code dst - = a_code - `appOL` b_code - `appOL` toOL - [ SRA a_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend - , SRA tmp1_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend - , WRY tmp1_reg g0 - - , SDIV False a_reg (RIReg b_reg) tmp2_reg - , SMUL False tmp2_reg (RIReg b_reg) tmp2_reg - , SUB False False a_reg (RIReg tmp2_reg) dst] - - return (Any II32 code) - - -imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register -imulMayOflo rep a b - = do - (a_reg, a_code) <- getSomeReg a - (b_reg, b_code) <- getSomeReg b - res_lo <- getNewRegNat II32 - res_hi <- getNewRegNat II32 - - let shift_amt = case rep of - W32 -> 31 - W64 -> 63 - _ -> panic "shift_amt" - - let code dst = a_code `appOL` b_code `appOL` - toOL [ - SMUL False a_reg (RIReg b_reg) res_lo, - RDY res_hi, - SRA res_lo (RIImm (ImmInt shift_amt)) res_lo, - SUB False False res_lo (RIReg res_hi) dst - ] - return (Any II32 code) - - --- ----------------------------------------------------------------------------- --- 'trivial*Code': deal with trivial instructions - --- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode', --- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions. --- Only look for constants on the right hand side, because that's --- where the generic optimizer will have put them. - --- Similarly, for unary instructions, we don't have to worry about --- matching an StInt as the argument, because genericOpt will already --- have handled the constant-folding. - -trivialCode - :: Width - -> (Reg -> RI -> Reg -> Instr) - -> CmmExpr - -> CmmExpr - -> NatM Register - -trivialCode _ instr x (CmmLit (CmmInt y _)) - | fits13Bits y - = do - (src1, code) <- getSomeReg x - let - src2 = ImmInt (fromInteger y) - code__2 dst = code `snocOL` instr src1 (RIImm src2) dst - return (Any II32 code__2) - - -trivialCode _ instr x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - let - code__2 dst = code1 `appOL` code2 `snocOL` - instr src1 (RIReg src2) dst - return (Any II32 code__2) - - -trivialFCode - :: Width - -> (Format -> Reg -> Reg -> Reg -> Instr) - -> CmmExpr - -> CmmExpr - -> NatM Register - -trivialFCode pk instr x y = do - dflags <- getDynFlags - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - tmp <- getNewRegNat FF64 - let - promote x = FxTOy FF32 FF64 x tmp - - pk1 = cmmExprType dflags x - pk2 = cmmExprType dflags y - - code__2 dst = - if pk1 `cmmEqType` pk2 then - code1 `appOL` code2 `snocOL` - instr (floatFormat pk) src1 src2 dst - else if typeWidth pk1 == W32 then - code1 `snocOL` promote src1 `appOL` code2 `snocOL` - instr FF64 tmp src2 dst - else - code1 `appOL` code2 `snocOL` promote src2 `snocOL` - instr FF64 src1 tmp dst - return (Any (cmmTypeFormat $ if pk1 `cmmEqType` pk2 then pk1 else cmmFloat W64) - code__2) - - - -trivialUCode - :: Format - -> (RI -> Reg -> Instr) - -> CmmExpr - -> NatM Register - -trivialUCode format instr x = do - (src, code) <- getSomeReg x - let - code__2 dst = code `snocOL` instr (RIReg src) dst - return (Any format code__2) - - -trivialUFCode - :: Format - -> (Reg -> Reg -> Instr) - -> CmmExpr - -> NatM Register - -trivialUFCode pk instr x = do - (src, code) <- getSomeReg x - let - code__2 dst = code `snocOL` instr src dst - return (Any pk code__2) - - - - --- Coercions ------------------------------------------------------------------- - --- | Coerce a integer value to floating point -coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register -coerceInt2FP width1 width2 x = do - (src, code) <- getSomeReg x - let - code__2 dst = code `appOL` toOL [ - ST (intFormat width1) src (spRel (-2)), - LD (intFormat width1) (spRel (-2)) dst, - FxTOy (intFormat width1) (floatFormat width2) dst dst] - return (Any (floatFormat $ width2) code__2) - - - --- | Coerce a floating point value to integer --- --- NOTE: On sparc v9 there are no instructions to move a value from an --- FP register directly to an int register, so we have to use a load/store. --- -coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register -coerceFP2Int width1 width2 x - = do let fformat1 = floatFormat width1 - fformat2 = floatFormat width2 - - iformat2 = intFormat width2 - - (fsrc, code) <- getSomeReg x - fdst <- getNewRegNat fformat2 - - let code2 dst - = code - `appOL` toOL - -- convert float to int format, leaving it in a float reg. - [ FxTOy fformat1 iformat2 fsrc fdst - - -- store the int into mem, then load it back to move - -- it into an actual int reg. - , ST fformat2 fdst (spRel (-2)) - , LD iformat2 (spRel (-2)) dst] - - return (Any iformat2 code2) - - --- | Coerce a double precision floating point value to single precision. -coerceDbl2Flt :: CmmExpr -> NatM Register -coerceDbl2Flt x = do - (src, code) <- getSomeReg x - return (Any FF32 (\dst -> code `snocOL` FxTOy FF64 FF32 src dst)) - - --- | Coerce a single precision floating point value to double precision -coerceFlt2Dbl :: CmmExpr -> NatM Register -coerceFlt2Dbl x = do - (src, code) <- getSomeReg x - return (Any FF64 (\dst -> code `snocOL` FxTOy FF32 FF64 src dst)) - - - - --- Condition Codes ------------------------------------------------------------- --- --- Evaluate a comparison, and get the result into a register. --- --- Do not fill the delay slots here. you will confuse the register allocator. --- -condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register -condIntReg EQQ x (CmmLit (CmmInt 0 _)) = do - (src, code) <- getSomeReg x - let - code__2 dst = code `appOL` toOL [ - SUB False True g0 (RIReg src) g0, - SUB True False g0 (RIImm (ImmInt (-1))) dst] - return (Any II32 code__2) - -condIntReg EQQ x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - let - code__2 dst = code1 `appOL` code2 `appOL` toOL [ - XOR False src1 (RIReg src2) dst, - SUB False True g0 (RIReg dst) g0, - SUB True False g0 (RIImm (ImmInt (-1))) dst] - return (Any II32 code__2) - -condIntReg NE x (CmmLit (CmmInt 0 _)) = do - (src, code) <- getSomeReg x - let - code__2 dst = code `appOL` toOL [ - SUB False True g0 (RIReg src) g0, - ADD True False g0 (RIImm (ImmInt 0)) dst] - return (Any II32 code__2) - -condIntReg NE x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - let - code__2 dst = code1 `appOL` code2 `appOL` toOL [ - XOR False src1 (RIReg src2) dst, - SUB False True g0 (RIReg dst) g0, - ADD True False g0 (RIImm (ImmInt 0)) dst] - return (Any II32 code__2) - -condIntReg cond x y = do - bid1 <- liftM (\a -> seq a a) getBlockIdNat - bid2 <- liftM (\a -> seq a a) getBlockIdNat - CondCode _ cond cond_code <- condIntCode cond x y - let - code__2 dst - = cond_code - `appOL` toOL - [ BI cond False bid1 - , NOP - - , OR False g0 (RIImm (ImmInt 0)) dst - , BI ALWAYS False bid2 - , NOP - - , NEWBLOCK bid1 - , OR False g0 (RIImm (ImmInt 1)) dst - , BI ALWAYS False bid2 - , NOP - - , NEWBLOCK bid2] - - return (Any II32 code__2) - - -condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register -condFltReg cond x y = do - bid1 <- liftM (\a -> seq a a) getBlockIdNat - bid2 <- liftM (\a -> seq a a) getBlockIdNat - - CondCode _ cond cond_code <- condFltCode cond x y - let - code__2 dst - = cond_code - `appOL` toOL - [ NOP - , BF cond False bid1 - , NOP - - , OR False g0 (RIImm (ImmInt 0)) dst - , BI ALWAYS False bid2 - , NOP - - , NEWBLOCK bid1 - , OR False g0 (RIImm (ImmInt 1)) dst - , BI ALWAYS False bid2 - , NOP - - , NEWBLOCK bid2 ] - - return (Any II32 code__2) diff --git a/compiler/nativeGen/SPARC/CodeGen/Gen32.hs-boot b/compiler/nativeGen/SPARC/CodeGen/Gen32.hs-boot deleted file mode 100644 index 1dbd2d3612..0000000000 --- a/compiler/nativeGen/SPARC/CodeGen/Gen32.hs-boot +++ /dev/null @@ -1,16 +0,0 @@ - -module SPARC.CodeGen.Gen32 ( - getSomeReg, - getRegister -) - -where - -import SPARC.CodeGen.Base -import NCGMonad -import Reg - -import GHC.Cmm - -getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock) -getRegister :: CmmExpr -> NatM Register diff --git a/compiler/nativeGen/SPARC/CodeGen/Gen64.hs b/compiler/nativeGen/SPARC/CodeGen/Gen64.hs deleted file mode 100644 index 7c04101ec4..0000000000 --- a/compiler/nativeGen/SPARC/CodeGen/Gen64.hs +++ /dev/null @@ -1,216 +0,0 @@ --- | Evaluation of 64 bit values on 32 bit platforms. -module SPARC.CodeGen.Gen64 ( - assignMem_I64Code, - assignReg_I64Code, - iselExpr64 -) - -where - -import GhcPrelude - -import {-# SOURCE #-} SPARC.CodeGen.Gen32 -import SPARC.CodeGen.Base -import SPARC.CodeGen.Amode -import SPARC.Regs -import SPARC.AddrMode -import SPARC.Imm -import SPARC.Instr --- import SPARC.Ppr() -import NCGMonad -import Instruction -import Format -import Reg - -import GHC.Cmm - -import GHC.Driver.Session -import OrdList -import Outputable - --- | Code to assign a 64 bit value to memory. -assignMem_I64Code - :: CmmExpr -- ^ expr producing the destination address - -> CmmExpr -- ^ expr producing the source value. - -> NatM InstrBlock - -assignMem_I64Code addrTree valueTree - = do - ChildCode64 vcode rlo <- iselExpr64 valueTree - - (src, acode) <- getSomeReg addrTree - let - rhi = getHiVRegFromLo rlo - - -- Big-endian store - mov_hi = ST II32 rhi (AddrRegImm src (ImmInt 0)) - mov_lo = ST II32 rlo (AddrRegImm src (ImmInt 4)) - - code = vcode `appOL` acode `snocOL` mov_hi `snocOL` mov_lo - -{- pprTrace "assignMem_I64Code" - (vcat [ text "addrTree: " <+> ppr addrTree - , text "valueTree: " <+> ppr valueTree - , text "vcode:" - , vcat $ map ppr $ fromOL vcode - , text "" - , text "acode:" - , vcat $ map ppr $ fromOL acode ]) - $ -} - return code - - --- | Code to assign a 64 bit value to a register. -assignReg_I64Code - :: CmmReg -- ^ the destination register - -> CmmExpr -- ^ expr producing the source value - -> NatM InstrBlock - -assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree - = do - ChildCode64 vcode r_src_lo <- iselExpr64 valueTree - let - r_dst_lo = RegVirtual $ mkVirtualReg u_dst (cmmTypeFormat pk) - r_dst_hi = getHiVRegFromLo r_dst_lo - r_src_hi = getHiVRegFromLo r_src_lo - mov_lo = mkMOV r_src_lo r_dst_lo - mov_hi = mkMOV r_src_hi r_dst_hi - mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg - - return (vcode `snocOL` mov_hi `snocOL` mov_lo) - -assignReg_I64Code _ _ - = panic "assignReg_I64Code(sparc): invalid lvalue" - - - - --- | Get the value of an expression into a 64 bit register. - -iselExpr64 :: CmmExpr -> NatM ChildCode64 - --- Load a 64 bit word -iselExpr64 (CmmLoad addrTree ty) - | isWord64 ty - = do Amode amode addr_code <- getAmode addrTree - let result - - | AddrRegReg r1 r2 <- amode - = do rlo <- getNewRegNat II32 - tmp <- getNewRegNat II32 - let rhi = getHiVRegFromLo rlo - - return $ ChildCode64 - ( addr_code - `appOL` toOL - [ ADD False False r1 (RIReg r2) tmp - , LD II32 (AddrRegImm tmp (ImmInt 0)) rhi - , LD II32 (AddrRegImm tmp (ImmInt 4)) rlo ]) - rlo - - | AddrRegImm r1 (ImmInt i) <- amode - = do rlo <- getNewRegNat II32 - let rhi = getHiVRegFromLo rlo - - return $ ChildCode64 - ( addr_code - `appOL` toOL - [ LD II32 (AddrRegImm r1 (ImmInt $ 0 + i)) rhi - , LD II32 (AddrRegImm r1 (ImmInt $ 4 + i)) rlo ]) - rlo - - | otherwise - = panic "SPARC.CodeGen.Gen64: no match" - - result - - --- Add a literal to a 64 bit integer -iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) - = do ChildCode64 code1 r1_lo <- iselExpr64 e1 - let r1_hi = getHiVRegFromLo r1_lo - - r_dst_lo <- getNewRegNat II32 - let r_dst_hi = getHiVRegFromLo r_dst_lo - - let code = code1 - `appOL` toOL - [ ADD False True r1_lo (RIImm (ImmInteger i)) r_dst_lo - , ADD True False r1_hi (RIReg g0) r_dst_hi ] - - return $ ChildCode64 code r_dst_lo - - --- Addition of II64 -iselExpr64 (CmmMachOp (MO_Add _) [e1, e2]) - = do ChildCode64 code1 r1_lo <- iselExpr64 e1 - let r1_hi = getHiVRegFromLo r1_lo - - ChildCode64 code2 r2_lo <- iselExpr64 e2 - let r2_hi = getHiVRegFromLo r2_lo - - r_dst_lo <- getNewRegNat II32 - let r_dst_hi = getHiVRegFromLo r_dst_lo - - let code = code1 - `appOL` code2 - `appOL` toOL - [ ADD False True r1_lo (RIReg r2_lo) r_dst_lo - , ADD True False r1_hi (RIReg r2_hi) r_dst_hi ] - - return $ ChildCode64 code r_dst_lo - - -iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty))) - | isWord64 ty - = do - r_dst_lo <- getNewRegNat II32 - let r_dst_hi = getHiVRegFromLo r_dst_lo - r_src_lo = RegVirtual $ mkVirtualReg uq II32 - r_src_hi = getHiVRegFromLo r_src_lo - mov_lo = mkMOV r_src_lo r_dst_lo - mov_hi = mkMOV r_src_hi r_dst_hi - mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg - return ( - ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo - ) - --- Convert something into II64 -iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) - = do - r_dst_lo <- getNewRegNat II32 - let r_dst_hi = getHiVRegFromLo r_dst_lo - - -- compute expr and load it into r_dst_lo - (a_reg, a_code) <- getSomeReg expr - - dflags <- getDynFlags - let platform = targetPlatform dflags - code = a_code - `appOL` toOL - [ mkRegRegMoveInstr platform g0 r_dst_hi -- clear high 32 bits - , mkRegRegMoveInstr platform a_reg r_dst_lo ] - - return $ ChildCode64 code r_dst_lo - --- only W32 supported for now -iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) - = do - r_dst_lo <- getNewRegNat II32 - let r_dst_hi = getHiVRegFromLo r_dst_lo - - -- compute expr and load it into r_dst_lo - (a_reg, a_code) <- getSomeReg expr - - dflags <- getDynFlags - let platform = targetPlatform dflags - code = a_code - `appOL` toOL - [ SRA a_reg (RIImm (ImmInt 31)) r_dst_hi - , mkRegRegMoveInstr platform a_reg r_dst_lo ] - - return $ ChildCode64 code r_dst_lo - - -iselExpr64 expr - = pprPanic "iselExpr64(sparc)" (ppr expr) diff --git a/compiler/nativeGen/SPARC/CodeGen/Sanity.hs b/compiler/nativeGen/SPARC/CodeGen/Sanity.hs deleted file mode 100644 index b60c958a73..0000000000 --- a/compiler/nativeGen/SPARC/CodeGen/Sanity.hs +++ /dev/null @@ -1,69 +0,0 @@ --- | One ounce of sanity checking is worth 10000000000000000 ounces --- of staring blindly at assembly code trying to find the problem.. -module SPARC.CodeGen.Sanity ( - checkBlock -) - -where - -import GhcPrelude - -import SPARC.Instr -import SPARC.Ppr () -- For Outputable instances -import Instruction - -import GHC.Cmm - -import Outputable - - --- | Enforce intra-block invariants. --- -checkBlock :: CmmBlock - -> NatBasicBlock Instr - -> NatBasicBlock Instr - -checkBlock cmm block@(BasicBlock _ instrs) - | checkBlockInstrs instrs - = block - - | otherwise - = pprPanic - ("SPARC.CodeGen: bad block\n") - ( vcat [ text " -- cmm -----------------\n" - , ppr cmm - , text " -- native code ---------\n" - , ppr block ]) - - -checkBlockInstrs :: [Instr] -> Bool -checkBlockInstrs ii - - -- An unconditional jumps end the block. - -- There must be an unconditional jump in the block, otherwise - -- the register liveness determinator will get the liveness - -- information wrong. - -- - -- If the block ends with a cmm call that never returns - -- then there can be unreachable instructions after the jump, - -- but we don't mind here. - -- - | instr : NOP : _ <- ii - , isUnconditionalJump instr - = True - - -- All jumps must have a NOP in their branch delay slot. - -- The liveness determinator and register allocators aren't smart - -- enough to handle branch delay slots. - -- - | instr : NOP : is <- ii - , isJumpishInstr instr - = checkBlockInstrs is - - -- keep checking - | _:i2:is <- ii - = checkBlockInstrs (i2:is) - - -- this block is no good - | otherwise - = False diff --git a/compiler/nativeGen/SPARC/Cond.hs b/compiler/nativeGen/SPARC/Cond.hs deleted file mode 100644 index 3fbfb8603f..0000000000 --- a/compiler/nativeGen/SPARC/Cond.hs +++ /dev/null @@ -1,54 +0,0 @@ -module SPARC.Cond ( - Cond(..), - condUnsigned, - condToSigned, - condToUnsigned -) - -where - -import GhcPrelude - --- | Branch condition codes. -data Cond - = ALWAYS - | EQQ - | GE - | GEU - | GTT - | GU - | LE - | LEU - | LTT - | LU - | NE - | NEG - | NEVER - | POS - | VC - | VS - deriving Eq - - -condUnsigned :: Cond -> Bool -condUnsigned GU = True -condUnsigned LU = True -condUnsigned GEU = True -condUnsigned LEU = True -condUnsigned _ = False - - -condToSigned :: Cond -> Cond -condToSigned GU = GTT -condToSigned LU = LTT -condToSigned GEU = GE -condToSigned LEU = LE -condToSigned x = x - - -condToUnsigned :: Cond -> Cond -condToUnsigned GTT = GU -condToUnsigned LTT = LU -condToUnsigned GE = GEU -condToUnsigned LE = LEU -condToUnsigned x = x diff --git a/compiler/nativeGen/SPARC/Imm.hs b/compiler/nativeGen/SPARC/Imm.hs deleted file mode 100644 index 78b6612bbf..0000000000 --- a/compiler/nativeGen/SPARC/Imm.hs +++ /dev/null @@ -1,67 +0,0 @@ -module SPARC.Imm ( - -- immediate values - Imm(..), - strImmLit, - litToImm -) - -where - -import GhcPrelude - -import GHC.Cmm -import GHC.Cmm.CLabel - -import Outputable - --- | An immediate value. --- Not all of these are directly representable by the machine. --- Things like ImmLit are slurped out and put in a data segment instead. --- -data Imm - = ImmInt Int - - -- Sigh. - | ImmInteger Integer - - -- AbstractC Label (with baggage) - | ImmCLbl CLabel - - -- Simple string - | ImmLit SDoc - | ImmIndex CLabel Int - | ImmFloat Rational - | ImmDouble Rational - - | ImmConstantSum Imm Imm - | ImmConstantDiff Imm Imm - - | LO Imm - | HI Imm - - --- | Create a ImmLit containing this string. -strImmLit :: String -> Imm -strImmLit s = ImmLit (text s) - - --- | Convert a CmmLit to an Imm. --- Narrow to the width: a CmmInt might be out of --- range, but we assume that ImmInteger only contains --- in-range values. A signed value should be fine here. --- -litToImm :: CmmLit -> Imm -litToImm lit - = case lit of - CmmInt i w -> ImmInteger (narrowS w i) - CmmFloat f W32 -> ImmFloat f - CmmFloat f W64 -> ImmDouble f - CmmLabel l -> ImmCLbl l - CmmLabelOff l off -> ImmIndex l off - - CmmLabelDiffOff l1 l2 off _ - -> ImmConstantSum - (ImmConstantDiff (ImmCLbl l1) (ImmCLbl l2)) - (ImmInt off) - - _ -> panic "SPARC.Regs.litToImm: no match" diff --git a/compiler/nativeGen/SPARC/Instr.hs b/compiler/nativeGen/SPARC/Instr.hs deleted file mode 100644 index d49d82fa7e..0000000000 --- a/compiler/nativeGen/SPARC/Instr.hs +++ /dev/null @@ -1,481 +0,0 @@ -{-# LANGUAGE CPP #-} - ------------------------------------------------------------------------------ --- --- Machine-dependent assembly language --- --- (c) The University of Glasgow 1993-2004 --- ------------------------------------------------------------------------------ -#include "HsVersions.h" - -module SPARC.Instr ( - RI(..), - riZero, - - fpRelEA, - moveSp, - - isUnconditionalJump, - - Instr(..), - maxSpillSlots -) - -where - -import GhcPrelude - -import SPARC.Stack -import SPARC.Imm -import SPARC.AddrMode -import SPARC.Cond -import SPARC.Regs -import SPARC.Base -import TargetReg -import Instruction -import RegClass -import Reg -import Format - -import GHC.Cmm.CLabel -import GHC.Platform.Regs -import GHC.Cmm.BlockId -import GHC.Driver.Session -import GHC.Cmm -import FastString -import Outputable -import GHC.Platform - - --- | Register or immediate -data RI - = RIReg Reg - | RIImm Imm - --- | Check if a RI represents a zero value. --- - a literal zero --- - register %g0, which is always zero. --- -riZero :: RI -> Bool -riZero (RIImm (ImmInt 0)) = True -riZero (RIImm (ImmInteger 0)) = True -riZero (RIReg (RegReal (RealRegSingle 0))) = True -riZero _ = False - - --- | Calculate the effective address which would be used by the --- corresponding fpRel sequence. -fpRelEA :: Int -> Reg -> Instr -fpRelEA n dst - = ADD False False fp (RIImm (ImmInt (n * wordLength))) dst - - --- | Code to shift the stack pointer by n words. -moveSp :: Int -> Instr -moveSp n - = ADD False False sp (RIImm (ImmInt (n * wordLength))) sp - --- | An instruction that will cause the one after it never to be exectuted -isUnconditionalJump :: Instr -> Bool -isUnconditionalJump ii - = case ii of - CALL{} -> True - JMP{} -> True - JMP_TBL{} -> True - BI ALWAYS _ _ -> True - BF ALWAYS _ _ -> True - _ -> False - - --- | instance for sparc instruction set -instance Instruction Instr where - regUsageOfInstr = sparc_regUsageOfInstr - patchRegsOfInstr = sparc_patchRegsOfInstr - isJumpishInstr = sparc_isJumpishInstr - jumpDestsOfInstr = sparc_jumpDestsOfInstr - patchJumpInstr = sparc_patchJumpInstr - mkSpillInstr = sparc_mkSpillInstr - mkLoadInstr = sparc_mkLoadInstr - takeDeltaInstr = sparc_takeDeltaInstr - isMetaInstr = sparc_isMetaInstr - mkRegRegMoveInstr = sparc_mkRegRegMoveInstr - takeRegRegMoveInstr = sparc_takeRegRegMoveInstr - mkJumpInstr = sparc_mkJumpInstr - mkStackAllocInstr = panic "no sparc_mkStackAllocInstr" - mkStackDeallocInstr = panic "no sparc_mkStackDeallocInstr" - - --- | SPARC instruction set. --- Not complete. This is only the ones we need. --- -data Instr - - -- meta ops -------------------------------------------------- - -- comment pseudo-op - = COMMENT FastString - - -- some static data spat out during code generation. - -- Will be extracted before pretty-printing. - | LDATA Section RawCmmStatics - - -- Start a new basic block. Useful during codegen, removed later. - -- Preceding instruction should be a jump, as per the invariants - -- for a BasicBlock (see Cmm). - | NEWBLOCK BlockId - - -- specify current stack offset for benefit of subsequent passes. - | DELTA Int - - -- real instrs ----------------------------------------------- - -- Loads and stores. - | LD Format AddrMode Reg -- format, src, dst - | ST Format Reg AddrMode -- format, src, dst - - -- Int Arithmetic. - -- x: add/sub with carry bit. - -- In SPARC V9 addx and friends were renamed addc. - -- - -- cc: modify condition codes - -- - | ADD Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst - | SUB Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst - - | UMUL Bool Reg RI Reg -- cc?, src1, src2, dst - | SMUL Bool Reg RI Reg -- cc?, src1, src2, dst - - - -- The SPARC divide instructions perform 64bit by 32bit division - -- The Y register is xored into the first operand. - - -- On _some implementations_ the Y register is overwritten by - -- the remainder, so we have to make sure it is 0 each time. - - -- dst <- ((Y `shiftL` 32) `or` src1) `div` src2 - | UDIV Bool Reg RI Reg -- cc?, src1, src2, dst - | SDIV Bool Reg RI Reg -- cc?, src1, src2, dst - - | RDY Reg -- move contents of Y register to reg - | WRY Reg Reg -- Y <- src1 `xor` src2 - - -- Logic operations. - | AND Bool Reg RI Reg -- cc?, src1, src2, dst - | ANDN Bool Reg RI Reg -- cc?, src1, src2, dst - | OR Bool Reg RI Reg -- cc?, src1, src2, dst - | ORN Bool Reg RI Reg -- cc?, src1, src2, dst - | XOR Bool Reg RI Reg -- cc?, src1, src2, dst - | XNOR Bool Reg RI Reg -- cc?, src1, src2, dst - | SLL Reg RI Reg -- src1, src2, dst - | SRL Reg RI Reg -- src1, src2, dst - | SRA Reg RI Reg -- src1, src2, dst - - -- Load immediates. - | SETHI Imm Reg -- src, dst - - -- Do nothing. - -- Implemented by the assembler as SETHI 0, %g0, but worth an alias - | NOP - - -- Float Arithmetic. - -- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single - -- instructions right up until we spit them out. - -- - | FABS Format Reg Reg -- src dst - | FADD Format Reg Reg Reg -- src1, src2, dst - | FCMP Bool Format Reg Reg -- exception?, src1, src2, dst - | FDIV Format Reg Reg Reg -- src1, src2, dst - | FMOV Format Reg Reg -- src, dst - | FMUL Format Reg Reg Reg -- src1, src2, dst - | FNEG Format Reg Reg -- src, dst - | FSQRT Format Reg Reg -- src, dst - | FSUB Format Reg Reg Reg -- src1, src2, dst - | FxTOy Format Format Reg Reg -- src, dst - - -- Jumping around. - | BI Cond Bool BlockId -- cond, annul?, target - | BF Cond Bool BlockId -- cond, annul?, target - - | JMP AddrMode -- target - - -- With a tabled jump we know all the possible destinations. - -- We also need this info so we can work out what regs are live across the jump. - -- - | JMP_TBL AddrMode [Maybe BlockId] CLabel - - | CALL (Either Imm Reg) Int Bool -- target, args, terminal - - --- | regUsage returns the sets of src and destination registers used --- by a particular instruction. Machine registers that are --- pre-allocated to stgRegs are filtered out, because they are --- uninteresting from a register allocation standpoint. (We wouldn't --- want them to end up on the free list!) As far as we are concerned, --- the fixed registers simply don't exist (for allocation purposes, --- anyway). - --- regUsage doesn't need to do any trickery for jumps and such. Just --- state precisely the regs read and written by that insn. The --- consequences of control flow transfers, as far as register --- allocation goes, are taken care of by the register allocator. --- -sparc_regUsageOfInstr :: Platform -> Instr -> RegUsage -sparc_regUsageOfInstr platform instr - = case instr of - LD _ addr reg -> usage (regAddr addr, [reg]) - ST _ reg addr -> usage (reg : regAddr addr, []) - ADD _ _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - SUB _ _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - UMUL _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - SMUL _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - UDIV _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - SDIV _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - RDY rd -> usage ([], [rd]) - WRY r1 r2 -> usage ([r1, r2], []) - AND _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - ANDN _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - OR _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - ORN _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - XOR _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - XNOR _ r1 ar r2 -> usage (r1 : regRI ar, [r2]) - SLL r1 ar r2 -> usage (r1 : regRI ar, [r2]) - SRL r1 ar r2 -> usage (r1 : regRI ar, [r2]) - SRA r1 ar r2 -> usage (r1 : regRI ar, [r2]) - SETHI _ reg -> usage ([], [reg]) - FABS _ r1 r2 -> usage ([r1], [r2]) - FADD _ r1 r2 r3 -> usage ([r1, r2], [r3]) - FCMP _ _ r1 r2 -> usage ([r1, r2], []) - FDIV _ r1 r2 r3 -> usage ([r1, r2], [r3]) - FMOV _ r1 r2 -> usage ([r1], [r2]) - FMUL _ r1 r2 r3 -> usage ([r1, r2], [r3]) - FNEG _ r1 r2 -> usage ([r1], [r2]) - FSQRT _ r1 r2 -> usage ([r1], [r2]) - FSUB _ r1 r2 r3 -> usage ([r1, r2], [r3]) - FxTOy _ _ r1 r2 -> usage ([r1], [r2]) - - JMP addr -> usage (regAddr addr, []) - JMP_TBL addr _ _ -> usage (regAddr addr, []) - - CALL (Left _ ) _ True -> noUsage - CALL (Left _ ) n False -> usage (argRegs n, callClobberedRegs) - CALL (Right reg) _ True -> usage ([reg], []) - CALL (Right reg) n False -> usage (reg : (argRegs n), callClobberedRegs) - _ -> noUsage - - where - usage (src, dst) - = RU (filter (interesting platform) src) - (filter (interesting platform) dst) - - regAddr (AddrRegReg r1 r2) = [r1, r2] - regAddr (AddrRegImm r1 _) = [r1] - - regRI (RIReg r) = [r] - regRI _ = [] - - --- | Interesting regs are virtuals, or ones that are allocatable --- by the register allocator. -interesting :: Platform -> Reg -> Bool -interesting platform reg - = case reg of - RegVirtual _ -> True - RegReal (RealRegSingle r1) -> freeReg platform r1 - RegReal (RealRegPair r1 _) -> freeReg platform r1 - - - --- | Apply a given mapping to tall the register references in this instruction. -sparc_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr -sparc_patchRegsOfInstr instr env = case instr of - LD fmt addr reg -> LD fmt (fixAddr addr) (env reg) - ST fmt reg addr -> ST fmt (env reg) (fixAddr addr) - - ADD x cc r1 ar r2 -> ADD x cc (env r1) (fixRI ar) (env r2) - SUB x cc r1 ar r2 -> SUB x cc (env r1) (fixRI ar) (env r2) - UMUL cc r1 ar r2 -> UMUL cc (env r1) (fixRI ar) (env r2) - SMUL cc r1 ar r2 -> SMUL cc (env r1) (fixRI ar) (env r2) - UDIV cc r1 ar r2 -> UDIV cc (env r1) (fixRI ar) (env r2) - SDIV cc r1 ar r2 -> SDIV cc (env r1) (fixRI ar) (env r2) - RDY rd -> RDY (env rd) - WRY r1 r2 -> WRY (env r1) (env r2) - AND b r1 ar r2 -> AND b (env r1) (fixRI ar) (env r2) - ANDN b r1 ar r2 -> ANDN b (env r1) (fixRI ar) (env r2) - OR b r1 ar r2 -> OR b (env r1) (fixRI ar) (env r2) - ORN b r1 ar r2 -> ORN b (env r1) (fixRI ar) (env r2) - XOR b r1 ar r2 -> XOR b (env r1) (fixRI ar) (env r2) - XNOR b r1 ar r2 -> XNOR b (env r1) (fixRI ar) (env r2) - SLL r1 ar r2 -> SLL (env r1) (fixRI ar) (env r2) - SRL r1 ar r2 -> SRL (env r1) (fixRI ar) (env r2) - SRA r1 ar r2 -> SRA (env r1) (fixRI ar) (env r2) - - SETHI imm reg -> SETHI imm (env reg) - - FABS s r1 r2 -> FABS s (env r1) (env r2) - FADD s r1 r2 r3 -> FADD s (env r1) (env r2) (env r3) - FCMP e s r1 r2 -> FCMP e s (env r1) (env r2) - FDIV s r1 r2 r3 -> FDIV s (env r1) (env r2) (env r3) - FMOV s r1 r2 -> FMOV s (env r1) (env r2) - FMUL s r1 r2 r3 -> FMUL s (env r1) (env r2) (env r3) - FNEG s r1 r2 -> FNEG s (env r1) (env r2) - FSQRT s r1 r2 -> FSQRT s (env r1) (env r2) - FSUB s r1 r2 r3 -> FSUB s (env r1) (env r2) (env r3) - FxTOy s1 s2 r1 r2 -> FxTOy s1 s2 (env r1) (env r2) - - JMP addr -> JMP (fixAddr addr) - JMP_TBL addr ids l -> JMP_TBL (fixAddr addr) ids l - - CALL (Left i) n t -> CALL (Left i) n t - CALL (Right r) n t -> CALL (Right (env r)) n t - _ -> instr - - where - fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2) - fixAddr (AddrRegImm r1 i) = AddrRegImm (env r1) i - - fixRI (RIReg r) = RIReg (env r) - fixRI other = other - - --------------------------------------------------------------------------------- -sparc_isJumpishInstr :: Instr -> Bool -sparc_isJumpishInstr instr - = case instr of - BI{} -> True - BF{} -> True - JMP{} -> True - JMP_TBL{} -> True - CALL{} -> True - _ -> False - -sparc_jumpDestsOfInstr :: Instr -> [BlockId] -sparc_jumpDestsOfInstr insn - = case insn of - BI _ _ id -> [id] - BF _ _ id -> [id] - JMP_TBL _ ids _ -> [id | Just id <- ids] - _ -> [] - - -sparc_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr -sparc_patchJumpInstr insn patchF - = case insn of - BI cc annul id -> BI cc annul (patchF id) - BF cc annul id -> BF cc annul (patchF id) - JMP_TBL n ids l -> JMP_TBL n (map (fmap patchF) ids) l - _ -> insn - - --------------------------------------------------------------------------------- --- | Make a spill instruction. --- On SPARC we spill below frame pointer leaving 2 words/spill -sparc_mkSpillInstr - :: DynFlags - -> Reg -- ^ register to spill - -> Int -- ^ current stack delta - -> Int -- ^ spill slot to use - -> Instr - -sparc_mkSpillInstr dflags reg _ slot - = let platform = targetPlatform dflags - off = spillSlotToOffset dflags slot - off_w = 1 + (off `div` 4) - fmt = case targetClassOfReg platform reg of - RcInteger -> II32 - RcFloat -> FF32 - RcDouble -> FF64 - - in ST fmt reg (fpRel (negate off_w)) - - --- | Make a spill reload instruction. -sparc_mkLoadInstr - :: DynFlags - -> Reg -- ^ register to load into - -> Int -- ^ current stack delta - -> Int -- ^ spill slot to use - -> Instr - -sparc_mkLoadInstr dflags reg _ slot - = let platform = targetPlatform dflags - off = spillSlotToOffset dflags slot - off_w = 1 + (off `div` 4) - fmt = case targetClassOfReg platform reg of - RcInteger -> II32 - RcFloat -> FF32 - RcDouble -> FF64 - - in LD fmt (fpRel (- off_w)) reg - - --------------------------------------------------------------------------------- --- | See if this instruction is telling us the current C stack delta -sparc_takeDeltaInstr - :: Instr - -> Maybe Int - -sparc_takeDeltaInstr instr - = case instr of - DELTA i -> Just i - _ -> Nothing - - -sparc_isMetaInstr - :: Instr - -> Bool - -sparc_isMetaInstr instr - = case instr of - COMMENT{} -> True - LDATA{} -> True - NEWBLOCK{} -> True - DELTA{} -> True - _ -> False - - --- | Make a reg-reg move instruction. --- On SPARC v8 there are no instructions to move directly between --- floating point and integer regs. If we need to do that then we --- have to go via memory. --- -sparc_mkRegRegMoveInstr - :: Platform - -> Reg - -> Reg - -> Instr - -sparc_mkRegRegMoveInstr platform src dst - | srcClass <- targetClassOfReg platform src - , dstClass <- targetClassOfReg platform dst - , srcClass == dstClass - = case srcClass of - RcInteger -> ADD False False src (RIReg g0) dst - RcDouble -> FMOV FF64 src dst - RcFloat -> FMOV FF32 src dst - - | otherwise - = panic "SPARC.Instr.mkRegRegMoveInstr: classes of src and dest not the same" - - --- | Check whether an instruction represents a reg-reg move. --- The register allocator attempts to eliminate reg->reg moves whenever it can, --- by assigning the src and dest temporaries to the same real register. --- -sparc_takeRegRegMoveInstr :: Instr -> Maybe (Reg,Reg) -sparc_takeRegRegMoveInstr instr - = case instr of - ADD False False src (RIReg src2) dst - | g0 == src2 -> Just (src, dst) - - FMOV FF64 src dst -> Just (src, dst) - FMOV FF32 src dst -> Just (src, dst) - _ -> Nothing - - --- | Make an unconditional branch instruction. -sparc_mkJumpInstr - :: BlockId - -> [Instr] - -sparc_mkJumpInstr id - = [BI ALWAYS False id - , NOP] -- fill the branch delay slot. diff --git a/compiler/nativeGen/SPARC/Ppr.hs b/compiler/nativeGen/SPARC/Ppr.hs deleted file mode 100644 index 566f438403..0000000000 --- a/compiler/nativeGen/SPARC/Ppr.hs +++ /dev/null @@ -1,645 +0,0 @@ -{-# LANGUAGE CPP #-} - ------------------------------------------------------------------------------ --- --- Pretty-printing assembly language --- --- (c) The University of Glasgow 1993-2005 --- ------------------------------------------------------------------------------ - -{-# OPTIONS_GHC -fno-warn-orphans #-} - -module SPARC.Ppr ( - pprNatCmmDecl, - pprBasicBlock, - pprData, - pprInstr, - pprFormat, - pprImm, - pprDataItem -) - -where - -#include "HsVersions.h" - -import GhcPrelude - -import SPARC.Regs -import SPARC.Instr -import SPARC.Cond -import SPARC.Imm -import SPARC.AddrMode -import SPARC.Base -import Instruction -import Reg -import Format -import PprBase - -import GHC.Cmm hiding (topInfoTable) -import GHC.Cmm.Ppr() -- For Outputable instances -import GHC.Cmm.BlockId -import GHC.Cmm.CLabel -import GHC.Cmm.Dataflow.Label -import GHC.Cmm.Dataflow.Collections - -import Unique ( pprUniqueAlways ) -import Outputable -import GHC.Platform -import FastString - --- ----------------------------------------------------------------------------- --- Printing this stuff out - -pprNatCmmDecl :: NatCmmDecl RawCmmStatics Instr -> SDoc -pprNatCmmDecl (CmmData section dats) = - pprSectionAlign section $$ pprDatas dats - -pprNatCmmDecl proc@(CmmProc top_info lbl _ (ListGraph blocks)) = - case topInfoTable proc of - Nothing -> - -- special case for code without info table: - pprSectionAlign (Section Text lbl) $$ - pprLabel lbl $$ -- blocks guaranteed not null, so label needed - vcat (map (pprBasicBlock top_info) blocks) - - Just (RawCmmStatics info_lbl _) -> - sdocWithPlatform $ \platform -> - (if platformHasSubsectionsViaSymbols platform - then pprSectionAlign dspSection $$ - ppr (mkDeadStripPreventer info_lbl) <> char ':' - else empty) $$ - vcat (map (pprBasicBlock top_info) blocks) $$ - -- above: Even the first block gets a label, because with branch-chain - -- elimination, it might be the target of a goto. - (if platformHasSubsectionsViaSymbols platform - then - -- See Note [Subsections Via Symbols] in X86/Ppr.hs - text "\t.long " - <+> ppr info_lbl - <+> char '-' - <+> ppr (mkDeadStripPreventer info_lbl) - else empty) - -dspSection :: Section -dspSection = Section Text $ - panic "subsections-via-symbols doesn't combine with split-sections" - -pprBasicBlock :: LabelMap RawCmmStatics -> NatBasicBlock Instr -> SDoc -pprBasicBlock info_env (BasicBlock blockid instrs) - = maybe_infotable $$ - pprLabel (blockLbl blockid) $$ - vcat (map pprInstr instrs) - where - maybe_infotable = case mapLookup blockid info_env of - Nothing -> empty - Just (RawCmmStatics info_lbl info) -> - pprAlignForSection Text $$ - vcat (map pprData info) $$ - pprLabel info_lbl - - -pprDatas :: RawCmmStatics -> SDoc --- See note [emit-time elimination of static indirections] in CLabel. -pprDatas (RawCmmStatics alias [CmmStaticLit (CmmLabel lbl), CmmStaticLit ind, _, _]) - | lbl == mkIndStaticInfoLabel - , let labelInd (CmmLabelOff l _) = Just l - labelInd (CmmLabel l) = Just l - labelInd _ = Nothing - , Just ind' <- labelInd ind - , alias `mayRedirectTo` ind' - = pprGloblDecl alias - $$ text ".equiv" <+> ppr alias <> comma <> ppr (CmmLabel ind') -pprDatas (RawCmmStatics lbl dats) = vcat (pprLabel lbl : map pprData dats) - -pprData :: CmmStatic -> SDoc -pprData (CmmString str) = pprBytes str -pprData (CmmUninitialised bytes) = text ".skip " <> int bytes -pprData (CmmStaticLit lit) = pprDataItem lit - -pprGloblDecl :: CLabel -> SDoc -pprGloblDecl lbl - | not (externallyVisibleCLabel lbl) = empty - | otherwise = text ".global " <> ppr lbl - -pprTypeAndSizeDecl :: CLabel -> SDoc -pprTypeAndSizeDecl lbl - = sdocWithPlatform $ \platform -> - if platformOS platform == OSLinux && externallyVisibleCLabel lbl - then text ".type " <> ppr lbl <> ptext (sLit ", @object") - else empty - -pprLabel :: CLabel -> SDoc -pprLabel lbl = pprGloblDecl lbl - $$ pprTypeAndSizeDecl lbl - $$ (ppr lbl <> char ':') - --- ----------------------------------------------------------------------------- --- pprInstr: print an 'Instr' - -instance Outputable Instr where - ppr instr = pprInstr instr - - --- | Pretty print a register. -pprReg :: Reg -> SDoc -pprReg reg - = case reg of - RegVirtual vr - -> case vr of - VirtualRegI u -> text "%vI_" <> pprUniqueAlways u - VirtualRegHi u -> text "%vHi_" <> pprUniqueAlways u - VirtualRegF u -> text "%vF_" <> pprUniqueAlways u - VirtualRegD u -> text "%vD_" <> pprUniqueAlways u - - - RegReal rr - -> case rr of - RealRegSingle r1 - -> pprReg_ofRegNo r1 - - RealRegPair r1 r2 - -> text "(" <> pprReg_ofRegNo r1 - <> vbar <> pprReg_ofRegNo r2 - <> text ")" - - - --- | Pretty print a register name, based on this register number. --- The definition has been unfolded so we get a jump-table in the --- object code. This function is called quite a lot when emitting --- the asm file.. --- -pprReg_ofRegNo :: Int -> SDoc -pprReg_ofRegNo i - = ptext - (case i of { - 0 -> sLit "%g0"; 1 -> sLit "%g1"; - 2 -> sLit "%g2"; 3 -> sLit "%g3"; - 4 -> sLit "%g4"; 5 -> sLit "%g5"; - 6 -> sLit "%g6"; 7 -> sLit "%g7"; - 8 -> sLit "%o0"; 9 -> sLit "%o1"; - 10 -> sLit "%o2"; 11 -> sLit "%o3"; - 12 -> sLit "%o4"; 13 -> sLit "%o5"; - 14 -> sLit "%o6"; 15 -> sLit "%o7"; - 16 -> sLit "%l0"; 17 -> sLit "%l1"; - 18 -> sLit "%l2"; 19 -> sLit "%l3"; - 20 -> sLit "%l4"; 21 -> sLit "%l5"; - 22 -> sLit "%l6"; 23 -> sLit "%l7"; - 24 -> sLit "%i0"; 25 -> sLit "%i1"; - 26 -> sLit "%i2"; 27 -> sLit "%i3"; - 28 -> sLit "%i4"; 29 -> sLit "%i5"; - 30 -> sLit "%i6"; 31 -> sLit "%i7"; - 32 -> sLit "%f0"; 33 -> sLit "%f1"; - 34 -> sLit "%f2"; 35 -> sLit "%f3"; - 36 -> sLit "%f4"; 37 -> sLit "%f5"; - 38 -> sLit "%f6"; 39 -> sLit "%f7"; - 40 -> sLit "%f8"; 41 -> sLit "%f9"; - 42 -> sLit "%f10"; 43 -> sLit "%f11"; - 44 -> sLit "%f12"; 45 -> sLit "%f13"; - 46 -> sLit "%f14"; 47 -> sLit "%f15"; - 48 -> sLit "%f16"; 49 -> sLit "%f17"; - 50 -> sLit "%f18"; 51 -> sLit "%f19"; - 52 -> sLit "%f20"; 53 -> sLit "%f21"; - 54 -> sLit "%f22"; 55 -> sLit "%f23"; - 56 -> sLit "%f24"; 57 -> sLit "%f25"; - 58 -> sLit "%f26"; 59 -> sLit "%f27"; - 60 -> sLit "%f28"; 61 -> sLit "%f29"; - 62 -> sLit "%f30"; 63 -> sLit "%f31"; - _ -> sLit "very naughty sparc register" }) - - --- | Pretty print a format for an instruction suffix. -pprFormat :: Format -> SDoc -pprFormat x - = ptext - (case x of - II8 -> sLit "ub" - II16 -> sLit "uh" - II32 -> sLit "" - II64 -> sLit "d" - FF32 -> sLit "" - FF64 -> sLit "d") - - --- | Pretty print a format for an instruction suffix. --- eg LD is 32bit on sparc, but LDD is 64 bit. -pprStFormat :: Format -> SDoc -pprStFormat x - = ptext - (case x of - II8 -> sLit "b" - II16 -> sLit "h" - II32 -> sLit "" - II64 -> sLit "x" - FF32 -> sLit "" - FF64 -> sLit "d") - - - --- | Pretty print a condition code. -pprCond :: Cond -> SDoc -pprCond c - = ptext - (case c of - ALWAYS -> sLit "" - NEVER -> sLit "n" - GEU -> sLit "geu" - LU -> sLit "lu" - EQQ -> sLit "e" - GTT -> sLit "g" - GE -> sLit "ge" - GU -> sLit "gu" - LTT -> sLit "l" - LE -> sLit "le" - LEU -> sLit "leu" - NE -> sLit "ne" - NEG -> sLit "neg" - POS -> sLit "pos" - VC -> sLit "vc" - VS -> sLit "vs") - - --- | Pretty print an address mode. -pprAddr :: AddrMode -> SDoc -pprAddr am - = case am of - AddrRegReg r1 (RegReal (RealRegSingle 0)) - -> pprReg r1 - - AddrRegReg r1 r2 - -> hcat [ pprReg r1, char '+', pprReg r2 ] - - AddrRegImm r1 (ImmInt i) - | i == 0 -> pprReg r1 - | not (fits13Bits i) -> largeOffsetError i - | otherwise -> hcat [ pprReg r1, pp_sign, int i ] - where - pp_sign = if i > 0 then char '+' else empty - - AddrRegImm r1 (ImmInteger i) - | i == 0 -> pprReg r1 - | not (fits13Bits i) -> largeOffsetError i - | otherwise -> hcat [ pprReg r1, pp_sign, integer i ] - where - pp_sign = if i > 0 then char '+' else empty - - AddrRegImm r1 imm - -> hcat [ pprReg r1, char '+', pprImm imm ] - - --- | Pretty print an immediate value. -pprImm :: Imm -> SDoc -pprImm imm - = case imm of - ImmInt i -> int i - ImmInteger i -> integer i - ImmCLbl l -> ppr l - ImmIndex l i -> ppr l <> char '+' <> int i - ImmLit s -> s - - ImmConstantSum a b - -> pprImm a <> char '+' <> pprImm b - - ImmConstantDiff a b - -> pprImm a <> char '-' <> lparen <> pprImm b <> rparen - - LO i - -> hcat [ text "%lo(", pprImm i, rparen ] - - HI i - -> hcat [ text "%hi(", pprImm i, rparen ] - - -- these should have been converted to bytes and placed - -- in the data section. - ImmFloat _ -> text "naughty float immediate" - ImmDouble _ -> text "naughty double immediate" - - --- | Pretty print a section \/ segment header. --- On SPARC all the data sections must be at least 8 byte aligned --- incase we store doubles in them. --- -pprSectionAlign :: Section -> SDoc -pprSectionAlign sec@(Section seg _) = - sdocWithPlatform $ \platform -> - pprSectionHeader platform sec $$ - pprAlignForSection seg - --- | Print appropriate alignment for the given section type. -pprAlignForSection :: SectionType -> SDoc -pprAlignForSection seg = - ptext (case seg of - Text -> sLit ".align 4" - Data -> sLit ".align 8" - ReadOnlyData -> sLit ".align 8" - RelocatableReadOnlyData - -> sLit ".align 8" - UninitialisedData -> sLit ".align 8" - ReadOnlyData16 -> sLit ".align 16" - -- TODO: This is copied from the ReadOnlyData case, but it can likely be - -- made more efficient. - CString -> sLit ".align 8" - OtherSection _ -> panic "PprMach.pprSectionHeader: unknown section") - --- | Pretty print a data item. -pprDataItem :: CmmLit -> SDoc -pprDataItem lit - = sdocWithDynFlags $ \dflags -> - vcat (ppr_item (cmmTypeFormat $ cmmLitType dflags lit) lit) - where - imm = litToImm lit - - ppr_item II8 _ = [text "\t.byte\t" <> pprImm imm] - ppr_item II32 _ = [text "\t.long\t" <> pprImm imm] - - ppr_item FF32 (CmmFloat r _) - = let bs = floatToBytes (fromRational r) - in map (\b -> text "\t.byte\t" <> pprImm (ImmInt b)) bs - - ppr_item FF64 (CmmFloat r _) - = let bs = doubleToBytes (fromRational r) - in map (\b -> text "\t.byte\t" <> pprImm (ImmInt b)) bs - - ppr_item II16 _ = [text "\t.short\t" <> pprImm imm] - ppr_item II64 _ = [text "\t.quad\t" <> pprImm imm] - ppr_item _ _ = panic "SPARC.Ppr.pprDataItem: no match" - - --- | Pretty print an instruction. -pprInstr :: Instr -> SDoc - --- nuke comments. -pprInstr (COMMENT _) - = empty - -pprInstr (DELTA d) - = pprInstr (COMMENT (mkFastString ("\tdelta = " ++ show d))) - --- Newblocks and LData should have been slurped out before producing the .s file. -pprInstr (NEWBLOCK _) - = panic "X86.Ppr.pprInstr: NEWBLOCK" - -pprInstr (LDATA _ _) - = panic "PprMach.pprInstr: LDATA" - --- 64 bit FP loads are expanded into individual instructions in CodeGen.Expand -pprInstr (LD FF64 _ reg) - | RegReal (RealRegSingle{}) <- reg - = panic "SPARC.Ppr: not emitting potentially misaligned LD FF64 instr" - -pprInstr (LD format addr reg) - = hcat [ - text "\tld", - pprFormat format, - char '\t', - lbrack, - pprAddr addr, - pp_rbracket_comma, - pprReg reg - ] - --- 64 bit FP stores are expanded into individual instructions in CodeGen.Expand -pprInstr (ST FF64 reg _) - | RegReal (RealRegSingle{}) <- reg - = panic "SPARC.Ppr: not emitting potentially misaligned ST FF64 instr" - --- no distinction is made between signed and unsigned bytes on stores for the --- Sparc opcodes (at least I cannot see any, and gas is nagging me --SOF), --- so we call a special-purpose pprFormat for ST.. -pprInstr (ST format reg addr) - = hcat [ - text "\tst", - pprStFormat format, - char '\t', - pprReg reg, - pp_comma_lbracket, - pprAddr addr, - rbrack - ] - - -pprInstr (ADD x cc reg1 ri reg2) - | not x && not cc && riZero ri - = hcat [ text "\tmov\t", pprReg reg1, comma, pprReg reg2 ] - - | otherwise - = pprRegRIReg (if x then sLit "addx" else sLit "add") cc reg1 ri reg2 - - -pprInstr (SUB x cc reg1 ri reg2) - | not x && cc && reg2 == g0 - = hcat [ text "\tcmp\t", pprReg reg1, comma, pprRI ri ] - - | not x && not cc && riZero ri - = hcat [ text "\tmov\t", pprReg reg1, comma, pprReg reg2 ] - - | otherwise - = pprRegRIReg (if x then sLit "subx" else sLit "sub") cc reg1 ri reg2 - -pprInstr (AND b reg1 ri reg2) = pprRegRIReg (sLit "and") b reg1 ri reg2 - -pprInstr (ANDN b reg1 ri reg2) = pprRegRIReg (sLit "andn") b reg1 ri reg2 - -pprInstr (OR b reg1 ri reg2) - | not b && reg1 == g0 - = let doit = hcat [ text "\tmov\t", pprRI ri, comma, pprReg reg2 ] - in case ri of - RIReg rrr | rrr == reg2 -> empty - _ -> doit - - | otherwise - = pprRegRIReg (sLit "or") b reg1 ri reg2 - -pprInstr (ORN b reg1 ri reg2) = pprRegRIReg (sLit "orn") b reg1 ri reg2 - -pprInstr (XOR b reg1 ri reg2) = pprRegRIReg (sLit "xor") b reg1 ri reg2 -pprInstr (XNOR b reg1 ri reg2) = pprRegRIReg (sLit "xnor") b reg1 ri reg2 - -pprInstr (SLL reg1 ri reg2) = pprRegRIReg (sLit "sll") False reg1 ri reg2 -pprInstr (SRL reg1 ri reg2) = pprRegRIReg (sLit "srl") False reg1 ri reg2 -pprInstr (SRA reg1 ri reg2) = pprRegRIReg (sLit "sra") False reg1 ri reg2 - -pprInstr (RDY rd) = text "\trd\t%y," <> pprReg rd -pprInstr (WRY reg1 reg2) - = text "\twr\t" - <> pprReg reg1 - <> char ',' - <> pprReg reg2 - <> char ',' - <> text "%y" - -pprInstr (SMUL b reg1 ri reg2) = pprRegRIReg (sLit "smul") b reg1 ri reg2 -pprInstr (UMUL b reg1 ri reg2) = pprRegRIReg (sLit "umul") b reg1 ri reg2 -pprInstr (SDIV b reg1 ri reg2) = pprRegRIReg (sLit "sdiv") b reg1 ri reg2 -pprInstr (UDIV b reg1 ri reg2) = pprRegRIReg (sLit "udiv") b reg1 ri reg2 - -pprInstr (SETHI imm reg) - = hcat [ - text "\tsethi\t", - pprImm imm, - comma, - pprReg reg - ] - -pprInstr NOP - = text "\tnop" - -pprInstr (FABS format reg1 reg2) - = pprFormatRegReg (sLit "fabs") format reg1 reg2 - -pprInstr (FADD format reg1 reg2 reg3) - = pprFormatRegRegReg (sLit "fadd") format reg1 reg2 reg3 - -pprInstr (FCMP e format reg1 reg2) - = pprFormatRegReg (if e then sLit "fcmpe" else sLit "fcmp") - format reg1 reg2 - -pprInstr (FDIV format reg1 reg2 reg3) - = pprFormatRegRegReg (sLit "fdiv") format reg1 reg2 reg3 - -pprInstr (FMOV format reg1 reg2) - = pprFormatRegReg (sLit "fmov") format reg1 reg2 - -pprInstr (FMUL format reg1 reg2 reg3) - = pprFormatRegRegReg (sLit "fmul") format reg1 reg2 reg3 - -pprInstr (FNEG format reg1 reg2) - = pprFormatRegReg (sLit "fneg") format reg1 reg2 - -pprInstr (FSQRT format reg1 reg2) - = pprFormatRegReg (sLit "fsqrt") format reg1 reg2 - -pprInstr (FSUB format reg1 reg2 reg3) - = pprFormatRegRegReg (sLit "fsub") format reg1 reg2 reg3 - -pprInstr (FxTOy format1 format2 reg1 reg2) - = hcat [ - text "\tf", - ptext - (case format1 of - II32 -> sLit "ito" - FF32 -> sLit "sto" - FF64 -> sLit "dto" - _ -> panic "SPARC.Ppr.pprInstr.FxToY: no match"), - ptext - (case format2 of - II32 -> sLit "i\t" - II64 -> sLit "x\t" - FF32 -> sLit "s\t" - FF64 -> sLit "d\t" - _ -> panic "SPARC.Ppr.pprInstr.FxToY: no match"), - pprReg reg1, comma, pprReg reg2 - ] - - -pprInstr (BI cond b blockid) - = hcat [ - text "\tb", pprCond cond, - if b then pp_comma_a else empty, - char '\t', - ppr (blockLbl blockid) - ] - -pprInstr (BF cond b blockid) - = hcat [ - text "\tfb", pprCond cond, - if b then pp_comma_a else empty, - char '\t', - ppr (blockLbl blockid) - ] - -pprInstr (JMP addr) = text "\tjmp\t" <> pprAddr addr -pprInstr (JMP_TBL op _ _) = pprInstr (JMP op) - -pprInstr (CALL (Left imm) n _) - = hcat [ text "\tcall\t", pprImm imm, comma, int n ] - -pprInstr (CALL (Right reg) n _) - = hcat [ text "\tcall\t", pprReg reg, comma, int n ] - - --- | Pretty print a RI -pprRI :: RI -> SDoc -pprRI (RIReg r) = pprReg r -pprRI (RIImm r) = pprImm r - - --- | Pretty print a two reg instruction. -pprFormatRegReg :: PtrString -> Format -> Reg -> Reg -> SDoc -pprFormatRegReg name format reg1 reg2 - = hcat [ - char '\t', - ptext name, - (case format of - FF32 -> text "s\t" - FF64 -> text "d\t" - _ -> panic "SPARC.Ppr.pprFormatRegReg: no match"), - - pprReg reg1, - comma, - pprReg reg2 - ] - - --- | Pretty print a three reg instruction. -pprFormatRegRegReg :: PtrString -> Format -> Reg -> Reg -> Reg -> SDoc -pprFormatRegRegReg name format reg1 reg2 reg3 - = hcat [ - char '\t', - ptext name, - (case format of - FF32 -> text "s\t" - FF64 -> text "d\t" - _ -> panic "SPARC.Ppr.pprFormatRegReg: no match"), - pprReg reg1, - comma, - pprReg reg2, - comma, - pprReg reg3 - ] - - --- | Pretty print an instruction of two regs and a ri. -pprRegRIReg :: PtrString -> Bool -> Reg -> RI -> Reg -> SDoc -pprRegRIReg name b reg1 ri reg2 - = hcat [ - char '\t', - ptext name, - if b then text "cc\t" else char '\t', - pprReg reg1, - comma, - pprRI ri, - comma, - pprReg reg2 - ] - -{- -pprRIReg :: PtrString -> Bool -> RI -> Reg -> SDoc -pprRIReg name b ri reg1 - = hcat [ - char '\t', - ptext name, - if b then text "cc\t" else char '\t', - pprRI ri, - comma, - pprReg reg1 - ] --} - -{- -pp_ld_lbracket :: SDoc -pp_ld_lbracket = text "\tld\t[" --} - -pp_rbracket_comma :: SDoc -pp_rbracket_comma = text "]," - - -pp_comma_lbracket :: SDoc -pp_comma_lbracket = text ",[" - - -pp_comma_a :: SDoc -pp_comma_a = text ",a" diff --git a/compiler/nativeGen/SPARC/Regs.hs b/compiler/nativeGen/SPARC/Regs.hs deleted file mode 100644 index 8f470ad79d..0000000000 --- a/compiler/nativeGen/SPARC/Regs.hs +++ /dev/null @@ -1,259 +0,0 @@ --- ----------------------------------------------------------------------------- --- --- (c) The University of Glasgow 1994-2004 --- --- ----------------------------------------------------------------------------- - -module SPARC.Regs ( - -- registers - showReg, - virtualRegSqueeze, - realRegSqueeze, - classOfRealReg, - allRealRegs, - - -- machine specific info - gReg, iReg, lReg, oReg, fReg, - fp, sp, g0, g1, g2, o0, o1, f0, f1, f6, f8, f22, f26, f27, - - -- allocatable - allocatableRegs, - - -- args - argRegs, - allArgRegs, - callClobberedRegs, - - -- - mkVirtualReg, - regDotColor -) - -where - - -import GhcPrelude - -import GHC.Platform.SPARC -import Reg -import RegClass -import Format - -import Unique -import Outputable - -{- - The SPARC has 64 registers of interest; 32 integer registers and 32 - floating point registers. The mapping of STG registers to SPARC - machine registers is defined in StgRegs.h. We are, of course, - prepared for any eventuality. - - The whole fp-register pairing thing on sparcs is a huge nuisance. See - includes/stg/MachRegs.h for a description of what's going on - here. --} - - --- | Get the standard name for the register with this number. -showReg :: RegNo -> String -showReg n - | n >= 0 && n < 8 = "%g" ++ show n - | n >= 8 && n < 16 = "%o" ++ show (n-8) - | n >= 16 && n < 24 = "%l" ++ show (n-16) - | n >= 24 && n < 32 = "%i" ++ show (n-24) - | n >= 32 && n < 64 = "%f" ++ show (n-32) - | otherwise = panic "SPARC.Regs.showReg: unknown sparc register" - - --- Get the register class of a certain real reg -classOfRealReg :: RealReg -> RegClass -classOfRealReg reg - = case reg of - RealRegSingle i - | i < 32 -> RcInteger - | otherwise -> RcFloat - - RealRegPair{} -> RcDouble - - --- | regSqueeze_class reg --- Calculate the maximum number of register colors that could be --- denied to a node of this class due to having this reg --- as a neighbour. --- -{-# INLINE virtualRegSqueeze #-} -virtualRegSqueeze :: RegClass -> VirtualReg -> Int - -virtualRegSqueeze cls vr - = case cls of - RcInteger - -> case vr of - VirtualRegI{} -> 1 - VirtualRegHi{} -> 1 - _other -> 0 - - RcFloat - -> case vr of - VirtualRegF{} -> 1 - VirtualRegD{} -> 2 - _other -> 0 - - RcDouble - -> case vr of - VirtualRegF{} -> 1 - VirtualRegD{} -> 1 - _other -> 0 - - -{-# INLINE realRegSqueeze #-} -realRegSqueeze :: RegClass -> RealReg -> Int - -realRegSqueeze cls rr - = case cls of - RcInteger - -> case rr of - RealRegSingle regNo - | regNo < 32 -> 1 - | otherwise -> 0 - - RealRegPair{} -> 0 - - RcFloat - -> case rr of - RealRegSingle regNo - | regNo < 32 -> 0 - | otherwise -> 1 - - RealRegPair{} -> 2 - - RcDouble - -> case rr of - RealRegSingle regNo - | regNo < 32 -> 0 - | otherwise -> 1 - - RealRegPair{} -> 1 - - --- | All the allocatable registers in the machine, --- including register pairs. -allRealRegs :: [RealReg] -allRealRegs - = [ (RealRegSingle i) | i <- [0..63] ] - ++ [ (RealRegPair i (i+1)) | i <- [32, 34 .. 62 ] ] - - --- | Get the regno for this sort of reg -gReg, lReg, iReg, oReg, fReg :: Int -> RegNo - -gReg x = x -- global regs -oReg x = (8 + x) -- output regs -lReg x = (16 + x) -- local regs -iReg x = (24 + x) -- input regs -fReg x = (32 + x) -- float regs - - --- | Some specific regs used by the code generator. -g0, g1, g2, fp, sp, o0, o1, f0, f1, f6, f8, f22, f26, f27 :: Reg - -f6 = RegReal (RealRegSingle (fReg 6)) -f8 = RegReal (RealRegSingle (fReg 8)) -f22 = RegReal (RealRegSingle (fReg 22)) -f26 = RegReal (RealRegSingle (fReg 26)) -f27 = RegReal (RealRegSingle (fReg 27)) - --- g0 is always zero, and writes to it vanish. -g0 = RegReal (RealRegSingle (gReg 0)) -g1 = RegReal (RealRegSingle (gReg 1)) -g2 = RegReal (RealRegSingle (gReg 2)) - --- FP, SP, int and float return (from C) regs. -fp = RegReal (RealRegSingle (iReg 6)) -sp = RegReal (RealRegSingle (oReg 6)) -o0 = RegReal (RealRegSingle (oReg 0)) -o1 = RegReal (RealRegSingle (oReg 1)) -f0 = RegReal (RealRegSingle (fReg 0)) -f1 = RegReal (RealRegSingle (fReg 1)) - --- | Produce the second-half-of-a-double register given the first half. -{- -fPair :: Reg -> Maybe Reg -fPair (RealReg n) - | n >= 32 && n `mod` 2 == 0 = Just (RealReg (n+1)) - -fPair (VirtualRegD u) - = Just (VirtualRegHi u) - -fPair reg - = trace ("MachInstrs.fPair: can't get high half of supposed double reg " ++ showPpr reg) - Nothing --} - - --- | All the regs that the register allocator can allocate to, --- with the fixed use regs removed. --- -allocatableRegs :: [RealReg] -allocatableRegs - = let isFree rr - = case rr of - RealRegSingle r -> freeReg r - RealRegPair r1 r2 -> freeReg r1 && freeReg r2 - in filter isFree allRealRegs - - --- | The registers to place arguments for function calls, --- for some number of arguments. --- -argRegs :: RegNo -> [Reg] -argRegs r - = case r of - 0 -> [] - 1 -> map (RegReal . RealRegSingle . oReg) [0] - 2 -> map (RegReal . RealRegSingle . oReg) [0,1] - 3 -> map (RegReal . RealRegSingle . oReg) [0,1,2] - 4 -> map (RegReal . RealRegSingle . oReg) [0,1,2,3] - 5 -> map (RegReal . RealRegSingle . oReg) [0,1,2,3,4] - 6 -> map (RegReal . RealRegSingle . oReg) [0,1,2,3,4,5] - _ -> panic "MachRegs.argRegs(sparc): don't know about >6 arguments!" - - --- | All all the regs that could possibly be returned by argRegs --- -allArgRegs :: [Reg] -allArgRegs - = map (RegReal . RealRegSingle) [oReg i | i <- [0..5]] - - --- These are the regs that we cannot assume stay alive over a C call. --- TODO: Why can we assume that o6 isn't clobbered? -- BL 2009/02 --- -callClobberedRegs :: [Reg] -callClobberedRegs - = map (RegReal . RealRegSingle) - ( oReg 7 : - [oReg i | i <- [0..5]] ++ - [gReg i | i <- [1..7]] ++ - [fReg i | i <- [0..31]] ) - - - --- | Make a virtual reg with this format. -mkVirtualReg :: Unique -> Format -> VirtualReg -mkVirtualReg u format - | not (isFloatFormat format) - = VirtualRegI u - - | otherwise - = case format of - FF32 -> VirtualRegF u - FF64 -> VirtualRegD u - _ -> panic "mkVReg" - - -regDotColor :: RealReg -> SDoc -regDotColor reg - = case classOfRealReg reg of - RcInteger -> text "blue" - RcFloat -> text "red" - _other -> text "green" diff --git a/compiler/nativeGen/SPARC/ShortcutJump.hs b/compiler/nativeGen/SPARC/ShortcutJump.hs deleted file mode 100644 index 35604b0b7e..0000000000 --- a/compiler/nativeGen/SPARC/ShortcutJump.hs +++ /dev/null @@ -1,74 +0,0 @@ -module SPARC.ShortcutJump ( - JumpDest(..), getJumpDestBlockId, - canShortcut, - shortcutJump, - shortcutStatics, - shortBlockId -) - -where - -import GhcPrelude - -import SPARC.Instr -import SPARC.Imm - -import GHC.Cmm.CLabel -import GHC.Cmm.BlockId -import GHC.Cmm - -import Panic -import Outputable - -data JumpDest - = DestBlockId BlockId - | DestImm Imm - --- Debug Instance -instance Outputable JumpDest where - ppr (DestBlockId bid) = text "blk:" <> ppr bid - ppr (DestImm _bid) = text "imm:?" - -getJumpDestBlockId :: JumpDest -> Maybe BlockId -getJumpDestBlockId (DestBlockId bid) = Just bid -getJumpDestBlockId _ = Nothing - - -canShortcut :: Instr -> Maybe JumpDest -canShortcut _ = Nothing - - -shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr -shortcutJump _ other = other - - - -shortcutStatics :: (BlockId -> Maybe JumpDest) -> RawCmmStatics -> RawCmmStatics -shortcutStatics fn (RawCmmStatics lbl statics) - = RawCmmStatics lbl $ map (shortcutStatic fn) statics - -- we need to get the jump tables, so apply the mapping to the entries - -- of a CmmData too. - -shortcutLabel :: (BlockId -> Maybe JumpDest) -> CLabel -> CLabel -shortcutLabel fn lab - | Just blkId <- maybeLocalBlockLabel lab = shortBlockId fn blkId - | otherwise = lab - -shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic -shortcutStatic fn (CmmStaticLit (CmmLabel lab)) - = CmmStaticLit (CmmLabel (shortcutLabel fn lab)) -shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off w)) - = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off w) --- slightly dodgy, we're ignoring the second label, but this --- works with the way we use CmmLabelDiffOff for jump tables now. -shortcutStatic _ other_static - = other_static - - -shortBlockId :: (BlockId -> Maybe JumpDest) -> BlockId -> CLabel -shortBlockId fn blockid = - case fn blockid of - Nothing -> blockLbl blockid - Just (DestBlockId blockid') -> shortBlockId fn blockid' - Just (DestImm (ImmCLbl lbl)) -> lbl - _other -> panic "shortBlockId" diff --git a/compiler/nativeGen/SPARC/Stack.hs b/compiler/nativeGen/SPARC/Stack.hs deleted file mode 100644 index 2553c6135d..0000000000 --- a/compiler/nativeGen/SPARC/Stack.hs +++ /dev/null @@ -1,59 +0,0 @@ -module SPARC.Stack ( - spRel, - fpRel, - spillSlotToOffset, - maxSpillSlots -) - -where - -import GhcPrelude - -import SPARC.AddrMode -import SPARC.Regs -import SPARC.Base -import SPARC.Imm - -import GHC.Driver.Session -import Outputable - --- | Get an AddrMode relative to the address in sp. --- This gives us a stack relative addressing mode for volatile --- temporaries and for excess call arguments. --- -spRel :: Int -- ^ stack offset in words, positive or negative - -> AddrMode - -spRel n = AddrRegImm sp (ImmInt (n * wordLength)) - - --- | Get an address relative to the frame pointer. --- This doesn't work work for offsets greater than 13 bits; we just hope for the best --- -fpRel :: Int -> AddrMode -fpRel n - = AddrRegImm fp (ImmInt (n * wordLength)) - - --- | Convert a spill slot number to a *byte* offset, with no sign. --- -spillSlotToOffset :: DynFlags -> Int -> Int -spillSlotToOffset dflags slot - | slot >= 0 && slot < maxSpillSlots dflags - = 64 + spillSlotSize * slot - - | otherwise - = pprPanic "spillSlotToOffset:" - ( text "invalid spill location: " <> int slot - $$ text "maxSpillSlots: " <> int (maxSpillSlots dflags)) - - --- | The maximum number of spill slots available on the C stack. --- If we use up all of the slots, then we're screwed. --- --- Why do we reserve 64 bytes, instead of using the whole thing?? --- -- BL 2009/02/15 --- -maxSpillSlots :: DynFlags -> Int -maxSpillSlots dflags - = ((spillAreaLength dflags - 64) `div` spillSlotSize) - 1 diff --git a/compiler/nativeGen/TargetReg.hs b/compiler/nativeGen/TargetReg.hs deleted file mode 100644 index e0eca9235d..0000000000 --- a/compiler/nativeGen/TargetReg.hs +++ /dev/null @@ -1,137 +0,0 @@ -{-# LANGUAGE CPP #-} --- | Hard wired things related to registers. --- This is module is preventing the native code generator being able to --- emit code for non-host architectures. --- --- TODO: Do a better job of the overloading, and eliminate this module. --- We'd probably do better with a Register type class, and hook this to --- Instruction somehow. --- --- TODO: We should also make arch specific versions of RegAlloc.Graph.TrivColorable -module TargetReg ( - targetVirtualRegSqueeze, - targetRealRegSqueeze, - targetClassOfRealReg, - targetMkVirtualReg, - targetRegDotColor, - targetClassOfReg -) - -where - -#include "HsVersions.h" - -import GhcPrelude - -import Reg -import RegClass -import Format - -import Outputable -import Unique -import GHC.Platform - -import qualified X86.Regs as X86 -import qualified X86.RegInfo as X86 - -import qualified PPC.Regs as PPC - -import qualified SPARC.Regs as SPARC - -targetVirtualRegSqueeze :: Platform -> RegClass -> VirtualReg -> Int -targetVirtualRegSqueeze platform - = case platformArch platform of - ArchX86 -> X86.virtualRegSqueeze - ArchX86_64 -> X86.virtualRegSqueeze - ArchPPC -> PPC.virtualRegSqueeze - ArchS390X -> panic "targetVirtualRegSqueeze ArchS390X" - ArchSPARC -> SPARC.virtualRegSqueeze - ArchSPARC64 -> panic "targetVirtualRegSqueeze ArchSPARC64" - ArchPPC_64 _ -> PPC.virtualRegSqueeze - ArchARM _ _ _ -> panic "targetVirtualRegSqueeze ArchARM" - ArchARM64 -> panic "targetVirtualRegSqueeze ArchARM64" - ArchAlpha -> panic "targetVirtualRegSqueeze ArchAlpha" - ArchMipseb -> panic "targetVirtualRegSqueeze ArchMipseb" - ArchMipsel -> panic "targetVirtualRegSqueeze ArchMipsel" - ArchJavaScript-> panic "targetVirtualRegSqueeze ArchJavaScript" - ArchUnknown -> panic "targetVirtualRegSqueeze ArchUnknown" - - -targetRealRegSqueeze :: Platform -> RegClass -> RealReg -> Int -targetRealRegSqueeze platform - = case platformArch platform of - ArchX86 -> X86.realRegSqueeze - ArchX86_64 -> X86.realRegSqueeze - ArchPPC -> PPC.realRegSqueeze - ArchS390X -> panic "targetRealRegSqueeze ArchS390X" - ArchSPARC -> SPARC.realRegSqueeze - ArchSPARC64 -> panic "targetRealRegSqueeze ArchSPARC64" - ArchPPC_64 _ -> PPC.realRegSqueeze - ArchARM _ _ _ -> panic "targetRealRegSqueeze ArchARM" - ArchARM64 -> panic "targetRealRegSqueeze ArchARM64" - ArchAlpha -> panic "targetRealRegSqueeze ArchAlpha" - ArchMipseb -> panic "targetRealRegSqueeze ArchMipseb" - ArchMipsel -> panic "targetRealRegSqueeze ArchMipsel" - ArchJavaScript-> panic "targetRealRegSqueeze ArchJavaScript" - ArchUnknown -> panic "targetRealRegSqueeze ArchUnknown" - -targetClassOfRealReg :: Platform -> RealReg -> RegClass -targetClassOfRealReg platform - = case platformArch platform of - ArchX86 -> X86.classOfRealReg platform - ArchX86_64 -> X86.classOfRealReg platform - ArchPPC -> PPC.classOfRealReg - ArchS390X -> panic "targetClassOfRealReg ArchS390X" - ArchSPARC -> SPARC.classOfRealReg - ArchSPARC64 -> panic "targetClassOfRealReg ArchSPARC64" - ArchPPC_64 _ -> PPC.classOfRealReg - ArchARM _ _ _ -> panic "targetClassOfRealReg ArchARM" - ArchARM64 -> panic "targetClassOfRealReg ArchARM64" - ArchAlpha -> panic "targetClassOfRealReg ArchAlpha" - ArchMipseb -> panic "targetClassOfRealReg ArchMipseb" - ArchMipsel -> panic "targetClassOfRealReg ArchMipsel" - ArchJavaScript-> panic "targetClassOfRealReg ArchJavaScript" - ArchUnknown -> panic "targetClassOfRealReg ArchUnknown" - -targetMkVirtualReg :: Platform -> Unique -> Format -> VirtualReg -targetMkVirtualReg platform - = case platformArch platform of - ArchX86 -> X86.mkVirtualReg - ArchX86_64 -> X86.mkVirtualReg - ArchPPC -> PPC.mkVirtualReg - ArchS390X -> panic "targetMkVirtualReg ArchS390X" - ArchSPARC -> SPARC.mkVirtualReg - ArchSPARC64 -> panic "targetMkVirtualReg ArchSPARC64" - ArchPPC_64 _ -> PPC.mkVirtualReg - ArchARM _ _ _ -> panic "targetMkVirtualReg ArchARM" - ArchARM64 -> panic "targetMkVirtualReg ArchARM64" - ArchAlpha -> panic "targetMkVirtualReg ArchAlpha" - ArchMipseb -> panic "targetMkVirtualReg ArchMipseb" - ArchMipsel -> panic "targetMkVirtualReg ArchMipsel" - ArchJavaScript-> panic "targetMkVirtualReg ArchJavaScript" - ArchUnknown -> panic "targetMkVirtualReg ArchUnknown" - -targetRegDotColor :: Platform -> RealReg -> SDoc -targetRegDotColor platform - = case platformArch platform of - ArchX86 -> X86.regDotColor platform - ArchX86_64 -> X86.regDotColor platform - ArchPPC -> PPC.regDotColor - ArchS390X -> panic "targetRegDotColor ArchS390X" - ArchSPARC -> SPARC.regDotColor - ArchSPARC64 -> panic "targetRegDotColor ArchSPARC64" - ArchPPC_64 _ -> PPC.regDotColor - ArchARM _ _ _ -> panic "targetRegDotColor ArchARM" - ArchARM64 -> panic "targetRegDotColor ArchARM64" - ArchAlpha -> panic "targetRegDotColor ArchAlpha" - ArchMipseb -> panic "targetRegDotColor ArchMipseb" - ArchMipsel -> panic "targetRegDotColor ArchMipsel" - ArchJavaScript-> panic "targetRegDotColor ArchJavaScript" - ArchUnknown -> panic "targetRegDotColor ArchUnknown" - - -targetClassOfReg :: Platform -> Reg -> RegClass -targetClassOfReg platform reg - = case reg of - RegVirtual vr -> classOfVirtualReg vr - RegReal rr -> targetClassOfRealReg platform rr diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs deleted file mode 100644 index 26797949f4..0000000000 --- a/compiler/nativeGen/X86/CodeGen.hs +++ /dev/null @@ -1,3743 +0,0 @@ -{-# LANGUAGE CPP, GADTs, NondecreasingIndentation #-} -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE BangPatterns #-} - -#if __GLASGOW_HASKELL__ <= 808 --- GHC 8.10 deprecates this flag, but GHC 8.8 needs it --- The default iteration limit is a bit too low for the definitions --- in this module. -{-# OPTIONS_GHC -fmax-pmcheck-iterations=10000000 #-} -#endif - -{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-} - ------------------------------------------------------------------------------ --- --- Generating machine code (instruction selection) --- --- (c) The University of Glasgow 1996-2004 --- ------------------------------------------------------------------------------ - --- This is a big module, but, if you pay attention to --- (a) the sectioning, and (b) the type signatures, the --- structure should not be too overwhelming. - -module X86.CodeGen ( - cmmTopCodeGen, - generateJumpTableForInstr, - extractUnwindPoints, - invertCondBranches, - InstrBlock -) - -where - -#include "HsVersions.h" - --- NCG stuff: -import GhcPrelude - -import X86.Instr -import X86.Cond -import X86.Regs -import X86.Ppr ( ) -import X86.RegInfo - -import GHC.Platform.Regs -import CPrim -import GHC.Cmm.DebugBlock ( DebugBlock(..), UnwindPoint(..), UnwindTable - , UnwindExpr(UwReg), toUnwindExpr ) -import Instruction -import PIC -import NCGMonad ( NatM, getNewRegNat, getNewLabelNat, setDeltaNat - , getDeltaNat, getBlockIdNat, getPicBaseNat, getNewRegPairNat - , getPicBaseMaybeNat, getDebugBlock, getFileId - , addImmediateSuccessorNat, updateCfgNat) -import CFG -import Format -import Reg -import GHC.Platform - --- Our intermediate code: -import BasicTypes -import GHC.Cmm.BlockId -import Module ( primUnitId ) -import GHC.Cmm.Utils -import GHC.Cmm.Switch -import GHC.Cmm -import GHC.Cmm.Dataflow.Block -import GHC.Cmm.Dataflow.Collections -import GHC.Cmm.Dataflow.Graph -import GHC.Cmm.Dataflow.Label -import GHC.Cmm.CLabel -import CoreSyn ( Tickish(..) ) -import SrcLoc ( srcSpanFile, srcSpanStartLine, srcSpanStartCol ) - --- The rest: -import ForeignCall ( CCallConv(..) ) -import OrdList -import Outputable -import FastString -import GHC.Driver.Session -import Util -import UniqSupply ( getUniqueM ) - -import Control.Monad -import Data.Bits -import Data.Foldable (fold) -import Data.Int -import Data.Maybe -import Data.Word - -import qualified Data.Map as M - -is32BitPlatform :: NatM Bool -is32BitPlatform = do - dflags <- getDynFlags - return $ target32Bit (targetPlatform dflags) - -sse2Enabled :: NatM Bool -sse2Enabled = do - dflags <- getDynFlags - case platformArch (targetPlatform dflags) of - -- We Assume SSE1 and SSE2 operations are available on both - -- x86 and x86_64. Historically we didn't default to SSE2 and - -- SSE1 on x86, which results in defacto nondeterminism for how - -- rounding behaves in the associated x87 floating point instructions - -- because variations in the spill/fpu stack placement of arguments for - -- operations would change the precision and final result of what - -- would otherwise be the same expressions with respect to single or - -- double precision IEEE floating point computations. - ArchX86_64 -> return True - ArchX86 -> return True - _ -> panic "trying to generate x86/x86_64 on the wrong platform" - - -sse4_2Enabled :: NatM Bool -sse4_2Enabled = do - dflags <- getDynFlags - return (isSse4_2Enabled dflags) - - -cmmTopCodeGen - :: RawCmmDecl - -> NatM [NatCmmDecl (Alignment, RawCmmStatics) Instr] - -cmmTopCodeGen (CmmProc info lab live graph) = do - let blocks = toBlockListEntryFirst graph - (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks - picBaseMb <- getPicBaseMaybeNat - dflags <- getDynFlags - let proc = CmmProc info lab live (ListGraph $ concat nat_blocks) - tops = proc : concat statics - os = platformOS $ targetPlatform dflags - - case picBaseMb of - Just picBase -> initializePicBase_x86 ArchX86 os picBase tops - Nothing -> return tops - -cmmTopCodeGen (CmmData sec dat) = do - return [CmmData sec (mkAlignment 1, dat)] -- no translation, we just use CmmStatic - -{- Note [Verifying basic blocks] - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - We want to guarantee a few things about the results - of instruction selection. - - Namely that each basic blocks consists of: - * A (potentially empty) sequence of straight line instructions - followed by - * A (potentially empty) sequence of jump like instructions. - - We can verify this by going through the instructions and - making sure that any non-jumpish instruction can't appear - after a jumpish instruction. - - There are gotchas however: - * CALLs are strictly speaking control flow but here we care - not about them. Hence we treat them as regular instructions. - - It's safe for them to appear inside a basic block - as (ignoring side effects inside the call) they will result in - straight line code. - - * NEWBLOCK marks the start of a new basic block so can - be followed by any instructions. --} - --- Verifying basic blocks is cheap, but not cheap enough to enable it unconditionally. -verifyBasicBlock :: [Instr] -> () -verifyBasicBlock instrs - | debugIsOn = go False instrs - | otherwise = () - where - go _ [] = () - go atEnd (i:instr) - = case i of - -- Start a new basic block - NEWBLOCK {} -> go False instr - -- Calls are not viable block terminators - CALL {} | atEnd -> faultyBlockWith i - | not atEnd -> go atEnd instr - -- All instructions ok, check if we reached the end and continue. - _ | not atEnd -> go (isJumpishInstr i) instr - -- Only jumps allowed at the end of basic blocks. - | otherwise -> if isJumpishInstr i - then go True instr - else faultyBlockWith i - faultyBlockWith i - = pprPanic "Non control flow instructions after end of basic block." - (ppr i <+> text "in:" $$ vcat (map ppr instrs)) - -basicBlockCodeGen - :: CmmBlock - -> NatM ( [NatBasicBlock Instr] - , [NatCmmDecl (Alignment, RawCmmStatics) Instr]) - -basicBlockCodeGen block = do - let (_, nodes, tail) = blockSplit block - id = entryLabel block - stmts = blockToList nodes - -- Generate location directive - dbg <- getDebugBlock (entryLabel block) - loc_instrs <- case dblSourceTick =<< dbg of - Just (SourceNote span name) - -> do fileId <- getFileId (srcSpanFile span) - let line = srcSpanStartLine span; col = srcSpanStartCol span - return $ unitOL $ LOCATION fileId line col name - _ -> return nilOL - (mid_instrs,mid_bid) <- stmtsToInstrs id stmts - (!tail_instrs,_) <- stmtToInstrs mid_bid tail - let instrs = loc_instrs `appOL` mid_instrs `appOL` tail_instrs - return $! verifyBasicBlock (fromOL instrs) - instrs' <- fold <$> traverse addSpUnwindings instrs - -- code generation may introduce new basic block boundaries, which - -- are indicated by the NEWBLOCK instruction. We must split up the - -- instruction stream into basic blocks again. Also, we extract - -- LDATAs here too. - let - (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs' - - mkBlocks (NEWBLOCK id) (instrs,blocks,statics) - = ([], BasicBlock id instrs : blocks, statics) - mkBlocks (LDATA sec dat) (instrs,blocks,statics) - = (instrs, blocks, CmmData sec dat:statics) - mkBlocks instr (instrs,blocks,statics) - = (instr:instrs, blocks, statics) - return (BasicBlock id top : other_blocks, statics) - --- | Convert 'DELTA' instructions into 'UNWIND' instructions to capture changes --- in the @sp@ register. See Note [What is this unwinding business?] in Debug --- for details. -addSpUnwindings :: Instr -> NatM (OrdList Instr) -addSpUnwindings instr@(DELTA d) = do - dflags <- getDynFlags - if debugLevel dflags >= 1 - then do lbl <- mkAsmTempLabel <$> getUniqueM - let unwind = M.singleton MachSp (Just $ UwReg MachSp $ negate d) - return $ toOL [ instr, UNWIND lbl unwind ] - else return (unitOL instr) -addSpUnwindings instr = return $ unitOL instr - -{- Note [Keeping track of the current block] - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -When generating instructions for Cmm we sometimes require -the current block for things like retry loops. - -We also sometimes change the current block, if a MachOP -results in branching control flow. - -Issues arise if we have two statements in the same block, -which both depend on the current block id *and* change the -basic block after them. This happens for atomic primops -in the X86 backend where we want to update the CFG data structure -when introducing new basic blocks. - -For example in #17334 we got this Cmm code: - - c3Bf: // global - (_s3t1::I64) = call MO_AtomicRMW W64 AMO_And(_s3sQ::P64 + 88, 18); - (_s3t4::I64) = call MO_AtomicRMW W64 AMO_Or(_s3sQ::P64 + 88, 0); - _s3sT::I64 = _s3sV::I64; - goto c3B1; - -This resulted in two new basic blocks being inserted: - - c3Bf: - movl $18,%vI_n3Bo - movq 88(%vI_s3sQ),%rax - jmp _n3Bp - n3Bp: - ... - cmpxchgq %vI_n3Bq,88(%vI_s3sQ) - jne _n3Bp - ... - jmp _n3Bs - n3Bs: - ... - cmpxchgq %vI_n3Bt,88(%vI_s3sQ) - jne _n3Bs - ... - jmp _c3B1 - ... - -Based on the Cmm we called stmtToInstrs we translated both atomic operations under -the assumption they would be placed into their Cmm basic block `c3Bf`. -However for the retry loop we introduce new labels, so this is not the case -for the second statement. -This resulted in a desync between the explicit control flow graph -we construct as a separate data type and the actual control flow graph in the code. - -Instead we now return the new basic block if a statement causes a change -in the current block and use the block for all following statements. - -For this reason genCCall is also split into two parts. -One for calls which *won't* change the basic blocks in -which successive instructions will be placed. -A different one for calls which *are* known to change the -basic block. - --} - --- See Note [Keeping track of the current block] for why --- we pass the BlockId. -stmtsToInstrs :: BlockId -- ^ Basic block these statement will start to be placed in. - -> [CmmNode O O] -- ^ Cmm Statement - -> NatM (InstrBlock, BlockId) -- ^ Resulting instruction -stmtsToInstrs bid stmts = - go bid stmts nilOL - where - go bid [] instrs = return (instrs,bid) - go bid (s:stmts) instrs = do - (instrs',bid') <- stmtToInstrs bid s - -- If the statement introduced a new block, we use that one - let !newBid = fromMaybe bid bid' - go newBid stmts (instrs `appOL` instrs') - --- | `bid` refers to the current block and is used to update the CFG --- if new blocks are inserted in the control flow. --- See Note [Keeping track of the current block] for more details. -stmtToInstrs :: BlockId -- ^ Basic block this statement will start to be placed in. - -> CmmNode e x - -> NatM (InstrBlock, Maybe BlockId) - -- ^ Instructions, and bid of new block if successive - -- statements are placed in a different basic block. -stmtToInstrs bid stmt = do - dflags <- getDynFlags - is32Bit <- is32BitPlatform - case stmt of - CmmUnsafeForeignCall target result_regs args - -> genCCall dflags is32Bit target result_regs args bid - - _ -> (,Nothing) <$> case stmt of - CmmComment s -> return (unitOL (COMMENT s)) - CmmTick {} -> return nilOL - - CmmUnwind regs -> do - let to_unwind_entry :: (GlobalReg, Maybe CmmExpr) -> UnwindTable - to_unwind_entry (reg, expr) = M.singleton reg (fmap toUnwindExpr expr) - case foldMap to_unwind_entry regs of - tbl | M.null tbl -> return nilOL - | otherwise -> do - lbl <- mkAsmTempLabel <$> getUniqueM - return $ unitOL $ UNWIND lbl tbl - - CmmAssign reg src - | isFloatType ty -> assignReg_FltCode format reg src - | is32Bit && isWord64 ty -> assignReg_I64Code reg src - | otherwise -> assignReg_IntCode format reg src - where ty = cmmRegType dflags reg - format = cmmTypeFormat ty - - CmmStore addr src - | isFloatType ty -> assignMem_FltCode format addr src - | is32Bit && isWord64 ty -> assignMem_I64Code addr src - | otherwise -> assignMem_IntCode format addr src - where ty = cmmExprType dflags src - format = cmmTypeFormat ty - - CmmBranch id -> return $ genBranch id - - --We try to arrange blocks such that the likely branch is the fallthrough - --in GHC.Cmm.ContFlowOpt. So we can assume the condition is likely false here. - CmmCondBranch arg true false _ -> genCondBranch bid true false arg - CmmSwitch arg ids -> do dflags <- getDynFlags - genSwitch dflags arg ids - CmmCall { cml_target = arg - , cml_args_regs = gregs } -> do - dflags <- getDynFlags - genJump arg (jumpRegs dflags gregs) - _ -> - panic "stmtToInstrs: statement should have been cps'd away" - - -jumpRegs :: DynFlags -> [GlobalReg] -> [Reg] -jumpRegs dflags gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ] - where platform = targetPlatform dflags - --------------------------------------------------------------------------------- --- | 'InstrBlock's are the insn sequences generated by the insn selectors. --- They are really trees of insns to facilitate fast appending, where a --- left-to-right traversal yields the insns in the correct order. --- -type InstrBlock - = OrdList Instr - - --- | Condition codes passed up the tree. --- -data CondCode - = CondCode Bool Cond InstrBlock - - --- | a.k.a "Register64" --- Reg is the lower 32-bit temporary which contains the result. --- Use getHiVRegFromLo to find the other VRegUnique. --- --- Rules of this simplified insn selection game are therefore that --- the returned Reg may be modified --- -data ChildCode64 - = ChildCode64 - InstrBlock - Reg - - --- | Register's passed up the tree. If the stix code forces the register --- to live in a pre-decided machine register, it comes out as @Fixed@; --- otherwise, it comes out as @Any@, and the parent can decide which --- register to put it in. --- -data Register - = Fixed Format Reg InstrBlock - | Any Format (Reg -> InstrBlock) - - -swizzleRegisterRep :: Register -> Format -> Register -swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code -swizzleRegisterRep (Any _ codefn) format = Any format codefn - - --- | Grab the Reg for a CmmReg -getRegisterReg :: Platform -> CmmReg -> Reg - -getRegisterReg _ (CmmLocal (LocalReg u pk)) - = -- by Assuming SSE2, Int,Word,Float,Double all can be register allocated - let fmt = cmmTypeFormat pk in - RegVirtual (mkVirtualReg u fmt) - -getRegisterReg platform (CmmGlobal mid) - = case globalRegMaybe platform mid of - Just reg -> RegReal $ reg - Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid) - -- By this stage, the only MagicIds remaining should be the - -- ones which map to a real machine register on this - -- platform. Hence ... - - --- | Memory addressing modes passed up the tree. -data Amode - = Amode AddrMode InstrBlock - -{- -Now, given a tree (the argument to a CmmLoad) that references memory, -produce a suitable addressing mode. - -A Rule of the Game (tm) for Amodes: use of the addr bit must -immediately follow use of the code part, since the code part puts -values in registers which the addr then refers to. So you can't put -anything in between, lest it overwrite some of those registers. If -you need to do some other computation between the code part and use of -the addr bit, first store the effective address from the amode in a -temporary, then do the other computation, and then use the temporary: - - code - LEA amode, tmp - ... other computation ... - ... (tmp) ... --} - - --- | Check whether an integer will fit in 32 bits. --- A CmmInt is intended to be truncated to the appropriate --- number of bits, so here we truncate it to Int64. This is --- important because e.g. -1 as a CmmInt might be either --- -1 or 18446744073709551615. --- -is32BitInteger :: Integer -> Bool -is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000 - where i64 = fromIntegral i :: Int64 - - --- | Convert a BlockId to some CmmStatic data -jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic -jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags)) -jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel) - where blockLabel = blockLbl blockid - - --- ----------------------------------------------------------------------------- --- General things for putting together code sequences - --- Expand CmmRegOff. ToDo: should we do it this way around, or convert --- CmmExprs into CmmRegOff? -mangleIndexTree :: DynFlags -> CmmReg -> Int -> CmmExpr -mangleIndexTree dflags reg off - = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)] - where width = typeWidth (cmmRegType dflags reg) - --- | The dual to getAnyReg: compute an expression into a register, but --- we don't mind which one it is. -getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock) -getSomeReg expr = do - r <- getRegister expr - case r of - Any rep code -> do - tmp <- getNewRegNat rep - return (tmp, code tmp) - Fixed _ reg code -> - return (reg, code) - - -assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock -assignMem_I64Code addrTree valueTree = do - Amode addr addr_code <- getAmode addrTree - ChildCode64 vcode rlo <- iselExpr64 valueTree - let - rhi = getHiVRegFromLo rlo - - -- Little-endian store - mov_lo = MOV II32 (OpReg rlo) (OpAddr addr) - mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4))) - return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi) - - -assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock -assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do - ChildCode64 vcode r_src_lo <- iselExpr64 valueTree - let - r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32 - r_dst_hi = getHiVRegFromLo r_dst_lo - r_src_hi = getHiVRegFromLo r_src_lo - mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo) - mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi) - return ( - vcode `snocOL` mov_lo `snocOL` mov_hi - ) - -assignReg_I64Code _ _ - = panic "assignReg_I64Code(i386): invalid lvalue" - - -iselExpr64 :: CmmExpr -> NatM ChildCode64 -iselExpr64 (CmmLit (CmmInt i _)) = do - (rlo,rhi) <- getNewRegPairNat II32 - let - r = fromIntegral (fromIntegral i :: Word32) - q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32) - code = toOL [ - MOV II32 (OpImm (ImmInteger r)) (OpReg rlo), - MOV II32 (OpImm (ImmInteger q)) (OpReg rhi) - ] - return (ChildCode64 code rlo) - -iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do - Amode addr addr_code <- getAmode addrTree - (rlo,rhi) <- getNewRegPairNat II32 - let - mov_lo = MOV II32 (OpAddr addr) (OpReg rlo) - mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi) - return ( - ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) - rlo - ) - -iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty - = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32)) - --- we handle addition, but rather badly -iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do - ChildCode64 code1 r1lo <- iselExpr64 e1 - (rlo,rhi) <- getNewRegPairNat II32 - let - r = fromIntegral (fromIntegral i :: Word32) - q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32) - r1hi = getHiVRegFromLo r1lo - code = code1 `appOL` - toOL [ MOV II32 (OpReg r1lo) (OpReg rlo), - ADD II32 (OpImm (ImmInteger r)) (OpReg rlo), - MOV II32 (OpReg r1hi) (OpReg rhi), - ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ] - return (ChildCode64 code rlo) - -iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do - ChildCode64 code1 r1lo <- iselExpr64 e1 - ChildCode64 code2 r2lo <- iselExpr64 e2 - (rlo,rhi) <- getNewRegPairNat II32 - let - r1hi = getHiVRegFromLo r1lo - r2hi = getHiVRegFromLo r2lo - code = code1 `appOL` - code2 `appOL` - toOL [ MOV II32 (OpReg r1lo) (OpReg rlo), - ADD II32 (OpReg r2lo) (OpReg rlo), - MOV II32 (OpReg r1hi) (OpReg rhi), - ADC II32 (OpReg r2hi) (OpReg rhi) ] - return (ChildCode64 code rlo) - -iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do - ChildCode64 code1 r1lo <- iselExpr64 e1 - ChildCode64 code2 r2lo <- iselExpr64 e2 - (rlo,rhi) <- getNewRegPairNat II32 - let - r1hi = getHiVRegFromLo r1lo - r2hi = getHiVRegFromLo r2lo - code = code1 `appOL` - code2 `appOL` - toOL [ MOV II32 (OpReg r1lo) (OpReg rlo), - SUB II32 (OpReg r2lo) (OpReg rlo), - MOV II32 (OpReg r1hi) (OpReg rhi), - SBB II32 (OpReg r2hi) (OpReg rhi) ] - return (ChildCode64 code rlo) - -iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do - fn <- getAnyReg expr - r_dst_lo <- getNewRegNat II32 - let r_dst_hi = getHiVRegFromLo r_dst_lo - code = fn r_dst_lo - return ( - ChildCode64 (code `snocOL` - MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi)) - r_dst_lo - ) - -iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do - fn <- getAnyReg expr - r_dst_lo <- getNewRegNat II32 - let r_dst_hi = getHiVRegFromLo r_dst_lo - code = fn r_dst_lo - return ( - ChildCode64 (code `snocOL` - MOV II32 (OpReg r_dst_lo) (OpReg eax) `snocOL` - CLTD II32 `snocOL` - MOV II32 (OpReg eax) (OpReg r_dst_lo) `snocOL` - MOV II32 (OpReg edx) (OpReg r_dst_hi)) - r_dst_lo - ) - -iselExpr64 expr - = pprPanic "iselExpr64(i386)" (ppr expr) - - --------------------------------------------------------------------------------- -getRegister :: CmmExpr -> NatM Register -getRegister e = do dflags <- getDynFlags - is32Bit <- is32BitPlatform - getRegister' dflags is32Bit e - -getRegister' :: DynFlags -> Bool -> CmmExpr -> NatM Register - -getRegister' dflags is32Bit (CmmReg reg) - = case reg of - CmmGlobal PicBaseReg - | is32Bit -> - -- on x86_64, we have %rip for PicBaseReg, but it's not - -- a full-featured register, it can only be used for - -- rip-relative addressing. - do reg' <- getPicBaseNat (archWordFormat is32Bit) - return (Fixed (archWordFormat is32Bit) reg' nilOL) - _ -> - do - let - fmt = cmmTypeFormat (cmmRegType dflags reg) - format = fmt - -- - let platform = targetPlatform dflags - return (Fixed format - (getRegisterReg platform reg) - nilOL) - - -getRegister' dflags is32Bit (CmmRegOff r n) - = getRegister' dflags is32Bit $ mangleIndexTree dflags r n - -getRegister' dflags is32Bit (CmmMachOp (MO_AlignmentCheck align _) [e]) - = addAlignmentCheck align <$> getRegister' dflags is32Bit e - --- for 32-bit architectures, support some 64 -> 32 bit conversions: --- TO_W_(x), TO_W_(x >> 32) - -getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32) - [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) - | is32Bit = do - ChildCode64 code rlo <- iselExpr64 x - return $ Fixed II32 (getHiVRegFromLo rlo) code - -getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32) - [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) - | is32Bit = do - ChildCode64 code rlo <- iselExpr64 x - return $ Fixed II32 (getHiVRegFromLo rlo) code - -getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32) [x]) - | is32Bit = do - ChildCode64 code rlo <- iselExpr64 x - return $ Fixed II32 rlo code - -getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x]) - | is32Bit = do - ChildCode64 code rlo <- iselExpr64 x - return $ Fixed II32 rlo code - -getRegister' _ _ (CmmLit lit@(CmmFloat f w)) = - float_const_sse2 where - float_const_sse2 - | f == 0.0 = do - let - format = floatFormat w - code dst = unitOL (XOR format (OpReg dst) (OpReg dst)) - -- I don't know why there are xorpd, xorps, and pxor instructions. - -- They all appear to do the same thing --SDM - return (Any format code) - - | otherwise = do - Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit - loadFloatAmode w addr code - --- catch simple cases of zero- or sign-extended load -getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do - code <- intLoadCode (MOVZxL II8) addr - return (Any II32 code) - -getRegister' _ _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do - code <- intLoadCode (MOVSxL II8) addr - return (Any II32 code) - -getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do - code <- intLoadCode (MOVZxL II16) addr - return (Any II32 code) - -getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do - code <- intLoadCode (MOVSxL II16) addr - return (Any II32 code) - --- catch simple cases of zero- or sign-extended load -getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _]) - | not is32Bit = do - code <- intLoadCode (MOVZxL II8) addr - return (Any II64 code) - -getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _]) - | not is32Bit = do - code <- intLoadCode (MOVSxL II8) addr - return (Any II64 code) - -getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _]) - | not is32Bit = do - code <- intLoadCode (MOVZxL II16) addr - return (Any II64 code) - -getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _]) - | not is32Bit = do - code <- intLoadCode (MOVSxL II16) addr - return (Any II64 code) - -getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _]) - | not is32Bit = do - code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend - return (Any II64 code) - -getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _]) - | not is32Bit = do - code <- intLoadCode (MOVSxL II32) addr - return (Any II64 code) - -getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg), - CmmLit displacement]) - | not is32Bit = do - return $ Any II64 (\dst -> unitOL $ - LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst)) - -getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps - case mop of - MO_F_Neg w -> sse2NegCode w x - - - MO_S_Neg w -> triv_ucode NEGI (intFormat w) - MO_Not w -> triv_ucode NOT (intFormat w) - - -- Nop conversions - MO_UU_Conv W32 W8 -> toI8Reg W32 x - MO_SS_Conv W32 W8 -> toI8Reg W32 x - MO_XX_Conv W32 W8 -> toI8Reg W32 x - MO_UU_Conv W16 W8 -> toI8Reg W16 x - MO_SS_Conv W16 W8 -> toI8Reg W16 x - MO_XX_Conv W16 W8 -> toI8Reg W16 x - MO_UU_Conv W32 W16 -> toI16Reg W32 x - MO_SS_Conv W32 W16 -> toI16Reg W32 x - MO_XX_Conv W32 W16 -> toI16Reg W32 x - - MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x - MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x - MO_XX_Conv W64 W32 | not is32Bit -> conversionNop II64 x - MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x - MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x - MO_XX_Conv W64 W16 | not is32Bit -> toI16Reg W64 x - MO_UU_Conv W64 W8 | not is32Bit -> toI8Reg W64 x - MO_SS_Conv W64 W8 | not is32Bit -> toI8Reg W64 x - MO_XX_Conv W64 W8 | not is32Bit -> toI8Reg W64 x - - MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x - MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x - MO_XX_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x - - -- widenings - MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x - MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x - MO_UU_Conv W8 W16 -> integerExtend W8 W16 MOVZxL x - - MO_SS_Conv W8 W32 -> integerExtend W8 W32 MOVSxL x - MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x - MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x - - -- We don't care about the upper bits for MO_XX_Conv, so MOV is enough. However, on 32-bit we - -- have 8-bit registers only for a few registers (as opposed to x86-64 where every register - -- has 8-bit version). So for 32-bit code, we'll just zero-extend. - MO_XX_Conv W8 W32 - | is32Bit -> integerExtend W8 W32 MOVZxL x - | otherwise -> integerExtend W8 W32 MOV x - MO_XX_Conv W8 W16 - | is32Bit -> integerExtend W8 W16 MOVZxL x - | otherwise -> integerExtend W8 W16 MOV x - MO_XX_Conv W16 W32 -> integerExtend W16 W32 MOV x - - MO_UU_Conv W8 W64 | not is32Bit -> integerExtend W8 W64 MOVZxL x - MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x - MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x - MO_SS_Conv W8 W64 | not is32Bit -> integerExtend W8 W64 MOVSxL x - MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x - MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x - -- For 32-to-64 bit zero extension, amd64 uses an ordinary movl. - -- However, we don't want the register allocator to throw it - -- away as an unnecessary reg-to-reg move, so we keep it in - -- the form of a movzl and print it as a movl later. - -- This doesn't apply to MO_XX_Conv since in this case we don't care about - -- the upper bits. So we can just use MOV. - MO_XX_Conv W8 W64 | not is32Bit -> integerExtend W8 W64 MOV x - MO_XX_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOV x - MO_XX_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOV x - - MO_FF_Conv W32 W64 -> coerceFP2FP W64 x - - - MO_FF_Conv W64 W32 -> coerceFP2FP W32 x - - MO_FS_Conv from to -> coerceFP2Int from to x - MO_SF_Conv from to -> coerceInt2FP from to x - - MO_V_Insert {} -> needLlvm - MO_V_Extract {} -> needLlvm - MO_V_Add {} -> needLlvm - MO_V_Sub {} -> needLlvm - MO_V_Mul {} -> needLlvm - MO_VS_Quot {} -> needLlvm - MO_VS_Rem {} -> needLlvm - MO_VS_Neg {} -> needLlvm - MO_VU_Quot {} -> needLlvm - MO_VU_Rem {} -> needLlvm - MO_VF_Insert {} -> needLlvm - MO_VF_Extract {} -> needLlvm - MO_VF_Add {} -> needLlvm - MO_VF_Sub {} -> needLlvm - MO_VF_Mul {} -> needLlvm - MO_VF_Quot {} -> needLlvm - MO_VF_Neg {} -> needLlvm - - _other -> pprPanic "getRegister" (pprMachOp mop) - where - triv_ucode :: (Format -> Operand -> Instr) -> Format -> NatM Register - triv_ucode instr format = trivialUCode format (instr format) x - - -- signed or unsigned extension. - integerExtend :: Width -> Width - -> (Format -> Operand -> Operand -> Instr) - -> CmmExpr -> NatM Register - integerExtend from to instr expr = do - (reg,e_code) <- if from == W8 then getByteReg expr - else getSomeReg expr - let - code dst = - e_code `snocOL` - instr (intFormat from) (OpReg reg) (OpReg dst) - return (Any (intFormat to) code) - - toI8Reg :: Width -> CmmExpr -> NatM Register - toI8Reg new_rep expr - = do codefn <- getAnyReg expr - return (Any (intFormat new_rep) codefn) - -- HACK: use getAnyReg to get a byte-addressable register. - -- If the source was a Fixed register, this will add the - -- mov instruction to put it into the desired destination. - -- We're assuming that the destination won't be a fixed - -- non-byte-addressable register; it won't be, because all - -- fixed registers are word-sized. - - toI16Reg = toI8Reg -- for now - - conversionNop :: Format -> CmmExpr -> NatM Register - conversionNop new_format expr - = do e_code <- getRegister' dflags is32Bit expr - return (swizzleRegisterRep e_code new_format) - - -getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps - case mop of - MO_F_Eq _ -> condFltReg is32Bit EQQ x y - MO_F_Ne _ -> condFltReg is32Bit NE x y - MO_F_Gt _ -> condFltReg is32Bit GTT x y - MO_F_Ge _ -> condFltReg is32Bit GE x y - -- Invert comparison condition and swap operands - -- See Note [SSE Parity Checks] - MO_F_Lt _ -> condFltReg is32Bit GTT y x - MO_F_Le _ -> condFltReg is32Bit GE y x - - MO_Eq _ -> condIntReg EQQ x y - MO_Ne _ -> condIntReg NE x y - - MO_S_Gt _ -> condIntReg GTT x y - MO_S_Ge _ -> condIntReg GE x y - MO_S_Lt _ -> condIntReg LTT x y - MO_S_Le _ -> condIntReg LE x y - - MO_U_Gt _ -> condIntReg GU x y - MO_U_Ge _ -> condIntReg GEU x y - MO_U_Lt _ -> condIntReg LU x y - MO_U_Le _ -> condIntReg LEU x y - - MO_F_Add w -> trivialFCode_sse2 w ADD x y - - MO_F_Sub w -> trivialFCode_sse2 w SUB x y - - MO_F_Quot w -> trivialFCode_sse2 w FDIV x y - - MO_F_Mul w -> trivialFCode_sse2 w MUL x y - - - MO_Add rep -> add_code rep x y - MO_Sub rep -> sub_code rep x y - - MO_S_Quot rep -> div_code rep True True x y - MO_S_Rem rep -> div_code rep True False x y - MO_U_Quot rep -> div_code rep False True x y - MO_U_Rem rep -> div_code rep False False x y - - MO_S_MulMayOflo rep -> imulMayOflo rep x y - - MO_Mul W8 -> imulW8 x y - MO_Mul rep -> triv_op rep IMUL - MO_And rep -> triv_op rep AND - MO_Or rep -> triv_op rep OR - MO_Xor rep -> triv_op rep XOR - - {- Shift ops on x86s have constraints on their source, it - either has to be Imm, CL or 1 - => trivialCode is not restrictive enough (sigh.) - -} - MO_Shl rep -> shift_code rep SHL x y {-False-} - MO_U_Shr rep -> shift_code rep SHR x y {-False-} - MO_S_Shr rep -> shift_code rep SAR x y {-False-} - - MO_V_Insert {} -> needLlvm - MO_V_Extract {} -> needLlvm - MO_V_Add {} -> needLlvm - MO_V_Sub {} -> needLlvm - MO_V_Mul {} -> needLlvm - MO_VS_Quot {} -> needLlvm - MO_VS_Rem {} -> needLlvm - MO_VS_Neg {} -> needLlvm - MO_VF_Insert {} -> needLlvm - MO_VF_Extract {} -> needLlvm - MO_VF_Add {} -> needLlvm - MO_VF_Sub {} -> needLlvm - MO_VF_Mul {} -> needLlvm - MO_VF_Quot {} -> needLlvm - MO_VF_Neg {} -> needLlvm - - _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop) - where - -------------------- - triv_op width instr = trivialCode width op (Just op) x y - where op = instr (intFormat width) - - -- Special case for IMUL for bytes, since the result of IMULB will be in - -- %ax, the split to %dx/%edx/%rdx and %ax/%eax/%rax happens only for wider - -- values. - imulW8 :: CmmExpr -> CmmExpr -> NatM Register - imulW8 arg_a arg_b = do - (a_reg, a_code) <- getNonClobberedReg arg_a - b_code <- getAnyReg arg_b - - let code = a_code `appOL` b_code eax `appOL` - toOL [ IMUL2 format (OpReg a_reg) ] - format = intFormat W8 - - return (Fixed format eax code) - - - imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register - imulMayOflo rep a b = do - (a_reg, a_code) <- getNonClobberedReg a - b_code <- getAnyReg b - let - shift_amt = case rep of - W32 -> 31 - W64 -> 63 - _ -> panic "shift_amt" - - format = intFormat rep - code = a_code `appOL` b_code eax `appOL` - toOL [ - IMUL2 format (OpReg a_reg), -- result in %edx:%eax - SAR format (OpImm (ImmInt shift_amt)) (OpReg eax), - -- sign extend lower part - SUB format (OpReg edx) (OpReg eax) - -- compare against upper - -- eax==0 if high part == sign extended low part - ] - return (Fixed format eax code) - - -------------------- - shift_code :: Width - -> (Format -> Operand -> Operand -> Instr) - -> CmmExpr - -> CmmExpr - -> NatM Register - - {- Case1: shift length as immediate -} - shift_code width instr x (CmmLit lit) = do - x_code <- getAnyReg x - let - format = intFormat width - code dst - = x_code dst `snocOL` - instr format (OpImm (litToImm lit)) (OpReg dst) - return (Any format code) - - {- Case2: shift length is complex (non-immediate) - * y must go in %ecx. - * we cannot do y first *and* put its result in %ecx, because - %ecx might be clobbered by x. - * if we do y second, then x cannot be - in a clobbered reg. Also, we cannot clobber x's reg - with the instruction itself. - * so we can either: - - do y first, put its result in a fresh tmp, then copy it to %ecx later - - do y second and put its result into %ecx. x gets placed in a fresh - tmp. This is likely to be better, because the reg alloc can - eliminate this reg->reg move here (it won't eliminate the other one, - because the move is into the fixed %ecx). - -} - shift_code width instr x y{-amount-} = do - x_code <- getAnyReg x - let format = intFormat width - tmp <- getNewRegNat format - y_code <- getAnyReg y - let - code = x_code tmp `appOL` - y_code ecx `snocOL` - instr format (OpReg ecx) (OpReg tmp) - return (Fixed format tmp code) - - -------------------- - add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register - add_code rep x (CmmLit (CmmInt y _)) - | is32BitInteger y = add_int rep x y - add_code rep x y = trivialCode rep (ADD format) (Just (ADD format)) x y - where format = intFormat rep - -- TODO: There are other interesting patterns we want to replace - -- with a LEA, e.g. `(x + offset) + (y << shift)`. - - -------------------- - sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register - sub_code rep x (CmmLit (CmmInt y _)) - | is32BitInteger (-y) = add_int rep x (-y) - sub_code rep x y = trivialCode rep (SUB (intFormat rep)) Nothing x y - - -- our three-operand add instruction: - add_int width x y = do - (x_reg, x_code) <- getSomeReg x - let - format = intFormat width - imm = ImmInt (fromInteger y) - code dst - = x_code `snocOL` - LEA format - (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm)) - (OpReg dst) - -- - return (Any format code) - - ---------------------- - - -- See Note [DIV/IDIV for bytes] - div_code W8 signed quotient x y = do - let widen | signed = MO_SS_Conv W8 W16 - | otherwise = MO_UU_Conv W8 W16 - div_code - W16 - signed - quotient - (CmmMachOp widen [x]) - (CmmMachOp widen [y]) - - div_code width signed quotient x y = do - (y_op, y_code) <- getRegOrMem y -- cannot be clobbered - x_code <- getAnyReg x - let - format = intFormat width - widen | signed = CLTD format - | otherwise = XOR format (OpReg edx) (OpReg edx) - - instr | signed = IDIV - | otherwise = DIV - - code = y_code `appOL` - x_code eax `appOL` - toOL [widen, instr format y_op] - - result | quotient = eax - | otherwise = edx - - return (Fixed format result code) - - -getRegister' _ _ (CmmLoad mem pk) - | isFloatType pk - = do - Amode addr mem_code <- getAmode mem - loadFloatAmode (typeWidth pk) addr mem_code - -getRegister' _ is32Bit (CmmLoad mem pk) - | is32Bit && not (isWord64 pk) - = do - code <- intLoadCode instr mem - return (Any format code) - where - width = typeWidth pk - format = intFormat width - instr = case width of - W8 -> MOVZxL II8 - _other -> MOV format - -- We always zero-extend 8-bit loads, if we - -- can't think of anything better. This is because - -- we can't guarantee access to an 8-bit variant of every register - -- (esi and edi don't have 8-bit variants), so to make things - -- simpler we do our 8-bit arithmetic with full 32-bit registers. - --- Simpler memory load code on x86_64 -getRegister' _ is32Bit (CmmLoad mem pk) - | not is32Bit - = do - code <- intLoadCode (MOV format) mem - return (Any format code) - where format = intFormat $ typeWidth pk - -getRegister' _ is32Bit (CmmLit (CmmInt 0 width)) - = let - format = intFormat width - - -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits - format1 = if is32Bit then format - else case format of - II64 -> II32 - _ -> format - code dst - = unitOL (XOR format1 (OpReg dst) (OpReg dst)) - in - return (Any format code) - - -- optimisation for loading small literals on x86_64: take advantage - -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit - -- instruction forms are shorter. -getRegister' dflags is32Bit (CmmLit lit) - | not is32Bit, isWord64 (cmmLitType dflags lit), not (isBigLit lit) - = let - imm = litToImm lit - code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst)) - in - return (Any II64 code) - where - isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff - isBigLit _ = False - -- note1: not the same as (not.is32BitLit), because that checks for - -- signed literals that fit in 32 bits, but we want unsigned - -- literals here. - -- note2: all labels are small, because we're assuming the - -- small memory model (see gcc docs, -mcmodel=small). - -getRegister' dflags _ (CmmLit lit) - = do let format = cmmTypeFormat (cmmLitType dflags lit) - imm = litToImm lit - code dst = unitOL (MOV format (OpImm imm) (OpReg dst)) - return (Any format code) - -getRegister' _ _ other - | isVecExpr other = needLlvm - | otherwise = pprPanic "getRegister(x86)" (ppr other) - - -intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr - -> NatM (Reg -> InstrBlock) -intLoadCode instr mem = do - Amode src mem_code <- getAmode mem - return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst)) - --- Compute an expression into *any* register, adding the appropriate --- move instruction if necessary. -getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock) -getAnyReg expr = do - r <- getRegister expr - anyReg r - -anyReg :: Register -> NatM (Reg -> InstrBlock) -anyReg (Any _ code) = return code -anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst) - --- A bit like getSomeReg, but we want a reg that can be byte-addressed. --- Fixed registers might not be byte-addressable, so we make sure we've --- got a temporary, inserting an extra reg copy if necessary. -getByteReg :: CmmExpr -> NatM (Reg, InstrBlock) -getByteReg expr = do - is32Bit <- is32BitPlatform - if is32Bit - then do r <- getRegister expr - case r of - Any rep code -> do - tmp <- getNewRegNat rep - return (tmp, code tmp) - Fixed rep reg code - | isVirtualReg reg -> return (reg,code) - | otherwise -> do - tmp <- getNewRegNat rep - return (tmp, code `snocOL` reg2reg rep reg tmp) - -- ToDo: could optimise slightly by checking for - -- byte-addressable real registers, but that will - -- happen very rarely if at all. - else getSomeReg expr -- all regs are byte-addressable on x86_64 - --- Another variant: this time we want the result in a register that cannot --- be modified by code to evaluate an arbitrary expression. -getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock) -getNonClobberedReg expr = do - dflags <- getDynFlags - r <- getRegister expr - case r of - Any rep code -> do - tmp <- getNewRegNat rep - return (tmp, code tmp) - Fixed rep reg code - -- only certain regs can be clobbered - | reg `elem` instrClobberedRegs (targetPlatform dflags) - -> do - tmp <- getNewRegNat rep - return (tmp, code `snocOL` reg2reg rep reg tmp) - | otherwise -> - return (reg, code) - -reg2reg :: Format -> Reg -> Reg -> Instr -reg2reg format src dst = MOV format (OpReg src) (OpReg dst) - - --------------------------------------------------------------------------------- -getAmode :: CmmExpr -> NatM Amode -getAmode e = do is32Bit <- is32BitPlatform - getAmode' is32Bit e - -getAmode' :: Bool -> CmmExpr -> NatM Amode -getAmode' _ (CmmRegOff r n) = do dflags <- getDynFlags - getAmode $ mangleIndexTree dflags r n - -getAmode' is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg), - CmmLit displacement]) - | not is32Bit - = return $ Amode (ripRel (litToImm displacement)) nilOL - - --- This is all just ridiculous, since it carefully undoes --- what mangleIndexTree has just done. -getAmode' is32Bit (CmmMachOp (MO_Sub _rep) [x, CmmLit lit@(CmmInt i _)]) - | is32BitLit is32Bit lit - -- ASSERT(rep == II32)??? - = do (x_reg, x_code) <- getSomeReg x - let off = ImmInt (-(fromInteger i)) - return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code) - -getAmode' is32Bit (CmmMachOp (MO_Add _rep) [x, CmmLit lit]) - | is32BitLit is32Bit lit - -- ASSERT(rep == II32)??? - = do (x_reg, x_code) <- getSomeReg x - let off = litToImm lit - return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code) - --- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be --- recognised by the next rule. -getAmode' is32Bit (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _), - b@(CmmLit _)]) - = getAmode' is32Bit (CmmMachOp (MO_Add rep) [b,a]) - --- Matches: (x + offset) + (y << shift) -getAmode' _ (CmmMachOp (MO_Add _) [CmmRegOff x offset, - CmmMachOp (MO_Shl _) - [y, CmmLit (CmmInt shift _)]]) - | shift == 0 || shift == 1 || shift == 2 || shift == 3 - = x86_complex_amode (CmmReg x) y shift (fromIntegral offset) - -getAmode' _ (CmmMachOp (MO_Add _) [x, CmmMachOp (MO_Shl _) - [y, CmmLit (CmmInt shift _)]]) - | shift == 0 || shift == 1 || shift == 2 || shift == 3 - = x86_complex_amode x y shift 0 - -getAmode' _ (CmmMachOp (MO_Add _) - [x, CmmMachOp (MO_Add _) - [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)], - CmmLit (CmmInt offset _)]]) - | shift == 0 || shift == 1 || shift == 2 || shift == 3 - && is32BitInteger offset - = x86_complex_amode x y shift offset - -getAmode' _ (CmmMachOp (MO_Add _) [x,y]) - = x86_complex_amode x y 0 0 - -getAmode' is32Bit (CmmLit lit) | is32BitLit is32Bit lit - = return (Amode (ImmAddr (litToImm lit) 0) nilOL) - -getAmode' _ expr = do - (reg,code) <- getSomeReg expr - return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code) - --- | Like 'getAmode', but on 32-bit use simple register addressing --- (i.e. no index register). This stops us from running out of --- registers on x86 when using instructions such as cmpxchg, which can --- use up to three virtual registers and one fixed register. -getSimpleAmode :: DynFlags -> Bool -> CmmExpr -> NatM Amode -getSimpleAmode dflags is32Bit addr - | is32Bit = do - addr_code <- getAnyReg addr - addr_r <- getNewRegNat (intFormat (wordWidth dflags)) - let amode = AddrBaseIndex (EABaseReg addr_r) EAIndexNone (ImmInt 0) - return $! Amode amode (addr_code addr_r) - | otherwise = getAmode addr - -x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode -x86_complex_amode base index shift offset - = do (x_reg, x_code) <- getNonClobberedReg base - -- x must be in a temp, because it has to stay live over y_code - -- we could compare x_reg and y_reg and do something better here... - (y_reg, y_code) <- getSomeReg index - let - code = x_code `appOL` y_code - base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8; - n -> panic $ "x86_complex_amode: unhandled shift! (" ++ show n ++ ")" - return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset))) - code) - - - - --- ----------------------------------------------------------------------------- --- getOperand: sometimes any operand will do. - --- getNonClobberedOperand: the value of the operand will remain valid across --- the computation of an arbitrary expression, unless the expression --- is computed directly into a register which the operand refers to --- (see trivialCode where this function is used for an example). - -getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock) -getNonClobberedOperand (CmmLit lit) = do - if isSuitableFloatingPointLit lit - then do - let CmmFloat _ w = lit - Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit - return (OpAddr addr, code) - else do - - is32Bit <- is32BitPlatform - dflags <- getDynFlags - if is32BitLit is32Bit lit && not (isFloatType (cmmLitType dflags lit)) - then return (OpImm (litToImm lit), nilOL) - else getNonClobberedOperand_generic (CmmLit lit) - -getNonClobberedOperand (CmmLoad mem pk) = do - is32Bit <- is32BitPlatform - -- this logic could be simplified - -- TODO FIXME - if (if is32Bit then not (isWord64 pk) else True) - -- if 32bit and pk is at float/double/simd value - -- or if 64bit - -- this could use some eyeballs or i'll need to stare at it more later - then do - dflags <- getDynFlags - let platform = targetPlatform dflags - Amode src mem_code <- getAmode mem - (src',save_code) <- - if (amodeCouldBeClobbered platform src) - then do - tmp <- getNewRegNat (archWordFormat is32Bit) - return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0), - unitOL (LEA (archWordFormat is32Bit) - (OpAddr src) - (OpReg tmp))) - else - return (src, nilOL) - return (OpAddr src', mem_code `appOL` save_code) - else do - -- if its a word or gcptr on 32bit? - getNonClobberedOperand_generic (CmmLoad mem pk) - -getNonClobberedOperand e = getNonClobberedOperand_generic e - -getNonClobberedOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock) -getNonClobberedOperand_generic e = do - (reg, code) <- getNonClobberedReg e - return (OpReg reg, code) - -amodeCouldBeClobbered :: Platform -> AddrMode -> Bool -amodeCouldBeClobbered platform amode = any (regClobbered platform) (addrModeRegs amode) - -regClobbered :: Platform -> Reg -> Bool -regClobbered platform (RegReal (RealRegSingle rr)) = freeReg platform rr -regClobbered _ _ = False - --- getOperand: the operand is not required to remain valid across the --- computation of an arbitrary expression. -getOperand :: CmmExpr -> NatM (Operand, InstrBlock) - -getOperand (CmmLit lit) = do - use_sse2 <- sse2Enabled - if (use_sse2 && isSuitableFloatingPointLit lit) - then do - let CmmFloat _ w = lit - Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit - return (OpAddr addr, code) - else do - - is32Bit <- is32BitPlatform - dflags <- getDynFlags - if is32BitLit is32Bit lit && not (isFloatType (cmmLitType dflags lit)) - then return (OpImm (litToImm lit), nilOL) - else getOperand_generic (CmmLit lit) - -getOperand (CmmLoad mem pk) = do - is32Bit <- is32BitPlatform - use_sse2 <- sse2Enabled - if (not (isFloatType pk) || use_sse2) && (if is32Bit then not (isWord64 pk) else True) - then do - Amode src mem_code <- getAmode mem - return (OpAddr src, mem_code) - else - getOperand_generic (CmmLoad mem pk) - -getOperand e = getOperand_generic e - -getOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock) -getOperand_generic e = do - (reg, code) <- getSomeReg e - return (OpReg reg, code) - -isOperand :: Bool -> CmmExpr -> Bool -isOperand _ (CmmLoad _ _) = True -isOperand is32Bit (CmmLit lit) = is32BitLit is32Bit lit - || isSuitableFloatingPointLit lit -isOperand _ _ = False - --- | Given a 'Register', produce a new 'Register' with an instruction block --- which will check the value for alignment. Used for @-falignment-sanitisation@. -addAlignmentCheck :: Int -> Register -> Register -addAlignmentCheck align reg = - case reg of - Fixed fmt reg code -> Fixed fmt reg (code `appOL` check fmt reg) - Any fmt f -> Any fmt (\reg -> f reg `appOL` check fmt reg) - where - check :: Format -> Reg -> InstrBlock - check fmt reg = - ASSERT(not $ isFloatFormat fmt) - toOL [ TEST fmt (OpImm $ ImmInt $ align-1) (OpReg reg) - , JXX_GBL NE $ ImmCLbl mkBadAlignmentLabel - ] - -memConstant :: Alignment -> CmmLit -> NatM Amode -memConstant align lit = do - lbl <- getNewLabelNat - let rosection = Section ReadOnlyData lbl - dflags <- getDynFlags - (addr, addr_code) <- if target32Bit (targetPlatform dflags) - then do dynRef <- cmmMakeDynamicReference - dflags - DataReference - lbl - Amode addr addr_code <- getAmode dynRef - return (addr, addr_code) - else return (ripRel (ImmCLbl lbl), nilOL) - let code = - LDATA rosection (align, RawCmmStatics lbl [CmmStaticLit lit]) - `consOL` addr_code - return (Amode addr code) - - -loadFloatAmode :: Width -> AddrMode -> InstrBlock -> NatM Register -loadFloatAmode w addr addr_code = do - let format = floatFormat w - code dst = addr_code `snocOL` - MOV format (OpAddr addr) (OpReg dst) - - return (Any format code) - - --- if we want a floating-point literal as an operand, we can --- use it directly from memory. However, if the literal is --- zero, we're better off generating it into a register using --- xor. -isSuitableFloatingPointLit :: CmmLit -> Bool -isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0 -isSuitableFloatingPointLit _ = False - -getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock) -getRegOrMem e@(CmmLoad mem pk) = do - is32Bit <- is32BitPlatform - use_sse2 <- sse2Enabled - if (not (isFloatType pk) || use_sse2) && (if is32Bit then not (isWord64 pk) else True) - then do - Amode src mem_code <- getAmode mem - return (OpAddr src, mem_code) - else do - (reg, code) <- getNonClobberedReg e - return (OpReg reg, code) -getRegOrMem e = do - (reg, code) <- getNonClobberedReg e - return (OpReg reg, code) - -is32BitLit :: Bool -> CmmLit -> Bool -is32BitLit is32Bit (CmmInt i W64) - | not is32Bit - = -- assume that labels are in the range 0-2^31-1: this assumes the - -- small memory model (see gcc docs, -mcmodel=small). - is32BitInteger i -is32BitLit _ _ = True - - - - --- Set up a condition code for a conditional branch. - -getCondCode :: CmmExpr -> NatM CondCode - --- yes, they really do seem to want exactly the same! - -getCondCode (CmmMachOp mop [x, y]) - = - case mop of - MO_F_Eq W32 -> condFltCode EQQ x y - MO_F_Ne W32 -> condFltCode NE x y - MO_F_Gt W32 -> condFltCode GTT x y - MO_F_Ge W32 -> condFltCode GE x y - -- Invert comparison condition and swap operands - -- See Note [SSE Parity Checks] - MO_F_Lt W32 -> condFltCode GTT y x - MO_F_Le W32 -> condFltCode GE y x - - MO_F_Eq W64 -> condFltCode EQQ x y - MO_F_Ne W64 -> condFltCode NE x y - MO_F_Gt W64 -> condFltCode GTT x y - MO_F_Ge W64 -> condFltCode GE x y - MO_F_Lt W64 -> condFltCode GTT y x - MO_F_Le W64 -> condFltCode GE y x - - _ -> condIntCode (machOpToCond mop) x y - -getCondCode other = pprPanic "getCondCode(2)(x86,x86_64)" (ppr other) - -machOpToCond :: MachOp -> Cond -machOpToCond mo = case mo of - MO_Eq _ -> EQQ - MO_Ne _ -> NE - MO_S_Gt _ -> GTT - MO_S_Ge _ -> GE - MO_S_Lt _ -> LTT - MO_S_Le _ -> LE - MO_U_Gt _ -> GU - MO_U_Ge _ -> GEU - MO_U_Lt _ -> LU - MO_U_Le _ -> LEU - _other -> pprPanic "machOpToCond" (pprMachOp mo) - - --- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be --- passed back up the tree. - -condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode -condIntCode cond x y = do is32Bit <- is32BitPlatform - condIntCode' is32Bit cond x y - -condIntCode' :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM CondCode - --- memory vs immediate -condIntCode' is32Bit cond (CmmLoad x pk) (CmmLit lit) - | is32BitLit is32Bit lit = do - Amode x_addr x_code <- getAmode x - let - imm = litToImm lit - code = x_code `snocOL` - CMP (cmmTypeFormat pk) (OpImm imm) (OpAddr x_addr) - -- - return (CondCode False cond code) - --- anything vs zero, using a mask --- TODO: Add some sanity checking!!!! -condIntCode' is32Bit cond (CmmMachOp (MO_And _) [x,o2]) (CmmLit (CmmInt 0 pk)) - | (CmmLit lit@(CmmInt mask _)) <- o2, is32BitLit is32Bit lit - = do - (x_reg, x_code) <- getSomeReg x - let - code = x_code `snocOL` - TEST (intFormat pk) (OpImm (ImmInteger mask)) (OpReg x_reg) - -- - return (CondCode False cond code) - --- anything vs zero -condIntCode' _ cond x (CmmLit (CmmInt 0 pk)) = do - (x_reg, x_code) <- getSomeReg x - let - code = x_code `snocOL` - TEST (intFormat pk) (OpReg x_reg) (OpReg x_reg) - -- - return (CondCode False cond code) - --- anything vs operand -condIntCode' is32Bit cond x y - | isOperand is32Bit y = do - dflags <- getDynFlags - (x_reg, x_code) <- getNonClobberedReg x - (y_op, y_code) <- getOperand y - let - code = x_code `appOL` y_code `snocOL` - CMP (cmmTypeFormat (cmmExprType dflags x)) y_op (OpReg x_reg) - return (CondCode False cond code) --- operand vs. anything: invert the comparison so that we can use a --- single comparison instruction. - | isOperand is32Bit x - , Just revcond <- maybeFlipCond cond = do - dflags <- getDynFlags - (y_reg, y_code) <- getNonClobberedReg y - (x_op, x_code) <- getOperand x - let - code = y_code `appOL` x_code `snocOL` - CMP (cmmTypeFormat (cmmExprType dflags x)) x_op (OpReg y_reg) - return (CondCode False revcond code) - --- anything vs anything -condIntCode' _ cond x y = do - dflags <- getDynFlags - (y_reg, y_code) <- getNonClobberedReg y - (x_op, x_code) <- getRegOrMem x - let - code = y_code `appOL` - x_code `snocOL` - CMP (cmmTypeFormat (cmmExprType dflags x)) (OpReg y_reg) x_op - return (CondCode False cond code) - - - --------------------------------------------------------------------------------- -condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode - -condFltCode cond x y - = condFltCode_sse2 - where - - - -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be - -- an operand, but the right must be a reg. We can probably do better - -- than this general case... - condFltCode_sse2 = do - dflags <- getDynFlags - (x_reg, x_code) <- getNonClobberedReg x - (y_op, y_code) <- getOperand y - let - code = x_code `appOL` - y_code `snocOL` - CMP (floatFormat $ cmmExprWidth dflags x) y_op (OpReg x_reg) - -- NB(1): we need to use the unsigned comparison operators on the - -- result of this comparison. - return (CondCode True (condToUnsigned cond) code) - --- ----------------------------------------------------------------------------- --- Generating assignments - --- Assignments are really at the heart of the whole code generation --- business. Almost all top-level nodes of any real importance are --- assignments, which correspond to loads, stores, or register --- transfers. If we're really lucky, some of the register transfers --- will go away, because we can use the destination register to --- complete the code generation for the right hand side. This only --- fails when the right hand side is forced into a fixed register --- (e.g. the result of a call). - -assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock -assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock - -assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock -assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock - - --- integer assignment to memory - --- specific case of adding/subtracting an integer to a particular address. --- ToDo: catch other cases where we can use an operation directly on a memory --- address. -assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _, - CmmLit (CmmInt i _)]) - | addr == addr2, pk /= II64 || is32BitInteger i, - Just instr <- check op - = do Amode amode code_addr <- getAmode addr - let code = code_addr `snocOL` - instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode) - return code - where - check (MO_Add _) = Just ADD - check (MO_Sub _) = Just SUB - check _ = Nothing - -- ToDo: more? - --- general case -assignMem_IntCode pk addr src = do - is32Bit <- is32BitPlatform - Amode addr code_addr <- getAmode addr - (code_src, op_src) <- get_op_RI is32Bit src - let - code = code_src `appOL` - code_addr `snocOL` - MOV pk op_src (OpAddr addr) - -- NOTE: op_src is stable, so it will still be valid - -- after code_addr. This may involve the introduction - -- of an extra MOV to a temporary register, but we hope - -- the register allocator will get rid of it. - -- - return code - where - get_op_RI :: Bool -> CmmExpr -> NatM (InstrBlock,Operand) -- code, operator - get_op_RI is32Bit (CmmLit lit) | is32BitLit is32Bit lit - = return (nilOL, OpImm (litToImm lit)) - get_op_RI _ op - = do (reg,code) <- getNonClobberedReg op - return (code, OpReg reg) - - --- Assign; dst is a reg, rhs is mem -assignReg_IntCode pk reg (CmmLoad src _) = do - load_code <- intLoadCode (MOV pk) src - dflags <- getDynFlags - let platform = targetPlatform dflags - return (load_code (getRegisterReg platform reg)) - --- dst is a reg, but src could be anything -assignReg_IntCode _ reg src = do - dflags <- getDynFlags - let platform = targetPlatform dflags - code <- getAnyReg src - return (code (getRegisterReg platform reg)) - - --- Floating point assignment to memory -assignMem_FltCode pk addr src = do - (src_reg, src_code) <- getNonClobberedReg src - Amode addr addr_code <- getAmode addr - let - code = src_code `appOL` - addr_code `snocOL` - MOV pk (OpReg src_reg) (OpAddr addr) - - return code - --- Floating point assignment to a register/temporary -assignReg_FltCode _ reg src = do - src_code <- getAnyReg src - dflags <- getDynFlags - let platform = targetPlatform dflags - return (src_code (getRegisterReg platform reg)) - - -genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock - -genJump (CmmLoad mem _) regs = do - Amode target code <- getAmode mem - return (code `snocOL` JMP (OpAddr target) regs) - -genJump (CmmLit lit) regs = do - return (unitOL (JMP (OpImm (litToImm lit)) regs)) - -genJump expr regs = do - (reg,code) <- getSomeReg expr - return (code `snocOL` JMP (OpReg reg) regs) - - --- ----------------------------------------------------------------------------- --- Unconditional branches - -genBranch :: BlockId -> InstrBlock -genBranch = toOL . mkJumpInstr - - - --- ----------------------------------------------------------------------------- --- Conditional jumps/branches - -{- -Conditional jumps are always to local labels, so we can use branch -instructions. We peek at the arguments to decide what kind of -comparison to do. - -I386: First, we have to ensure that the condition -codes are set according to the supplied comparison operation. --} - - -genCondBranch - :: BlockId -- the source of the jump - -> BlockId -- the true branch target - -> BlockId -- the false branch target - -> CmmExpr -- the condition on which to branch - -> NatM InstrBlock -- Instructions - -genCondBranch bid id false expr = do - is32Bit <- is32BitPlatform - genCondBranch' is32Bit bid id false expr - --- | We return the instructions generated. -genCondBranch' :: Bool -> BlockId -> BlockId -> BlockId -> CmmExpr - -> NatM InstrBlock - --- 64-bit integer comparisons on 32-bit -genCondBranch' is32Bit _bid true false (CmmMachOp mop [e1,e2]) - | is32Bit, Just W64 <- maybeIntComparison mop = do - ChildCode64 code1 r1_lo <- iselExpr64 e1 - ChildCode64 code2 r2_lo <- iselExpr64 e2 - let r1_hi = getHiVRegFromLo r1_lo - r2_hi = getHiVRegFromLo r2_lo - cond = machOpToCond mop - Just cond' = maybeFlipCond cond - --TODO: Update CFG for x86 - let code = code1 `appOL` code2 `appOL` toOL [ - CMP II32 (OpReg r2_hi) (OpReg r1_hi), - JXX cond true, - JXX cond' false, - CMP II32 (OpReg r2_lo) (OpReg r1_lo), - JXX cond true] `appOL` genBranch false - return code - -genCondBranch' _ bid id false bool = do - CondCode is_float cond cond_code <- getCondCode bool - use_sse2 <- sse2Enabled - if not is_float || not use_sse2 - then - return (cond_code `snocOL` JXX cond id `appOL` genBranch false) - else do - -- See Note [SSE Parity Checks] - let jmpFalse = genBranch false - code - = case cond of - NE -> or_unordered - GU -> plain_test - GEU -> plain_test - -- Use ASSERT so we don't break releases if - -- LTT/LE creep in somehow. - LTT -> - ASSERT2(False, ppr "Should have been turned into >") - and_ordered - LE -> - ASSERT2(False, ppr "Should have been turned into >=") - and_ordered - _ -> and_ordered - - plain_test = unitOL ( - JXX cond id - ) `appOL` jmpFalse - or_unordered = toOL [ - JXX cond id, - JXX PARITY id - ] `appOL` jmpFalse - and_ordered = toOL [ - JXX PARITY false, - JXX cond id, - JXX ALWAYS false - ] - updateCfgNat (\cfg -> adjustEdgeWeight cfg (+3) bid false) - return (cond_code `appOL` code) - -{- Note [Introducing cfg edges inside basic blocks] - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - During instruction selection a statement `s` - in a block B with control of the sort: B -> C - will sometimes result in control - flow of the sort: - - ┌ < ┐ - v ^ - B -> B1 ┴ -> C - - as is the case for some atomic operations. - - Now to keep the CFG in sync when introducing B1 we clearly - want to insert it between B and C. However there is - a catch when we have to deal with self loops. - - We might start with code and a CFG of these forms: - - loop: - stmt1 ┌ < ┐ - .... v ^ - stmtX loop ┘ - stmtY - .... - goto loop: - - Now we introduce B1: - ┌ ─ ─ ─ ─ ─┐ - loop: │ ┌ < ┐ │ - instrs v │ │ ^ - .... loop ┴ B1 ┴ ┘ - instrsFromX - stmtY - goto loop: - - This is simple, all outgoing edges from loop now simply - start from B1 instead and the code generator knows which - new edges it introduced for the self loop of B1. - - Disaster strikes if the statement Y follows the same pattern. - If we apply the same rule that all outgoing edges change then - we end up with: - - loop ─> B1 ─> B2 ┬─┐ - │ │ └─<┤ │ - │ └───<───┘ │ - └───────<────────┘ - - This is problematic. The edge B1->B1 is modified as expected. - However the modification is wrong! - - The assembly in this case looked like this: - - _loop: - <instrs> - _B1: - ... - cmpxchgq ... - jne _B1 - <instrs> - <end _B1> - _B2: - ... - cmpxchgq ... - jne _B2 - <instrs> - jmp loop - - There is no edge _B2 -> _B1 here. It's still a self loop onto _B1. - - The problem here is that really B1 should be two basic blocks. - Otherwise we have control flow in the *middle* of a basic block. - A contradiction! - - So to account for this we add yet another basic block marker: - - _B: - <instrs> - _B1: - ... - cmpxchgq ... - jne _B1 - jmp _B1' - _B1': - <instrs> - <end _B1> - _B2: - ... - - Now when inserting B2 we will only look at the outgoing edges of B1' and - everything will work out nicely. - - You might also wonder why we don't insert jumps at the end of _B1'. There is - no way another block ends up jumping to the labels _B1 or _B2 since they are - essentially invisible to other blocks. View them as control flow labels local - to the basic block if you'd like. - - Not doing this ultimately caused (part 2 of) #17334. --} - - --- ----------------------------------------------------------------------------- --- Generating C calls - --- Now the biggest nightmare---calls. Most of the nastiness is buried in --- @get_arg@, which moves the arguments to the correct registers/stack --- locations. Apart from that, the code is easy. --- --- (If applicable) Do not fill the delay slots here; you will confuse the --- register allocator. --- --- See Note [Keeping track of the current block] for information why we need --- to take/return a block id. - -genCCall - :: DynFlags - -> Bool -- 32 bit platform? - -> ForeignTarget -- function to call - -> [CmmFormal] -- where to put the result - -> [CmmActual] -- arguments (of mixed type) - -> BlockId -- The block we are in - -> NatM (InstrBlock, Maybe BlockId) - --- First we deal with cases which might introduce new blocks in the stream. - -genCCall dflags is32Bit (PrimTarget (MO_AtomicRMW width amop)) - [dst] [addr, n] bid = do - Amode amode addr_code <- - if amop `elem` [AMO_Add, AMO_Sub] - then getAmode addr - else getSimpleAmode dflags is32Bit addr -- See genCCall for MO_Cmpxchg - arg <- getNewRegNat format - arg_code <- getAnyReg n - let platform = targetPlatform dflags - dst_r = getRegisterReg platform (CmmLocal dst) - (code, lbl) <- op_code dst_r arg amode - return (addr_code `appOL` arg_code arg `appOL` code, Just lbl) - where - -- Code for the operation - op_code :: Reg -- Destination reg - -> Reg -- Register containing argument - -> AddrMode -- Address of location to mutate - -> NatM (OrdList Instr,BlockId) -- TODO: Return Maybe BlockId - op_code dst_r arg amode = case amop of - -- In the common case where dst_r is a virtual register the - -- final move should go away, because it's the last use of arg - -- and the first use of dst_r. - AMO_Add -> return $ (toOL [ LOCK (XADD format (OpReg arg) (OpAddr amode)) - , MOV format (OpReg arg) (OpReg dst_r) - ], bid) - AMO_Sub -> return $ (toOL [ NEGI format (OpReg arg) - , LOCK (XADD format (OpReg arg) (OpAddr amode)) - , MOV format (OpReg arg) (OpReg dst_r) - ], bid) - -- In these cases we need a new block id, and have to return it so - -- that later instruction selection can reference it. - AMO_And -> cmpxchg_code (\ src dst -> unitOL $ AND format src dst) - AMO_Nand -> cmpxchg_code (\ src dst -> toOL [ AND format src dst - , NOT format dst - ]) - AMO_Or -> cmpxchg_code (\ src dst -> unitOL $ OR format src dst) - AMO_Xor -> cmpxchg_code (\ src dst -> unitOL $ XOR format src dst) - where - -- Simulate operation that lacks a dedicated instruction using - -- cmpxchg. - cmpxchg_code :: (Operand -> Operand -> OrdList Instr) - -> NatM (OrdList Instr, BlockId) - cmpxchg_code instrs = do - lbl1 <- getBlockIdNat - lbl2 <- getBlockIdNat - tmp <- getNewRegNat format - - --Record inserted blocks - -- We turn A -> B into A -> A' -> A'' -> B - -- with a self loop on A'. - addImmediateSuccessorNat bid lbl1 - addImmediateSuccessorNat lbl1 lbl2 - updateCfgNat (addWeightEdge lbl1 lbl1 0) - - return $ (toOL - [ MOV format (OpAddr amode) (OpReg eax) - , JXX ALWAYS lbl1 - , NEWBLOCK lbl1 - -- Keep old value so we can return it: - , MOV format (OpReg eax) (OpReg dst_r) - , MOV format (OpReg eax) (OpReg tmp) - ] - `appOL` instrs (OpReg arg) (OpReg tmp) `appOL` toOL - [ LOCK (CMPXCHG format (OpReg tmp) (OpAddr amode)) - , JXX NE lbl1 - -- See Note [Introducing cfg edges inside basic blocks] - -- why this basic block is required. - , JXX ALWAYS lbl2 - , NEWBLOCK lbl2 - ], - lbl2) - format = intFormat width - -genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src] bid - | is32Bit, width == W64 = do - ChildCode64 vcode rlo <- iselExpr64 src - let rhi = getHiVRegFromLo rlo - dst_r = getRegisterReg platform (CmmLocal dst) - lbl1 <- getBlockIdNat - lbl2 <- getBlockIdNat - let format = if width == W8 then II16 else intFormat width - tmp_r <- getNewRegNat format - - -- New CFG Edges: - -- bid -> lbl2 - -- bid -> lbl1 -> lbl2 - -- We also changes edges originating at bid to start at lbl2 instead. - updateCfgNat (addWeightEdge bid lbl1 110 . - addWeightEdge lbl1 lbl2 110 . - addImmediateSuccessor bid lbl2) - - -- The following instruction sequence corresponds to the pseudo-code - -- - -- if (src) { - -- dst = src.lo32 ? BSF(src.lo32) : (BSF(src.hi32) + 32); - -- } else { - -- dst = 64; - -- } - let !instrs = vcode `appOL` toOL - ([ MOV II32 (OpReg rhi) (OpReg tmp_r) - , OR II32 (OpReg rlo) (OpReg tmp_r) - , MOV II32 (OpImm (ImmInt 64)) (OpReg dst_r) - , JXX EQQ lbl2 - , JXX ALWAYS lbl1 - - , NEWBLOCK lbl1 - , BSF II32 (OpReg rhi) dst_r - , ADD II32 (OpImm (ImmInt 32)) (OpReg dst_r) - , BSF II32 (OpReg rlo) tmp_r - , CMOV NE II32 (OpReg tmp_r) dst_r - , JXX ALWAYS lbl2 - - , NEWBLOCK lbl2 - ]) - return (instrs, Just lbl2) - - | otherwise = do - code_src <- getAnyReg src - let dst_r = getRegisterReg platform (CmmLocal dst) - - if isBmi2Enabled dflags - then do - src_r <- getNewRegNat (intFormat width) - let instrs = appOL (code_src src_r) $ case width of - W8 -> toOL - [ OR II32 (OpImm (ImmInteger 0xFFFFFF00)) (OpReg src_r) - , TZCNT II32 (OpReg src_r) dst_r - ] - W16 -> toOL - [ TZCNT II16 (OpReg src_r) dst_r - , MOVZxL II16 (OpReg dst_r) (OpReg dst_r) - ] - _ -> unitOL $ TZCNT (intFormat width) (OpReg src_r) dst_r - return (instrs, Nothing) - else do - -- The following insn sequence makes sure 'ctz 0' has a defined value. - -- starting with Haswell, one could use the TZCNT insn instead. - let format = if width == W8 then II16 else intFormat width - src_r <- getNewRegNat format - tmp_r <- getNewRegNat format - let !instrs = code_src src_r `appOL` toOL - ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++ - [ BSF format (OpReg src_r) tmp_r - , MOV II32 (OpImm (ImmInt bw)) (OpReg dst_r) - , CMOV NE format (OpReg tmp_r) dst_r - ]) -- NB: We don't need to zero-extend the result for the - -- W8/W16 cases because the 'MOV' insn already - -- took care of implicitly clearing the upper bits - return (instrs, Nothing) - where - bw = widthInBits width - platform = targetPlatform dflags - -genCCall dflags bits mop dst args bid = do - instr <- genCCall' dflags bits mop dst args bid - return (instr, Nothing) - --- genCCall' handles cases not introducing new code blocks. -genCCall' - :: DynFlags - -> Bool -- 32 bit platform? - -> ForeignTarget -- function to call - -> [CmmFormal] -- where to put the result - -> [CmmActual] -- arguments (of mixed type) - -> BlockId -- The block we are in - -> NatM InstrBlock - --- Unroll memcpy calls if the number of bytes to copy isn't too --- large. Otherwise, call C's memcpy. -genCCall' dflags _ (PrimTarget (MO_Memcpy align)) _ - [dst, src, CmmLit (CmmInt n _)] _ - | fromInteger insns <= maxInlineMemcpyInsns dflags = do - code_dst <- getAnyReg dst - dst_r <- getNewRegNat format - code_src <- getAnyReg src - src_r <- getNewRegNat format - tmp_r <- getNewRegNat format - return $ code_dst dst_r `appOL` code_src src_r `appOL` - go dst_r src_r tmp_r (fromInteger n) - where - -- The number of instructions we will generate (approx). We need 2 - -- instructions per move. - insns = 2 * ((n + sizeBytes - 1) `div` sizeBytes) - - maxAlignment = wordAlignment dflags -- only machine word wide MOVs are supported - effectiveAlignment = min (alignmentOf align) maxAlignment - format = intFormat . widthFromBytes $ alignmentBytes effectiveAlignment - - -- The size of each move, in bytes. - sizeBytes :: Integer - sizeBytes = fromIntegral (formatInBytes format) - - go :: Reg -> Reg -> Reg -> Integer -> OrdList Instr - go dst src tmp i - | i >= sizeBytes = - unitOL (MOV format (OpAddr src_addr) (OpReg tmp)) `appOL` - unitOL (MOV format (OpReg tmp) (OpAddr dst_addr)) `appOL` - go dst src tmp (i - sizeBytes) - -- Deal with remaining bytes. - | i >= 4 = -- Will never happen on 32-bit - unitOL (MOV II32 (OpAddr src_addr) (OpReg tmp)) `appOL` - unitOL (MOV II32 (OpReg tmp) (OpAddr dst_addr)) `appOL` - go dst src tmp (i - 4) - | i >= 2 = - unitOL (MOVZxL II16 (OpAddr src_addr) (OpReg tmp)) `appOL` - unitOL (MOV II16 (OpReg tmp) (OpAddr dst_addr)) `appOL` - go dst src tmp (i - 2) - | i >= 1 = - unitOL (MOVZxL II8 (OpAddr src_addr) (OpReg tmp)) `appOL` - unitOL (MOV II8 (OpReg tmp) (OpAddr dst_addr)) `appOL` - go dst src tmp (i - 1) - | otherwise = nilOL - where - src_addr = AddrBaseIndex (EABaseReg src) EAIndexNone - (ImmInteger (n - i)) - dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone - (ImmInteger (n - i)) - -genCCall' dflags _ (PrimTarget (MO_Memset align)) _ - [dst, - CmmLit (CmmInt c _), - CmmLit (CmmInt n _)] - _ - | fromInteger insns <= maxInlineMemsetInsns dflags = do - code_dst <- getAnyReg dst - dst_r <- getNewRegNat format - if format == II64 && n >= 8 then do - code_imm8byte <- getAnyReg (CmmLit (CmmInt c8 W64)) - imm8byte_r <- getNewRegNat II64 - return $ code_dst dst_r `appOL` - code_imm8byte imm8byte_r `appOL` - go8 dst_r imm8byte_r (fromInteger n) - else - return $ code_dst dst_r `appOL` - go4 dst_r (fromInteger n) - where - maxAlignment = wordAlignment dflags -- only machine word wide MOVs are supported - effectiveAlignment = min (alignmentOf align) maxAlignment - format = intFormat . widthFromBytes $ alignmentBytes effectiveAlignment - c2 = c `shiftL` 8 .|. c - c4 = c2 `shiftL` 16 .|. c2 - c8 = c4 `shiftL` 32 .|. c4 - - -- The number of instructions we will generate (approx). We need 1 - -- instructions per move. - insns = (n + sizeBytes - 1) `div` sizeBytes - - -- The size of each move, in bytes. - sizeBytes :: Integer - sizeBytes = fromIntegral (formatInBytes format) - - -- Depending on size returns the widest MOV instruction and its - -- width. - gen4 :: AddrMode -> Integer -> (InstrBlock, Integer) - gen4 addr size - | size >= 4 = - (unitOL (MOV II32 (OpImm (ImmInteger c4)) (OpAddr addr)), 4) - | size >= 2 = - (unitOL (MOV II16 (OpImm (ImmInteger c2)) (OpAddr addr)), 2) - | size >= 1 = - (unitOL (MOV II8 (OpImm (ImmInteger c)) (OpAddr addr)), 1) - | otherwise = (nilOL, 0) - - -- Generates a 64-bit wide MOV instruction from REG to MEM. - gen8 :: AddrMode -> Reg -> InstrBlock - gen8 addr reg8byte = - unitOL (MOV format (OpReg reg8byte) (OpAddr addr)) - - -- Unrolls memset when the widest MOV is <= 4 bytes. - go4 :: Reg -> Integer -> InstrBlock - go4 dst left = - if left <= 0 then nilOL - else curMov `appOL` go4 dst (left - curWidth) - where - possibleWidth = minimum [left, sizeBytes] - dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone (ImmInteger (n - left)) - (curMov, curWidth) = gen4 dst_addr possibleWidth - - -- Unrolls memset when the widest MOV is 8 bytes (thus another Reg - -- argument). Falls back to go4 when all 8 byte moves are - -- exhausted. - go8 :: Reg -> Reg -> Integer -> InstrBlock - go8 dst reg8byte left = - if possibleWidth >= 8 then - let curMov = gen8 dst_addr reg8byte - in curMov `appOL` go8 dst reg8byte (left - 8) - else go4 dst left - where - possibleWidth = minimum [left, sizeBytes] - dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone (ImmInteger (n - left)) - -genCCall' _ _ (PrimTarget MO_ReadBarrier) _ _ _ = return nilOL -genCCall' _ _ (PrimTarget MO_WriteBarrier) _ _ _ = return nilOL - -- barriers compile to no code on x86/x86-64; - -- we keep it this long in order to prevent earlier optimisations. - -genCCall' _ _ (PrimTarget MO_Touch) _ _ _ = return nilOL - -genCCall' _ is32bit (PrimTarget (MO_Prefetch_Data n )) _ [src] _ = - case n of - 0 -> genPrefetch src $ PREFETCH NTA format - 1 -> genPrefetch src $ PREFETCH Lvl2 format - 2 -> genPrefetch src $ PREFETCH Lvl1 format - 3 -> genPrefetch src $ PREFETCH Lvl0 format - l -> panic $ "unexpected prefetch level in genCCall MO_Prefetch_Data: " ++ (show l) - -- the c / llvm prefetch convention is 0, 1, 2, and 3 - -- the x86 corresponding names are : NTA, 2 , 1, and 0 - where - format = archWordFormat is32bit - -- need to know what register width for pointers! - genPrefetch inRegSrc prefetchCTor = - do - code_src <- getAnyReg inRegSrc - src_r <- getNewRegNat format - return $ code_src src_r `appOL` - (unitOL (prefetchCTor (OpAddr - ((AddrBaseIndex (EABaseReg src_r ) EAIndexNone (ImmInt 0)))) )) - -- prefetch always takes an address - -genCCall' dflags is32Bit (PrimTarget (MO_BSwap width)) [dst] [src] _ = do - let platform = targetPlatform dflags - let dst_r = getRegisterReg platform (CmmLocal dst) - case width of - W64 | is32Bit -> do - ChildCode64 vcode rlo <- iselExpr64 src - let dst_rhi = getHiVRegFromLo dst_r - rhi = getHiVRegFromLo rlo - return $ vcode `appOL` - toOL [ MOV II32 (OpReg rlo) (OpReg dst_rhi), - MOV II32 (OpReg rhi) (OpReg dst_r), - BSWAP II32 dst_rhi, - BSWAP II32 dst_r ] - W16 -> do code_src <- getAnyReg src - return $ code_src dst_r `appOL` - unitOL (BSWAP II32 dst_r) `appOL` - unitOL (SHR II32 (OpImm $ ImmInt 16) (OpReg dst_r)) - _ -> do code_src <- getAnyReg src - return $ code_src dst_r `appOL` unitOL (BSWAP format dst_r) - where - format = intFormat width - -genCCall' dflags is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst] - args@[src] bid = do - sse4_2 <- sse4_2Enabled - let platform = targetPlatform dflags - if sse4_2 - then do code_src <- getAnyReg src - src_r <- getNewRegNat format - let dst_r = getRegisterReg platform (CmmLocal dst) - return $ code_src src_r `appOL` - (if width == W8 then - -- The POPCNT instruction doesn't take a r/m8 - unitOL (MOVZxL II8 (OpReg src_r) (OpReg src_r)) `appOL` - unitOL (POPCNT II16 (OpReg src_r) dst_r) - else - unitOL (POPCNT format (OpReg src_r) dst_r)) `appOL` - (if width == W8 || width == W16 then - -- We used a 16-bit destination register above, - -- so zero-extend - unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r)) - else nilOL) - else do - targetExpr <- cmmMakeDynamicReference dflags - CallReference lbl - let target = ForeignTarget targetExpr (ForeignConvention CCallConv - [NoHint] [NoHint] - CmmMayReturn) - genCCall' dflags is32Bit target dest_regs args bid - where - format = intFormat width - lbl = mkCmmCodeLabel primUnitId (fsLit (popCntLabel width)) - -genCCall' dflags is32Bit (PrimTarget (MO_Pdep width)) dest_regs@[dst] - args@[src, mask] bid = do - let platform = targetPlatform dflags - if isBmi2Enabled dflags - then do code_src <- getAnyReg src - code_mask <- getAnyReg mask - src_r <- getNewRegNat format - mask_r <- getNewRegNat format - let dst_r = getRegisterReg platform (CmmLocal dst) - return $ code_src src_r `appOL` code_mask mask_r `appOL` - (if width == W8 then - -- The PDEP instruction doesn't take a r/m8 - unitOL (MOVZxL II8 (OpReg src_r ) (OpReg src_r )) `appOL` - unitOL (MOVZxL II8 (OpReg mask_r) (OpReg mask_r)) `appOL` - unitOL (PDEP II16 (OpReg mask_r) (OpReg src_r ) dst_r) - else - unitOL (PDEP format (OpReg mask_r) (OpReg src_r) dst_r)) `appOL` - (if width == W8 || width == W16 then - -- We used a 16-bit destination register above, - -- so zero-extend - unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r)) - else nilOL) - else do - targetExpr <- cmmMakeDynamicReference dflags - CallReference lbl - let target = ForeignTarget targetExpr (ForeignConvention CCallConv - [NoHint] [NoHint] - CmmMayReturn) - genCCall' dflags is32Bit target dest_regs args bid - where - format = intFormat width - lbl = mkCmmCodeLabel primUnitId (fsLit (pdepLabel width)) - -genCCall' dflags is32Bit (PrimTarget (MO_Pext width)) dest_regs@[dst] - args@[src, mask] bid = do - let platform = targetPlatform dflags - if isBmi2Enabled dflags - then do code_src <- getAnyReg src - code_mask <- getAnyReg mask - src_r <- getNewRegNat format - mask_r <- getNewRegNat format - let dst_r = getRegisterReg platform (CmmLocal dst) - return $ code_src src_r `appOL` code_mask mask_r `appOL` - (if width == W8 then - -- The PEXT instruction doesn't take a r/m8 - unitOL (MOVZxL II8 (OpReg src_r ) (OpReg src_r )) `appOL` - unitOL (MOVZxL II8 (OpReg mask_r) (OpReg mask_r)) `appOL` - unitOL (PEXT II16 (OpReg mask_r) (OpReg src_r) dst_r) - else - unitOL (PEXT format (OpReg mask_r) (OpReg src_r) dst_r)) `appOL` - (if width == W8 || width == W16 then - -- We used a 16-bit destination register above, - -- so zero-extend - unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r)) - else nilOL) - else do - targetExpr <- cmmMakeDynamicReference dflags - CallReference lbl - let target = ForeignTarget targetExpr (ForeignConvention CCallConv - [NoHint] [NoHint] - CmmMayReturn) - genCCall' dflags is32Bit target dest_regs args bid - where - format = intFormat width - lbl = mkCmmCodeLabel primUnitId (fsLit (pextLabel width)) - -genCCall' dflags is32Bit (PrimTarget (MO_Clz width)) dest_regs@[dst] args@[src] bid - | is32Bit && width == W64 = do - -- Fallback to `hs_clz64` on i386 - targetExpr <- cmmMakeDynamicReference dflags CallReference lbl - let target = ForeignTarget targetExpr (ForeignConvention CCallConv - [NoHint] [NoHint] - CmmMayReturn) - genCCall' dflags is32Bit target dest_regs args bid - - | otherwise = do - code_src <- getAnyReg src - let dst_r = getRegisterReg platform (CmmLocal dst) - if isBmi2Enabled dflags - then do - src_r <- getNewRegNat (intFormat width) - return $ appOL (code_src src_r) $ case width of - W8 -> toOL - [ MOVZxL II8 (OpReg src_r) (OpReg src_r) -- zero-extend to 32 bit - , LZCNT II32 (OpReg src_r) dst_r -- lzcnt with extra 24 zeros - , SUB II32 (OpImm (ImmInt 24)) (OpReg dst_r) -- compensate for extra zeros - ] - W16 -> toOL - [ LZCNT II16 (OpReg src_r) dst_r - , MOVZxL II16 (OpReg dst_r) (OpReg dst_r) -- zero-extend from 16 bit - ] - _ -> unitOL (LZCNT (intFormat width) (OpReg src_r) dst_r) - else do - let format = if width == W8 then II16 else intFormat width - src_r <- getNewRegNat format - tmp_r <- getNewRegNat format - return $ code_src src_r `appOL` toOL - ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++ - [ BSR format (OpReg src_r) tmp_r - , MOV II32 (OpImm (ImmInt (2*bw-1))) (OpReg dst_r) - , CMOV NE format (OpReg tmp_r) dst_r - , XOR format (OpImm (ImmInt (bw-1))) (OpReg dst_r) - ]) -- NB: We don't need to zero-extend the result for the - -- W8/W16 cases because the 'MOV' insn already - -- took care of implicitly clearing the upper bits - where - bw = widthInBits width - platform = targetPlatform dflags - lbl = mkCmmCodeLabel primUnitId (fsLit (clzLabel width)) - -genCCall' dflags is32Bit (PrimTarget (MO_UF_Conv width)) dest_regs args bid = do - targetExpr <- cmmMakeDynamicReference dflags - CallReference lbl - let target = ForeignTarget targetExpr (ForeignConvention CCallConv - [NoHint] [NoHint] - CmmMayReturn) - genCCall' dflags is32Bit target dest_regs args bid - where - lbl = mkCmmCodeLabel primUnitId (fsLit (word2FloatLabel width)) - -genCCall' dflags _ (PrimTarget (MO_AtomicRead width)) [dst] [addr] _ = do - load_code <- intLoadCode (MOV (intFormat width)) addr - let platform = targetPlatform dflags - - return (load_code (getRegisterReg platform (CmmLocal dst))) - -genCCall' _ _ (PrimTarget (MO_AtomicWrite width)) [] [addr, val] _ = do - code <- assignMem_IntCode (intFormat width) addr val - return $ code `snocOL` MFENCE - -genCCall' dflags is32Bit (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] _ = do - -- On x86 we don't have enough registers to use cmpxchg with a - -- complicated addressing mode, so on that architecture we - -- pre-compute the address first. - Amode amode addr_code <- getSimpleAmode dflags is32Bit addr - newval <- getNewRegNat format - newval_code <- getAnyReg new - oldval <- getNewRegNat format - oldval_code <- getAnyReg old - let platform = targetPlatform dflags - dst_r = getRegisterReg platform (CmmLocal dst) - code = toOL - [ MOV format (OpReg oldval) (OpReg eax) - , LOCK (CMPXCHG format (OpReg newval) (OpAddr amode)) - , MOV format (OpReg eax) (OpReg dst_r) - ] - return $ addr_code `appOL` newval_code newval `appOL` oldval_code oldval - `appOL` code - where - format = intFormat width - -genCCall' _ is32Bit target dest_regs args bid = do - dflags <- getDynFlags - let platform = targetPlatform dflags - case (target, dest_regs) of - -- void return type prim op - (PrimTarget op, []) -> - outOfLineCmmOp bid op Nothing args - -- we only cope with a single result for foreign calls - (PrimTarget op, [r]) -> case op of - MO_F32_Fabs -> case args of - [x] -> sse2FabsCode W32 x - _ -> panic "genCCall: Wrong number of arguments for fabs" - MO_F64_Fabs -> case args of - [x] -> sse2FabsCode W64 x - _ -> panic "genCCall: Wrong number of arguments for fabs" - - MO_F32_Sqrt -> actuallyInlineSSE2Op (\fmt r -> SQRT fmt (OpReg r)) FF32 args - MO_F64_Sqrt -> actuallyInlineSSE2Op (\fmt r -> SQRT fmt (OpReg r)) FF64 args - _other_op -> outOfLineCmmOp bid op (Just r) args - - where - actuallyInlineSSE2Op = actuallyInlineFloatOp' - - actuallyInlineFloatOp' instr format [x] - = do res <- trivialUFCode format (instr format) x - any <- anyReg res - return (any (getRegisterReg platform (CmmLocal r))) - - actuallyInlineFloatOp' _ _ args - = panic $ "genCCall.actuallyInlineFloatOp': bad number of arguments! (" - ++ show (length args) ++ ")" - - sse2FabsCode :: Width -> CmmExpr -> NatM InstrBlock - sse2FabsCode w x = do - let fmt = floatFormat w - x_code <- getAnyReg x - let - const | FF32 <- fmt = CmmInt 0x7fffffff W32 - | otherwise = CmmInt 0x7fffffffffffffff W64 - Amode amode amode_code <- memConstant (mkAlignment $ widthInBytes w) const - tmp <- getNewRegNat fmt - let - code dst = x_code dst `appOL` amode_code `appOL` toOL [ - MOV fmt (OpAddr amode) (OpReg tmp), - AND fmt (OpReg tmp) (OpReg dst) - ] - - return $ code (getRegisterReg platform (CmmLocal r)) - - (PrimTarget (MO_S_QuotRem width), _) -> divOp1 platform True width dest_regs args - (PrimTarget (MO_U_QuotRem width), _) -> divOp1 platform False width dest_regs args - (PrimTarget (MO_U_QuotRem2 width), _) -> divOp2 platform False width dest_regs args - (PrimTarget (MO_Add2 width), [res_h, res_l]) -> - case args of - [arg_x, arg_y] -> - do hCode <- getAnyReg (CmmLit (CmmInt 0 width)) - let format = intFormat width - lCode <- anyReg =<< trivialCode width (ADD_CC format) - (Just (ADD_CC format)) arg_x arg_y - let reg_l = getRegisterReg platform (CmmLocal res_l) - reg_h = getRegisterReg platform (CmmLocal res_h) - code = hCode reg_h `appOL` - lCode reg_l `snocOL` - ADC format (OpImm (ImmInteger 0)) (OpReg reg_h) - return code - _ -> panic "genCCall: Wrong number of arguments/results for add2" - (PrimTarget (MO_AddWordC width), [res_r, res_c]) -> - addSubIntC platform ADD_CC (const Nothing) CARRY width res_r res_c args - (PrimTarget (MO_SubWordC width), [res_r, res_c]) -> - addSubIntC platform SUB_CC (const Nothing) CARRY width res_r res_c args - (PrimTarget (MO_AddIntC width), [res_r, res_c]) -> - addSubIntC platform ADD_CC (Just . ADD_CC) OFLO width res_r res_c args - (PrimTarget (MO_SubIntC width), [res_r, res_c]) -> - addSubIntC platform SUB_CC (const Nothing) OFLO width res_r res_c args - (PrimTarget (MO_U_Mul2 width), [res_h, res_l]) -> - case args of - [arg_x, arg_y] -> - do (y_reg, y_code) <- getRegOrMem arg_y - x_code <- getAnyReg arg_x - let format = intFormat width - reg_h = getRegisterReg platform (CmmLocal res_h) - reg_l = getRegisterReg platform (CmmLocal res_l) - code = y_code `appOL` - x_code rax `appOL` - toOL [MUL2 format y_reg, - MOV format (OpReg rdx) (OpReg reg_h), - MOV format (OpReg rax) (OpReg reg_l)] - return code - _ -> panic "genCCall: Wrong number of arguments/results for mul2" - (PrimTarget (MO_S_Mul2 width), [res_c, res_h, res_l]) -> - case args of - [arg_x, arg_y] -> - do (y_reg, y_code) <- getRegOrMem arg_y - x_code <- getAnyReg arg_x - reg_tmp <- getNewRegNat II8 - let format = intFormat width - reg_h = getRegisterReg platform (CmmLocal res_h) - reg_l = getRegisterReg platform (CmmLocal res_l) - reg_c = getRegisterReg platform (CmmLocal res_c) - code = y_code `appOL` - x_code rax `appOL` - toOL [ IMUL2 format y_reg - , MOV format (OpReg rdx) (OpReg reg_h) - , MOV format (OpReg rax) (OpReg reg_l) - , SETCC CARRY (OpReg reg_tmp) - , MOVZxL II8 (OpReg reg_tmp) (OpReg reg_c) - ] - return code - _ -> panic "genCCall: Wrong number of arguments/results for imul2" - - _ -> if is32Bit - then genCCall32' dflags target dest_regs args - else genCCall64' dflags target dest_regs args - - where divOp1 platform signed width results [arg_x, arg_y] - = divOp platform signed width results Nothing arg_x arg_y - divOp1 _ _ _ _ _ - = panic "genCCall: Wrong number of arguments for divOp1" - divOp2 platform signed width results [arg_x_high, arg_x_low, arg_y] - = divOp platform signed width results (Just arg_x_high) arg_x_low arg_y - divOp2 _ _ _ _ _ - = panic "genCCall: Wrong number of arguments for divOp2" - - -- See Note [DIV/IDIV for bytes] - divOp platform signed W8 [res_q, res_r] m_arg_x_high arg_x_low arg_y = - let widen | signed = MO_SS_Conv W8 W16 - | otherwise = MO_UU_Conv W8 W16 - arg_x_low_16 = CmmMachOp widen [arg_x_low] - arg_y_16 = CmmMachOp widen [arg_y] - m_arg_x_high_16 = (\p -> CmmMachOp widen [p]) <$> m_arg_x_high - in divOp - platform signed W16 [res_q, res_r] - m_arg_x_high_16 arg_x_low_16 arg_y_16 - - divOp platform signed width [res_q, res_r] - m_arg_x_high arg_x_low arg_y - = do let format = intFormat width - reg_q = getRegisterReg platform (CmmLocal res_q) - reg_r = getRegisterReg platform (CmmLocal res_r) - widen | signed = CLTD format - | otherwise = XOR format (OpReg rdx) (OpReg rdx) - instr | signed = IDIV - | otherwise = DIV - (y_reg, y_code) <- getRegOrMem arg_y - x_low_code <- getAnyReg arg_x_low - x_high_code <- case m_arg_x_high of - Just arg_x_high -> - getAnyReg arg_x_high - Nothing -> - return $ const $ unitOL widen - return $ y_code `appOL` - x_low_code rax `appOL` - x_high_code rdx `appOL` - toOL [instr format y_reg, - MOV format (OpReg rax) (OpReg reg_q), - MOV format (OpReg rdx) (OpReg reg_r)] - divOp _ _ _ _ _ _ _ - = panic "genCCall: Wrong number of results for divOp" - - addSubIntC platform instr mrevinstr cond width - res_r res_c [arg_x, arg_y] - = do let format = intFormat width - rCode <- anyReg =<< trivialCode width (instr format) - (mrevinstr format) arg_x arg_y - reg_tmp <- getNewRegNat II8 - let reg_c = getRegisterReg platform (CmmLocal res_c) - reg_r = getRegisterReg platform (CmmLocal res_r) - code = rCode reg_r `snocOL` - SETCC cond (OpReg reg_tmp) `snocOL` - MOVZxL II8 (OpReg reg_tmp) (OpReg reg_c) - - return code - addSubIntC _ _ _ _ _ _ _ _ - = panic "genCCall: Wrong number of arguments/results for addSubIntC" - --- Note [DIV/IDIV for bytes] --- --- IDIV reminder: --- Size Dividend Divisor Quotient Remainder --- byte %ax r/m8 %al %ah --- word %dx:%ax r/m16 %ax %dx --- dword %edx:%eax r/m32 %eax %edx --- qword %rdx:%rax r/m64 %rax %rdx --- --- We do a special case for the byte division because the current --- codegen doesn't deal well with accessing %ah register (also, --- accessing %ah in 64-bit mode is complicated because it cannot be an --- operand of many instructions). So we just widen operands to 16 bits --- and get the results from %al, %dl. This is not optimal, but a few --- register moves are probably not a huge deal when doing division. - -genCCall32' :: DynFlags - -> ForeignTarget -- function to call - -> [CmmFormal] -- where to put the result - -> [CmmActual] -- arguments (of mixed type) - -> NatM InstrBlock -genCCall32' dflags target dest_regs args = do - let - prom_args = map (maybePromoteCArg dflags W32) args - - -- Align stack to 16n for calls, assuming a starting stack - -- alignment of 16n - word_size on procedure entry. Which we - -- maintiain. See Note [rts/StgCRun.c : Stack Alignment on X86] - sizes = map (arg_size_bytes . cmmExprType dflags) (reverse args) - raw_arg_size = sum sizes + wORD_SIZE dflags - arg_pad_size = (roundTo 16 $ raw_arg_size) - raw_arg_size - tot_arg_size = raw_arg_size + arg_pad_size - wORD_SIZE dflags - delta0 <- getDeltaNat - setDeltaNat (delta0 - arg_pad_size) - - push_codes <- mapM push_arg (reverse prom_args) - delta <- getDeltaNat - MASSERT(delta == delta0 - tot_arg_size) - - -- deal with static vs dynamic call targets - (callinsns,cconv) <- - case target of - ForeignTarget (CmmLit (CmmLabel lbl)) conv - -> -- ToDo: stdcall arg sizes - return (unitOL (CALL (Left fn_imm) []), conv) - where fn_imm = ImmCLbl lbl - ForeignTarget expr conv - -> do { (dyn_r, dyn_c) <- getSomeReg expr - ; ASSERT( isWord32 (cmmExprType dflags expr) ) - return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) } - PrimTarget _ - -> panic $ "genCCall: Can't handle PrimTarget call type here, error " - ++ "probably because too many return values." - - let push_code - | arg_pad_size /= 0 - = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp), - DELTA (delta0 - arg_pad_size)] - `appOL` concatOL push_codes - | otherwise - = concatOL push_codes - - -- Deallocate parameters after call for ccall; - -- but not for stdcall (callee does it) - -- - -- We have to pop any stack padding we added - -- even if we are doing stdcall, though (#5052) - pop_size - | ForeignConvention StdCallConv _ _ _ <- cconv = arg_pad_size - | otherwise = tot_arg_size - - call = callinsns `appOL` - toOL ( - (if pop_size==0 then [] else - [ADD II32 (OpImm (ImmInt pop_size)) (OpReg esp)]) - ++ - [DELTA delta0] - ) - setDeltaNat delta0 - - dflags <- getDynFlags - let platform = targetPlatform dflags - - let - -- assign the results, if necessary - assign_code [] = nilOL - assign_code [dest] - | isFloatType ty = - -- we assume SSE2 - let tmp_amode = AddrBaseIndex (EABaseReg esp) - EAIndexNone - (ImmInt 0) - fmt = floatFormat w - in toOL [ SUB II32 (OpImm (ImmInt b)) (OpReg esp), - DELTA (delta0 - b), - X87Store fmt tmp_amode, - -- X87Store only supported for the CDECL ABI - -- NB: This code will need to be - -- revisted once GHC does more work around - -- SIGFPE f - MOV fmt (OpAddr tmp_amode) (OpReg r_dest), - ADD II32 (OpImm (ImmInt b)) (OpReg esp), - DELTA delta0] - | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest), - MOV II32 (OpReg edx) (OpReg r_dest_hi)] - | otherwise = unitOL (MOV (intFormat w) - (OpReg eax) - (OpReg r_dest)) - where - ty = localRegType dest - w = typeWidth ty - b = widthInBytes w - r_dest_hi = getHiVRegFromLo r_dest - r_dest = getRegisterReg platform (CmmLocal dest) - assign_code many = pprPanic "genCCall.assign_code - too many return values:" (ppr many) - - return (push_code `appOL` - call `appOL` - assign_code dest_regs) - - where - -- If the size is smaller than the word, we widen things (see maybePromoteCArg) - arg_size_bytes :: CmmType -> Int - arg_size_bytes ty = max (widthInBytes (typeWidth ty)) (widthInBytes (wordWidth dflags)) - - roundTo a x | x `mod` a == 0 = x - | otherwise = x + a - (x `mod` a) - - push_arg :: CmmActual {-current argument-} - -> NatM InstrBlock -- code - - push_arg arg -- we don't need the hints on x86 - | isWord64 arg_ty = do - ChildCode64 code r_lo <- iselExpr64 arg - delta <- getDeltaNat - setDeltaNat (delta - 8) - let r_hi = getHiVRegFromLo r_lo - return ( code `appOL` - toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4), - PUSH II32 (OpReg r_lo), DELTA (delta - 8), - DELTA (delta-8)] - ) - - | isFloatType arg_ty = do - (reg, code) <- getSomeReg arg - delta <- getDeltaNat - setDeltaNat (delta-size) - return (code `appOL` - toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp), - DELTA (delta-size), - let addr = AddrBaseIndex (EABaseReg esp) - EAIndexNone - (ImmInt 0) - format = floatFormat (typeWidth arg_ty) - in - - -- assume SSE2 - MOV format (OpReg reg) (OpAddr addr) - - ] - ) - - | otherwise = do - -- Arguments can be smaller than 32-bit, but we still use @PUSH - -- II32@ - the usual calling conventions expect integers to be - -- 4-byte aligned. - ASSERT((typeWidth arg_ty) <= W32) return () - (operand, code) <- getOperand arg - delta <- getDeltaNat - setDeltaNat (delta-size) - return (code `snocOL` - PUSH II32 operand `snocOL` - DELTA (delta-size)) - - where - arg_ty = cmmExprType dflags arg - size = arg_size_bytes arg_ty -- Byte size - -genCCall64' :: DynFlags - -> ForeignTarget -- function to call - -> [CmmFormal] -- where to put the result - -> [CmmActual] -- arguments (of mixed type) - -> NatM InstrBlock -genCCall64' dflags target dest_regs args = do - -- load up the register arguments - let prom_args = map (maybePromoteCArg dflags W32) args - - (stack_args, int_regs_used, fp_regs_used, load_args_code, assign_args_code) - <- - if platformOS platform == OSMinGW32 - then load_args_win prom_args [] [] (allArgRegs platform) nilOL - else do - (stack_args, aregs, fregs, load_args_code, assign_args_code) - <- load_args prom_args (allIntArgRegs platform) - (allFPArgRegs platform) - nilOL nilOL - let used_regs rs as = reverse (drop (length rs) (reverse as)) - fregs_used = used_regs fregs (allFPArgRegs platform) - aregs_used = used_regs aregs (allIntArgRegs platform) - return (stack_args, aregs_used, fregs_used, load_args_code - , assign_args_code) - - let - arg_regs_used = int_regs_used ++ fp_regs_used - arg_regs = [eax] ++ arg_regs_used - -- for annotating the call instruction with - sse_regs = length fp_regs_used - arg_stack_slots = if platformOS platform == OSMinGW32 - then length stack_args + length (allArgRegs platform) - else length stack_args - tot_arg_size = arg_size * arg_stack_slots - - - -- Align stack to 16n for calls, assuming a starting stack - -- alignment of 16n - word_size on procedure entry. Which we - -- maintain. See Note [rts/StgCRun.c : Stack Alignment on X86] - (real_size, adjust_rsp) <- - if (tot_arg_size + wORD_SIZE dflags) `rem` 16 == 0 - then return (tot_arg_size, nilOL) - else do -- we need to adjust... - delta <- getDeltaNat - setDeltaNat (delta - wORD_SIZE dflags) - return (tot_arg_size + wORD_SIZE dflags, toOL [ - SUB II64 (OpImm (ImmInt (wORD_SIZE dflags))) (OpReg rsp), - DELTA (delta - wORD_SIZE dflags) ]) - - -- push the stack args, right to left - push_code <- push_args (reverse stack_args) nilOL - -- On Win64, we also have to leave stack space for the arguments - -- that we are passing in registers - lss_code <- if platformOS platform == OSMinGW32 - then leaveStackSpace (length (allArgRegs platform)) - else return nilOL - delta <- getDeltaNat - - -- deal with static vs dynamic call targets - (callinsns,_cconv) <- - case target of - ForeignTarget (CmmLit (CmmLabel lbl)) conv - -> -- ToDo: stdcall arg sizes - return (unitOL (CALL (Left fn_imm) arg_regs), conv) - where fn_imm = ImmCLbl lbl - ForeignTarget expr conv - -> do (dyn_r, dyn_c) <- getSomeReg expr - return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv) - PrimTarget _ - -> panic $ "genCCall: Can't handle PrimTarget call type here, error " - ++ "probably because too many return values." - - let - -- The x86_64 ABI requires us to set %al to the number of SSE2 - -- registers that contain arguments, if the called routine - -- is a varargs function. We don't know whether it's a - -- varargs function or not, so we have to assume it is. - -- - -- It's not safe to omit this assignment, even if the number - -- of SSE2 regs in use is zero. If %al is larger than 8 - -- on entry to a varargs function, seg faults ensue. - assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax)) - - let call = callinsns `appOL` - toOL ( - -- Deallocate parameters after call for ccall; - -- stdcall has callee do it, but is not supported on - -- x86_64 target (see #3336) - (if real_size==0 then [] else - [ADD (intFormat (wordWidth dflags)) (OpImm (ImmInt real_size)) (OpReg esp)]) - ++ - [DELTA (delta + real_size)] - ) - setDeltaNat (delta + real_size) - - let - -- assign the results, if necessary - assign_code [] = nilOL - assign_code [dest] = - case typeWidth rep of - W32 | isFloatType rep -> unitOL (MOV (floatFormat W32) - (OpReg xmm0) - (OpReg r_dest)) - W64 | isFloatType rep -> unitOL (MOV (floatFormat W64) - (OpReg xmm0) - (OpReg r_dest)) - _ -> unitOL (MOV (cmmTypeFormat rep) (OpReg rax) (OpReg r_dest)) - where - rep = localRegType dest - r_dest = getRegisterReg platform (CmmLocal dest) - assign_code _many = panic "genCCall.assign_code many" - - return (adjust_rsp `appOL` - push_code `appOL` - load_args_code `appOL` - assign_args_code `appOL` - lss_code `appOL` - assign_eax sse_regs `appOL` - call `appOL` - assign_code dest_regs) - - where platform = targetPlatform dflags - arg_size = 8 -- always, at the mo - - - load_args :: [CmmExpr] - -> [Reg] -- int regs avail for args - -> [Reg] -- FP regs avail for args - -> InstrBlock -- code computing args - -> InstrBlock -- code assigning args to ABI regs - -> NatM ([CmmExpr],[Reg],[Reg],InstrBlock,InstrBlock) - -- no more regs to use - load_args args [] [] code acode = - return (args, [], [], code, acode) - - -- no more args to push - load_args [] aregs fregs code acode = - return ([], aregs, fregs, code, acode) - - load_args (arg : rest) aregs fregs code acode - | isFloatType arg_rep = case fregs of - [] -> push_this_arg - (r:rs) -> do - (code',acode') <- reg_this_arg r - load_args rest aregs rs code' acode' - | otherwise = case aregs of - [] -> push_this_arg - (r:rs) -> do - (code',acode') <- reg_this_arg r - load_args rest rs fregs code' acode' - where - - -- put arg into the list of stack pushed args - push_this_arg = do - (args',ars,frs,code',acode') - <- load_args rest aregs fregs code acode - return (arg:args', ars, frs, code', acode') - - -- pass the arg into the given register - reg_this_arg r - -- "operand" args can be directly assigned into r - | isOperand False arg = do - arg_code <- getAnyReg arg - return (code, (acode `appOL` arg_code r)) - -- The last non-operand arg can be directly assigned after its - -- computation without going into a temporary register - | all (isOperand False) rest = do - arg_code <- getAnyReg arg - return (code `appOL` arg_code r,acode) - - -- other args need to be computed beforehand to avoid clobbering - -- previously assigned registers used to pass parameters (see - -- #11792, #12614). They are assigned into temporary registers - -- and get assigned to proper call ABI registers after they all - -- have been computed. - | otherwise = do - arg_code <- getAnyReg arg - tmp <- getNewRegNat arg_fmt - let - code' = code `appOL` arg_code tmp - acode' = acode `snocOL` reg2reg arg_fmt tmp r - return (code',acode') - - arg_rep = cmmExprType dflags arg - arg_fmt = cmmTypeFormat arg_rep - - load_args_win :: [CmmExpr] - -> [Reg] -- used int regs - -> [Reg] -- used FP regs - -> [(Reg, Reg)] -- (int, FP) regs avail for args - -> InstrBlock - -> NatM ([CmmExpr],[Reg],[Reg],InstrBlock,InstrBlock) - load_args_win args usedInt usedFP [] code - = return (args, usedInt, usedFP, code, nilOL) - -- no more regs to use - load_args_win [] usedInt usedFP _ code - = return ([], usedInt, usedFP, code, nilOL) - -- no more args to push - load_args_win (arg : rest) usedInt usedFP - ((ireg, freg) : regs) code - | isFloatType arg_rep = do - arg_code <- getAnyReg arg - load_args_win rest (ireg : usedInt) (freg : usedFP) regs - (code `appOL` - arg_code freg `snocOL` - -- If we are calling a varargs function - -- then we need to define ireg as well - -- as freg - MOV II64 (OpReg freg) (OpReg ireg)) - | otherwise = do - arg_code <- getAnyReg arg - load_args_win rest (ireg : usedInt) usedFP regs - (code `appOL` arg_code ireg) - where - arg_rep = cmmExprType dflags arg - - push_args [] code = return code - push_args (arg:rest) code - | isFloatType arg_rep = do - (arg_reg, arg_code) <- getSomeReg arg - delta <- getDeltaNat - setDeltaNat (delta-arg_size) - let code' = code `appOL` arg_code `appOL` toOL [ - SUB (intFormat (wordWidth dflags)) (OpImm (ImmInt arg_size)) (OpReg rsp), - DELTA (delta-arg_size), - MOV (floatFormat width) (OpReg arg_reg) (OpAddr (spRel dflags 0))] - push_args rest code' - - | otherwise = do - -- Arguments can be smaller than 64-bit, but we still use @PUSH - -- II64@ - the usual calling conventions expect integers to be - -- 8-byte aligned. - ASSERT(width <= W64) return () - (arg_op, arg_code) <- getOperand arg - delta <- getDeltaNat - setDeltaNat (delta-arg_size) - let code' = code `appOL` arg_code `appOL` toOL [ - PUSH II64 arg_op, - DELTA (delta-arg_size)] - push_args rest code' - where - arg_rep = cmmExprType dflags arg - width = typeWidth arg_rep - - leaveStackSpace n = do - delta <- getDeltaNat - setDeltaNat (delta - n * arg_size) - return $ toOL [ - SUB II64 (OpImm (ImmInt (n * wORD_SIZE dflags))) (OpReg rsp), - DELTA (delta - n * arg_size)] - -maybePromoteCArg :: DynFlags -> Width -> CmmExpr -> CmmExpr -maybePromoteCArg dflags wto arg - | wfrom < wto = CmmMachOp (MO_UU_Conv wfrom wto) [arg] - | otherwise = arg - where - wfrom = cmmExprWidth dflags arg - -outOfLineCmmOp :: BlockId -> CallishMachOp -> Maybe CmmFormal -> [CmmActual] - -> NatM InstrBlock -outOfLineCmmOp bid mop res args - = do - dflags <- getDynFlags - targetExpr <- cmmMakeDynamicReference dflags CallReference lbl - let target = ForeignTarget targetExpr - (ForeignConvention CCallConv [] [] CmmMayReturn) - - -- We know foreign calls results in no new basic blocks, so we can ignore - -- the returned block id. - (instrs, _) <- stmtToInstrs bid (CmmUnsafeForeignCall target (catMaybes [res]) args) - return instrs - where - -- Assume we can call these functions directly, and that they're not in a dynamic library. - -- TODO: Why is this ok? Under linux this code will be in libm.so - -- Is it because they're really implemented as a primitive instruction by the assembler?? -- BL 2009/12/31 - lbl = mkForeignLabel fn Nothing ForeignLabelInThisPackage IsFunction - - fn = case mop of - MO_F32_Sqrt -> fsLit "sqrtf" - MO_F32_Fabs -> fsLit "fabsf" - MO_F32_Sin -> fsLit "sinf" - MO_F32_Cos -> fsLit "cosf" - MO_F32_Tan -> fsLit "tanf" - MO_F32_Exp -> fsLit "expf" - MO_F32_ExpM1 -> fsLit "expm1f" - MO_F32_Log -> fsLit "logf" - MO_F32_Log1P -> fsLit "log1pf" - - MO_F32_Asin -> fsLit "asinf" - MO_F32_Acos -> fsLit "acosf" - MO_F32_Atan -> fsLit "atanf" - - MO_F32_Sinh -> fsLit "sinhf" - MO_F32_Cosh -> fsLit "coshf" - MO_F32_Tanh -> fsLit "tanhf" - MO_F32_Pwr -> fsLit "powf" - - MO_F32_Asinh -> fsLit "asinhf" - MO_F32_Acosh -> fsLit "acoshf" - MO_F32_Atanh -> fsLit "atanhf" - - MO_F64_Sqrt -> fsLit "sqrt" - MO_F64_Fabs -> fsLit "fabs" - MO_F64_Sin -> fsLit "sin" - MO_F64_Cos -> fsLit "cos" - MO_F64_Tan -> fsLit "tan" - MO_F64_Exp -> fsLit "exp" - MO_F64_ExpM1 -> fsLit "expm1" - MO_F64_Log -> fsLit "log" - MO_F64_Log1P -> fsLit "log1p" - - MO_F64_Asin -> fsLit "asin" - MO_F64_Acos -> fsLit "acos" - MO_F64_Atan -> fsLit "atan" - - MO_F64_Sinh -> fsLit "sinh" - MO_F64_Cosh -> fsLit "cosh" - MO_F64_Tanh -> fsLit "tanh" - MO_F64_Pwr -> fsLit "pow" - - MO_F64_Asinh -> fsLit "asinh" - MO_F64_Acosh -> fsLit "acosh" - MO_F64_Atanh -> fsLit "atanh" - - MO_Memcpy _ -> fsLit "memcpy" - MO_Memset _ -> fsLit "memset" - MO_Memmove _ -> fsLit "memmove" - MO_Memcmp _ -> fsLit "memcmp" - - MO_PopCnt _ -> fsLit "popcnt" - MO_BSwap _ -> fsLit "bswap" - {- Here the C implementation is used as there is no x86 - instruction to reverse a word's bit order. - -} - MO_BRev w -> fsLit $ bRevLabel w - MO_Clz w -> fsLit $ clzLabel w - MO_Ctz _ -> unsupported - - MO_Pdep w -> fsLit $ pdepLabel w - MO_Pext w -> fsLit $ pextLabel w - - MO_AtomicRMW _ _ -> fsLit "atomicrmw" - MO_AtomicRead _ -> fsLit "atomicread" - MO_AtomicWrite _ -> fsLit "atomicwrite" - MO_Cmpxchg _ -> fsLit "cmpxchg" - - MO_UF_Conv _ -> unsupported - - MO_S_Mul2 {} -> unsupported - MO_S_QuotRem {} -> unsupported - MO_U_QuotRem {} -> unsupported - MO_U_QuotRem2 {} -> unsupported - MO_Add2 {} -> unsupported - MO_AddIntC {} -> unsupported - MO_SubIntC {} -> unsupported - MO_AddWordC {} -> unsupported - MO_SubWordC {} -> unsupported - MO_U_Mul2 {} -> unsupported - MO_ReadBarrier -> unsupported - MO_WriteBarrier -> unsupported - MO_Touch -> unsupported - (MO_Prefetch_Data _ ) -> unsupported - unsupported = panic ("outOfLineCmmOp: " ++ show mop - ++ " not supported here") - --- ----------------------------------------------------------------------------- --- Generating a table-branch - -genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock - -genSwitch dflags expr targets - | positionIndependent dflags - = do - (reg,e_code) <- getNonClobberedReg (cmmOffset dflags expr offset) - -- getNonClobberedReg because it needs to survive across t_code - lbl <- getNewLabelNat - dflags <- getDynFlags - let is32bit = target32Bit (targetPlatform dflags) - os = platformOS (targetPlatform dflags) - -- Might want to use .rodata.<function we're in> instead, but as - -- long as it's something unique it'll work out since the - -- references to the jump table are in the appropriate section. - rosection = case os of - -- on Mac OS X/x86_64, put the jump table in the text section to - -- work around a limitation of the linker. - -- ld64 is unable to handle the relocations for - -- .quad L1 - L0 - -- if L0 is not preceded by a non-anonymous label in its section. - OSDarwin | not is32bit -> Section Text lbl - _ -> Section ReadOnlyData lbl - dynRef <- cmmMakeDynamicReference dflags DataReference lbl - (tableReg,t_code) <- getSomeReg $ dynRef - let op = OpAddr (AddrBaseIndex (EABaseReg tableReg) - (EAIndex reg (wORD_SIZE dflags)) (ImmInt 0)) - - offsetReg <- getNewRegNat (intFormat (wordWidth dflags)) - return $ if is32bit || os == OSDarwin - then e_code `appOL` t_code `appOL` toOL [ - ADD (intFormat (wordWidth dflags)) op (OpReg tableReg), - JMP_TBL (OpReg tableReg) ids rosection lbl - ] - else -- HACK: On x86_64 binutils<2.17 is only able to generate - -- PC32 relocations, hence we only get 32-bit offsets in - -- the jump table. As these offsets are always negative - -- we need to properly sign extend them to 64-bit. This - -- hack should be removed in conjunction with the hack in - -- PprMach.hs/pprDataItem once binutils 2.17 is standard. - e_code `appOL` t_code `appOL` toOL [ - MOVSxL II32 op (OpReg offsetReg), - ADD (intFormat (wordWidth dflags)) - (OpReg offsetReg) - (OpReg tableReg), - JMP_TBL (OpReg tableReg) ids rosection lbl - ] - | otherwise - = do - (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset) - lbl <- getNewLabelNat - let op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg (wORD_SIZE dflags)) (ImmCLbl lbl)) - code = e_code `appOL` toOL [ - JMP_TBL op ids (Section ReadOnlyData lbl) lbl - ] - return code - where - (offset, blockIds) = switchTargetsToTable targets - ids = map (fmap DestBlockId) blockIds - -generateJumpTableForInstr :: DynFlags -> Instr -> Maybe (NatCmmDecl (Alignment, RawCmmStatics) Instr) -generateJumpTableForInstr dflags (JMP_TBL _ ids section lbl) - = let getBlockId (DestBlockId id) = id - getBlockId _ = panic "Non-Label target in Jump Table" - blockIds = map (fmap getBlockId) ids - in Just (createJumpTable dflags blockIds section lbl) -generateJumpTableForInstr _ _ = Nothing - -createJumpTable :: DynFlags -> [Maybe BlockId] -> Section -> CLabel - -> GenCmmDecl (Alignment, RawCmmStatics) h g -createJumpTable dflags ids section lbl - = let jumpTable - | positionIndependent dflags = - let ww = wordWidth dflags - jumpTableEntryRel Nothing - = CmmStaticLit (CmmInt 0 ww) - jumpTableEntryRel (Just blockid) - = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0 ww) - where blockLabel = blockLbl blockid - in map jumpTableEntryRel ids - | otherwise = map (jumpTableEntry dflags) ids - in CmmData section (mkAlignment 1, RawCmmStatics lbl jumpTable) - -extractUnwindPoints :: [Instr] -> [UnwindPoint] -extractUnwindPoints instrs = - [ UnwindPoint lbl unwinds | UNWIND lbl unwinds <- instrs] - --- ----------------------------------------------------------------------------- --- 'condIntReg' and 'condFltReg': condition codes into registers - --- Turn those condition codes into integers now (when they appear on --- the right hand side of an assignment). --- --- (If applicable) Do not fill the delay slots here; you will confuse the --- register allocator. - -condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register - -condIntReg cond x y = do - CondCode _ cond cond_code <- condIntCode cond x y - tmp <- getNewRegNat II8 - let - code dst = cond_code `appOL` toOL [ - SETCC cond (OpReg tmp), - MOVZxL II8 (OpReg tmp) (OpReg dst) - ] - return (Any II32 code) - - ------------------------------------------------------------ ---- Note [SSE Parity Checks] --- ------------------------------------------------------------ - --- We have to worry about unordered operands (eg. comparisons --- against NaN). If the operands are unordered, the comparison --- sets the parity flag, carry flag and zero flag. --- All comparisons are supposed to return false for unordered --- operands except for !=, which returns true. --- --- Optimisation: we don't have to test the parity flag if we --- know the test has already excluded the unordered case: eg > --- and >= test for a zero carry flag, which can only occur for --- ordered operands. --- --- By reversing comparisons we can avoid testing the parity --- for < and <= as well. If any of the arguments is an NaN we --- return false either way. If both arguments are valid then --- x <= y <-> y >= x holds. So it's safe to swap these. --- --- We invert the condition inside getRegister'and getCondCode --- which should cover all invertable cases. --- All other functions translating FP comparisons to assembly --- use these to two generate the comparison code. --- --- As an example consider a simple check: --- --- func :: Float -> Float -> Int --- func x y = if x < y then 1 else 0 --- --- Which in Cmm gives the floating point comparison. --- --- if (%MO_F_Lt_W32(F1, F2)) goto c2gg; else goto c2gf; --- --- We used to compile this to an assembly code block like this: --- _c2gh: --- ucomiss %xmm2,%xmm1 --- jp _c2gf --- jb _c2gg --- jmp _c2gf --- --- Where we have to introduce an explicit --- check for unordered results (using jmp parity): --- --- We can avoid this by exchanging the arguments and inverting the direction --- of the comparison. This results in the sequence of: --- --- ucomiss %xmm1,%xmm2 --- ja _c2g2 --- jmp _c2g1 --- --- Removing the jump reduces the pressure on the branch predidiction system --- and plays better with the uOP cache. - -condFltReg :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM Register -condFltReg is32Bit cond x y = condFltReg_sse2 - where - - - condFltReg_sse2 = do - CondCode _ cond cond_code <- condFltCode cond x y - tmp1 <- getNewRegNat (archWordFormat is32Bit) - tmp2 <- getNewRegNat (archWordFormat is32Bit) - let -- See Note [SSE Parity Checks] - code dst = - cond_code `appOL` - (case cond of - NE -> or_unordered dst - GU -> plain_test dst - GEU -> plain_test dst - -- Use ASSERT so we don't break releases if these creep in. - LTT -> ASSERT2(False, ppr "Should have been turned into >") - and_ordered dst - LE -> ASSERT2(False, ppr "Should have been turned into >=") - and_ordered dst - _ -> and_ordered dst) - - plain_test dst = toOL [ - SETCC cond (OpReg tmp1), - MOVZxL II8 (OpReg tmp1) (OpReg dst) - ] - or_unordered dst = toOL [ - SETCC cond (OpReg tmp1), - SETCC PARITY (OpReg tmp2), - OR II8 (OpReg tmp1) (OpReg tmp2), - MOVZxL II8 (OpReg tmp2) (OpReg dst) - ] - and_ordered dst = toOL [ - SETCC cond (OpReg tmp1), - SETCC NOTPARITY (OpReg tmp2), - AND II8 (OpReg tmp1) (OpReg tmp2), - MOVZxL II8 (OpReg tmp2) (OpReg dst) - ] - return (Any II32 code) - - --- ----------------------------------------------------------------------------- --- 'trivial*Code': deal with trivial instructions - --- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode', --- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions. --- Only look for constants on the right hand side, because that's --- where the generic optimizer will have put them. - --- Similarly, for unary instructions, we don't have to worry about --- matching an StInt as the argument, because genericOpt will already --- have handled the constant-folding. - - -{- -The Rules of the Game are: - -* You cannot assume anything about the destination register dst; - it may be anything, including a fixed reg. - -* You may compute an operand into a fixed reg, but you may not - subsequently change the contents of that fixed reg. If you - want to do so, first copy the value either to a temporary - or into dst. You are free to modify dst even if it happens - to be a fixed reg -- that's not your problem. - -* You cannot assume that a fixed reg will stay live over an - arbitrary computation. The same applies to the dst reg. - -* Temporary regs obtained from getNewRegNat are distinct from - each other and from all other regs, and stay live over - arbitrary computations. - --------------------- - -SDM's version of The Rules: - -* If getRegister returns Any, that means it can generate correct - code which places the result in any register, period. Even if that - register happens to be read during the computation. - - Corollary #1: this means that if you are generating code for an - operation with two arbitrary operands, you cannot assign the result - of the first operand into the destination register before computing - the second operand. The second operand might require the old value - of the destination register. - - Corollary #2: A function might be able to generate more efficient - code if it knows the destination register is a new temporary (and - therefore not read by any of the sub-computations). - -* If getRegister returns Any, then the code it generates may modify only: - (a) fresh temporaries - (b) the destination register - (c) known registers (eg. %ecx is used by shifts) - In particular, it may *not* modify global registers, unless the global - register happens to be the destination register. --} - -trivialCode :: Width -> (Operand -> Operand -> Instr) - -> Maybe (Operand -> Operand -> Instr) - -> CmmExpr -> CmmExpr -> NatM Register -trivialCode width instr m a b - = do is32Bit <- is32BitPlatform - trivialCode' is32Bit width instr m a b - -trivialCode' :: Bool -> Width -> (Operand -> Operand -> Instr) - -> Maybe (Operand -> Operand -> Instr) - -> CmmExpr -> CmmExpr -> NatM Register -trivialCode' is32Bit width _ (Just revinstr) (CmmLit lit_a) b - | is32BitLit is32Bit lit_a = do - b_code <- getAnyReg b - let - code dst - = b_code dst `snocOL` - revinstr (OpImm (litToImm lit_a)) (OpReg dst) - return (Any (intFormat width) code) - -trivialCode' _ width instr _ a b - = genTrivialCode (intFormat width) instr a b - --- This is re-used for floating pt instructions too. -genTrivialCode :: Format -> (Operand -> Operand -> Instr) - -> CmmExpr -> CmmExpr -> NatM Register -genTrivialCode rep instr a b = do - (b_op, b_code) <- getNonClobberedOperand b - a_code <- getAnyReg a - tmp <- getNewRegNat rep - let - -- We want the value of b to stay alive across the computation of a. - -- But, we want to calculate a straight into the destination register, - -- because the instruction only has two operands (dst := dst `op` src). - -- The troublesome case is when the result of b is in the same register - -- as the destination reg. In this case, we have to save b in a - -- new temporary across the computation of a. - code dst - | dst `regClashesWithOp` b_op = - b_code `appOL` - unitOL (MOV rep b_op (OpReg tmp)) `appOL` - a_code dst `snocOL` - instr (OpReg tmp) (OpReg dst) - | otherwise = - b_code `appOL` - a_code dst `snocOL` - instr b_op (OpReg dst) - return (Any rep code) - -regClashesWithOp :: Reg -> Operand -> Bool -reg `regClashesWithOp` OpReg reg2 = reg == reg2 -reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode) -_ `regClashesWithOp` _ = False - ------------ - -trivialUCode :: Format -> (Operand -> Instr) - -> CmmExpr -> NatM Register -trivialUCode rep instr x = do - x_code <- getAnyReg x - let - code dst = - x_code dst `snocOL` - instr (OpReg dst) - return (Any rep code) - ------------ - - -trivialFCode_sse2 :: Width -> (Format -> Operand -> Operand -> Instr) - -> CmmExpr -> CmmExpr -> NatM Register -trivialFCode_sse2 pk instr x y - = genTrivialCode format (instr format) x y - where format = floatFormat pk - - -trivialUFCode :: Format -> (Reg -> Reg -> Instr) -> CmmExpr -> NatM Register -trivialUFCode format instr x = do - (x_reg, x_code) <- getSomeReg x - let - code dst = - x_code `snocOL` - instr x_reg dst - return (Any format code) - - --------------------------------------------------------------------------------- -coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register -coerceInt2FP from to x = coerce_sse2 - where - - coerce_sse2 = do - (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand - let - opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD - n -> panic $ "coerceInt2FP.sse: unhandled width (" - ++ show n ++ ")" - code dst = x_code `snocOL` opc (intFormat from) x_op dst - return (Any (floatFormat to) code) - -- works even if the destination rep is <II32 - --------------------------------------------------------------------------------- -coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register -coerceFP2Int from to x = coerceFP2Int_sse2 - where - coerceFP2Int_sse2 = do - (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand - let - opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ; - n -> panic $ "coerceFP2Init.sse: unhandled width (" - ++ show n ++ ")" - code dst = x_code `snocOL` opc (intFormat to) x_op dst - return (Any (intFormat to) code) - -- works even if the destination rep is <II32 - - --------------------------------------------------------------------------------- -coerceFP2FP :: Width -> CmmExpr -> NatM Register -coerceFP2FP to x = do - (x_reg, x_code) <- getSomeReg x - let - opc = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD; - n -> panic $ "coerceFP2FP: unhandled width (" - ++ show n ++ ")" - code dst = x_code `snocOL` opc x_reg dst - return (Any ( floatFormat to) code) - --------------------------------------------------------------------------------- - -sse2NegCode :: Width -> CmmExpr -> NatM Register -sse2NegCode w x = do - let fmt = floatFormat w - x_code <- getAnyReg x - -- This is how gcc does it, so it can't be that bad: - let - const = case fmt of - FF32 -> CmmInt 0x80000000 W32 - FF64 -> CmmInt 0x8000000000000000 W64 - x@II8 -> wrongFmt x - x@II16 -> wrongFmt x - x@II32 -> wrongFmt x - x@II64 -> wrongFmt x - - where - wrongFmt x = panic $ "sse2NegCode: " ++ show x - Amode amode amode_code <- memConstant (mkAlignment $ widthInBytes w) const - tmp <- getNewRegNat fmt - let - code dst = x_code dst `appOL` amode_code `appOL` toOL [ - MOV fmt (OpAddr amode) (OpReg tmp), - XOR fmt (OpReg tmp) (OpReg dst) - ] - -- - return (Any fmt code) - -isVecExpr :: CmmExpr -> Bool -isVecExpr (CmmMachOp (MO_V_Insert {}) _) = True -isVecExpr (CmmMachOp (MO_V_Extract {}) _) = True -isVecExpr (CmmMachOp (MO_V_Add {}) _) = True -isVecExpr (CmmMachOp (MO_V_Sub {}) _) = True -isVecExpr (CmmMachOp (MO_V_Mul {}) _) = True -isVecExpr (CmmMachOp (MO_VS_Quot {}) _) = True -isVecExpr (CmmMachOp (MO_VS_Rem {}) _) = True -isVecExpr (CmmMachOp (MO_VS_Neg {}) _) = True -isVecExpr (CmmMachOp (MO_VF_Insert {}) _) = True -isVecExpr (CmmMachOp (MO_VF_Extract {}) _) = True -isVecExpr (CmmMachOp (MO_VF_Add {}) _) = True -isVecExpr (CmmMachOp (MO_VF_Sub {}) _) = True -isVecExpr (CmmMachOp (MO_VF_Mul {}) _) = True -isVecExpr (CmmMachOp (MO_VF_Quot {}) _) = True -isVecExpr (CmmMachOp (MO_VF_Neg {}) _) = True -isVecExpr (CmmMachOp _ [e]) = isVecExpr e -isVecExpr _ = False - -needLlvm :: NatM a -needLlvm = - sorry $ unlines ["The native code generator does not support vector" - ,"instructions. Please use -fllvm."] - --- | This works on the invariant that all jumps in the given blocks are required. --- Starting from there we try to make a few more jumps redundant by reordering --- them. --- We depend on the information in the CFG to do so so without a given CFG --- we do nothing. -invertCondBranches :: Maybe CFG -- ^ CFG if present - -> LabelMap a -- ^ Blocks with info tables - -> [NatBasicBlock Instr] -- ^ List of basic blocks - -> [NatBasicBlock Instr] -invertCondBranches Nothing _ bs = bs -invertCondBranches (Just cfg) keep bs = - invert bs - where - invert :: [NatBasicBlock Instr] -> [NatBasicBlock Instr] - invert ((BasicBlock lbl1 ins@(_:_:_xs)):b2@(BasicBlock lbl2 _):bs) - | --pprTrace "Block" (ppr lbl1) True, - (jmp1,jmp2) <- last2 ins - , JXX cond1 target1 <- jmp1 - , target1 == lbl2 - --, pprTrace "CutChance" (ppr b1) True - , JXX ALWAYS target2 <- jmp2 - -- We have enough information to check if we can perform the inversion - -- TODO: We could also check for the last asm instruction which sets - -- status flags instead. Which I suspect is worse in terms of compiler - -- performance, but might be applicable to more cases - , Just edgeInfo1 <- getEdgeInfo lbl1 target1 cfg - , Just edgeInfo2 <- getEdgeInfo lbl1 target2 cfg - -- Both jumps come from the same cmm statement - , transitionSource edgeInfo1 == transitionSource edgeInfo2 - , CmmSource {trans_cmmNode = cmmCondBranch} <- transitionSource edgeInfo1 - - --Int comparisons are invertable - , CmmCondBranch (CmmMachOp op _args) _ _ _ <- cmmCondBranch - , Just _ <- maybeIntComparison op - , Just invCond <- maybeInvertCond cond1 - - --Swap the last two jumps, invert the conditional jumps condition. - = let jumps = - case () of - -- We are free the eliminate the jmp. So we do so. - _ | not (mapMember target1 keep) - -> [JXX invCond target2] - -- If the conditional target is unlikely we put the other - -- target at the front. - | edgeWeight edgeInfo2 > edgeWeight edgeInfo1 - -> [JXX invCond target2, JXX ALWAYS target1] - -- Keep things as-is otherwise - | otherwise - -> [jmp1, jmp2] - in --pprTrace "Cutable" (ppr [jmp1,jmp2] <+> text "=>" <+> ppr jumps) $ - (BasicBlock lbl1 - (dropTail 2 ins ++ jumps)) - : invert (b2:bs) - invert (b:bs) = b : invert bs - invert [] = [] diff --git a/compiler/nativeGen/X86/Cond.hs b/compiler/nativeGen/X86/Cond.hs deleted file mode 100644 index 49e3ce9254..0000000000 --- a/compiler/nativeGen/X86/Cond.hs +++ /dev/null @@ -1,109 +0,0 @@ -module X86.Cond ( - Cond(..), - condUnsigned, - condToSigned, - condToUnsigned, - maybeFlipCond, - maybeInvertCond -) - -where - -import GhcPrelude - -data Cond - = ALWAYS -- What's really used? ToDo - | EQQ - | GE - | GEU - | GTT - | GU - | LE - | LEU - | LTT - | LU - | NE - | NEG - | POS - | CARRY - | OFLO - | PARITY - | NOTPARITY - deriving Eq - -condUnsigned :: Cond -> Bool -condUnsigned GU = True -condUnsigned LU = True -condUnsigned GEU = True -condUnsigned LEU = True -condUnsigned _ = False - - -condToSigned :: Cond -> Cond -condToSigned GU = GTT -condToSigned LU = LTT -condToSigned GEU = GE -condToSigned LEU = LE -condToSigned x = x - - -condToUnsigned :: Cond -> Cond -condToUnsigned GTT = GU -condToUnsigned LTT = LU -condToUnsigned GE = GEU -condToUnsigned LE = LEU -condToUnsigned x = x - --- | @maybeFlipCond c@ returns @Just c'@ if it is possible to flip the --- arguments to the conditional @c@, and the new condition should be @c'@. -maybeFlipCond :: Cond -> Maybe Cond -maybeFlipCond cond = case cond of - EQQ -> Just EQQ - NE -> Just NE - LU -> Just GU - GU -> Just LU - LEU -> Just GEU - GEU -> Just LEU - LTT -> Just GTT - GTT -> Just LTT - LE -> Just GE - GE -> Just LE - _other -> Nothing - --- | If we apply @maybeInvertCond@ to the condition of a jump we turn --- jumps taken into jumps not taken and vice versa. --- --- Careful! If the used comparison and the conditional jump --- don't match the above behaviour will NOT hold. --- When used for FP comparisons this does not consider unordered --- numbers. --- Also inverting twice might return a synonym for the original condition. -maybeInvertCond :: Cond -> Maybe Cond -maybeInvertCond cond = case cond of - ALWAYS -> Nothing - EQQ -> Just NE - NE -> Just EQQ - - NEG -> Just POS - POS -> Just NEG - - GEU -> Just LU - LU -> Just GEU - - GE -> Just LTT - LTT -> Just GE - - GTT -> Just LE - LE -> Just GTT - - GU -> Just LEU - LEU -> Just GU - - --GEU "==" NOTCARRY, they are synonyms - --at the assembly level - CARRY -> Just GEU - - OFLO -> Nothing - - PARITY -> Just NOTPARITY - NOTPARITY -> Just PARITY diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs deleted file mode 100644 index 099437265c..0000000000 --- a/compiler/nativeGen/X86/Instr.hs +++ /dev/null @@ -1,1054 +0,0 @@ -{-# LANGUAGE CPP, TypeFamilies #-} - ------------------------------------------------------------------------------ --- --- Machine-dependent assembly language --- --- (c) The University of Glasgow 1993-2004 --- ------------------------------------------------------------------------------ - -module X86.Instr (Instr(..), Operand(..), PrefetchVariant(..), JumpDest(..), - getJumpDestBlockId, canShortcut, shortcutStatics, - shortcutJump, allocMoreStack, - maxSpillSlots, archWordFormat ) -where - -#include "HsVersions.h" - -import GhcPrelude - -import X86.Cond -import X86.Regs -import Instruction -import Format -import RegClass -import Reg -import TargetReg - -import GHC.Cmm.BlockId -import GHC.Cmm.Dataflow.Collections -import GHC.Cmm.Dataflow.Label -import GHC.Platform.Regs -import GHC.Cmm -import FastString -import Outputable -import GHC.Platform - -import BasicTypes (Alignment) -import GHC.Cmm.CLabel -import GHC.Driver.Session -import UniqSet -import Unique -import UniqSupply -import GHC.Cmm.DebugBlock (UnwindTable) - -import Control.Monad -import Data.Maybe (fromMaybe) - --- Format of an x86/x86_64 memory address, in bytes. --- -archWordFormat :: Bool -> Format -archWordFormat is32Bit - | is32Bit = II32 - | otherwise = II64 - --- | Instruction instance for x86 instruction set. -instance Instruction Instr where - regUsageOfInstr = x86_regUsageOfInstr - patchRegsOfInstr = x86_patchRegsOfInstr - isJumpishInstr = x86_isJumpishInstr - jumpDestsOfInstr = x86_jumpDestsOfInstr - patchJumpInstr = x86_patchJumpInstr - mkSpillInstr = x86_mkSpillInstr - mkLoadInstr = x86_mkLoadInstr - takeDeltaInstr = x86_takeDeltaInstr - isMetaInstr = x86_isMetaInstr - mkRegRegMoveInstr = x86_mkRegRegMoveInstr - takeRegRegMoveInstr = x86_takeRegRegMoveInstr - mkJumpInstr = x86_mkJumpInstr - mkStackAllocInstr = x86_mkStackAllocInstr - mkStackDeallocInstr = x86_mkStackDeallocInstr - - --- ----------------------------------------------------------------------------- --- Intel x86 instructions - -{- -Intel, in their infinite wisdom, selected a stack model for floating -point registers on x86. That might have made sense back in 1979 -- -nowadays we can see it for the nonsense it really is. A stack model -fits poorly with the existing nativeGen infrastructure, which assumes -flat integer and FP register sets. Prior to this commit, nativeGen -could not generate correct x86 FP code -- to do so would have meant -somehow working the register-stack paradigm into the register -allocator and spiller, which sounds very difficult. - -We have decided to cheat, and go for a simple fix which requires no -infrastructure modifications, at the expense of generating ropey but -correct FP code. All notions of the x86 FP stack and its insns have -been removed. Instead, we pretend (to the instruction selector and -register allocator) that x86 has six floating point registers, %fake0 -.. %fake5, which can be used in the usual flat manner. We further -claim that x86 has floating point instructions very similar to SPARC -and Alpha, that is, a simple 3-operand register-register arrangement. -Code generation and register allocation proceed on this basis. - -When we come to print out the final assembly, our convenient fiction -is converted to dismal reality. Each fake instruction is -independently converted to a series of real x86 instructions. -%fake0 .. %fake5 are mapped to %st(0) .. %st(5). To do reg-reg -arithmetic operations, the two operands are pushed onto the top of the -FP stack, the operation done, and the result copied back into the -relevant register. There are only six %fake registers because 2 are -needed for the translation, and x86 has 8 in total. - -The translation is inefficient but is simple and it works. A cleverer -translation would handle a sequence of insns, simulating the FP stack -contents, would not impose a fixed mapping from %fake to %st regs, and -hopefully could avoid most of the redundant reg-reg moves of the -current translation. - -We might as well make use of whatever unique FP facilities Intel have -chosen to bless us with (let's not be churlish, after all). -Hence GLDZ and GLD1. Bwahahahahahahaha! --} - -{- -Note [x86 Floating point precision] - -Intel's internal floating point registers are by default 80 bit -extended precision. This means that all operations done on values in -registers are done at 80 bits, and unless the intermediate values are -truncated to the appropriate size (32 or 64 bits) by storing in -memory, calculations in registers will give different results from -calculations which pass intermediate values in memory (eg. via -function calls). - -One solution is to set the FPU into 64 bit precision mode. Some OSs -do this (eg. FreeBSD) and some don't (eg. Linux). The problem here is -that this will only affect 64-bit precision arithmetic; 32-bit -calculations will still be done at 64-bit precision in registers. So -it doesn't solve the whole problem. - -There's also the issue of what the C library is expecting in terms of -precision. It seems to be the case that glibc on Linux expects the -FPU to be set to 80 bit precision, so setting it to 64 bit could have -unexpected effects. Changing the default could have undesirable -effects on other 3rd-party library code too, so the right thing would -be to save/restore the FPU control word across Haskell code if we were -to do this. - -gcc's -ffloat-store gives consistent results by always storing the -results of floating-point calculations in memory, which works for both -32 and 64-bit precision. However, it only affects the values of -user-declared floating point variables in C, not intermediate results. -GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision -flag). - -Another problem is how to spill floating point registers in the -register allocator. Should we spill the whole 80 bits, or just 64? -On an OS which is set to 64 bit precision, spilling 64 is fine. On -Linux, spilling 64 bits will round the results of some operations. -This is what gcc does. Spilling at 80 bits requires taking up a full -128 bit slot (so we get alignment). We spill at 80-bits and ignore -the alignment problems. - -In the future [edit: now available in GHC 7.0.1, with the -msse2 -flag], we'll use the SSE registers for floating point. This requires -a CPU that supports SSE2 (ordinary SSE only supports 32 bit precision -float ops), which means P4 or Xeon and above. Using SSE will solve -all these problems, because the SSE registers use fixed 32 bit or 64 -bit precision. - ---SDM 1/2003 --} - -data Instr - -- comment pseudo-op - = COMMENT FastString - - -- location pseudo-op (file, line, col, name) - | LOCATION Int Int Int String - - -- some static data spat out during code - -- generation. Will be extracted before - -- pretty-printing. - | LDATA Section (Alignment, RawCmmStatics) - - -- start a new basic block. Useful during - -- codegen, removed later. Preceding - -- instruction should be a jump, as per the - -- invariants for a BasicBlock (see Cmm). - | NEWBLOCK BlockId - - -- unwinding information - -- See Note [Unwinding information in the NCG]. - | UNWIND CLabel UnwindTable - - -- specify current stack offset for benefit of subsequent passes. - -- This carries a BlockId so it can be used in unwinding information. - | DELTA Int - - -- Moves. - | MOV Format Operand Operand - | CMOV Cond Format Operand Reg - | MOVZxL Format Operand Operand -- format is the size of operand 1 - | MOVSxL Format Operand Operand -- format is the size of operand 1 - -- x86_64 note: plain mov into a 32-bit register always zero-extends - -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which - -- don't affect the high bits of the register. - - -- Load effective address (also a very useful three-operand add instruction :-) - | LEA Format Operand Operand - - -- Int Arithmetic. - | ADD Format Operand Operand - | ADC Format Operand Operand - | SUB Format Operand Operand - | SBB Format Operand Operand - - | MUL Format Operand Operand - | MUL2 Format Operand -- %edx:%eax = operand * %rax - | IMUL Format Operand Operand -- signed int mul - | IMUL2 Format Operand -- %edx:%eax = operand * %eax - - | DIV Format Operand -- eax := eax:edx/op, edx := eax:edx%op - | IDIV Format Operand -- ditto, but signed - - -- Int Arithmetic, where the effects on the condition register - -- are important. Used in specialized sequences such as MO_Add2. - -- Do not rewrite these instructions to "equivalent" ones that - -- have different effect on the condition register! (See #9013.) - | ADD_CC Format Operand Operand - | SUB_CC Format Operand Operand - - -- Simple bit-twiddling. - | AND Format Operand Operand - | OR Format Operand Operand - | XOR Format Operand Operand - | NOT Format Operand - | NEGI Format Operand -- NEG instruction (name clash with Cond) - | BSWAP Format Reg - - -- Shifts (amount may be immediate or %cl only) - | SHL Format Operand{-amount-} Operand - | SAR Format Operand{-amount-} Operand - | SHR Format Operand{-amount-} Operand - - | BT Format Imm Operand - | NOP - - - -- We need to support the FSTP (x87 store and pop) instruction - -- so that we can correctly read off the return value of an - -- x86 CDECL C function call when its floating point. - -- so we dont include a register argument, and just use st(0) - -- this instruction is used ONLY for return values of C ffi calls - -- in x86_32 abi - | X87Store Format AddrMode -- st(0), dst - - - -- SSE2 floating point: we use a restricted set of the available SSE2 - -- instructions for floating-point. - -- use MOV for moving (either movss or movsd (movlpd better?)) - | CVTSS2SD Reg Reg -- F32 to F64 - | CVTSD2SS Reg Reg -- F64 to F32 - | CVTTSS2SIQ Format Operand Reg -- F32 to I32/I64 (with truncation) - | CVTTSD2SIQ Format Operand Reg -- F64 to I32/I64 (with truncation) - | CVTSI2SS Format Operand Reg -- I32/I64 to F32 - | CVTSI2SD Format Operand Reg -- I32/I64 to F64 - - -- use ADD, SUB, and SQRT for arithmetic. In both cases, operands - -- are Operand Reg. - - -- SSE2 floating-point division: - | FDIV Format Operand Operand -- divisor, dividend(dst) - - -- use CMP for comparisons. ucomiss and ucomisd instructions - -- compare single/double prec floating point respectively. - - | SQRT Format Operand Reg -- src, dst - - - -- Comparison - | TEST Format Operand Operand - | CMP Format Operand Operand - | SETCC Cond Operand - - -- Stack Operations. - | PUSH Format Operand - | POP Format Operand - -- both unused (SDM): - -- | PUSHA - -- | POPA - - -- Jumping around. - | JMP Operand [Reg] -- including live Regs at the call - | JXX Cond BlockId -- includes unconditional branches - | JXX_GBL Cond Imm -- non-local version of JXX - -- Table jump - | JMP_TBL Operand -- Address to jump to - [Maybe JumpDest] -- Targets of the jump table - Section -- Data section jump table should be put in - CLabel -- Label of jump table - -- | X86 call instruction - | CALL (Either Imm Reg) -- ^ Jump target - [Reg] -- ^ Arguments (required for register allocation) - - -- Other things. - | CLTD Format -- sign extend %eax into %edx:%eax - - | FETCHGOT Reg -- pseudo-insn for ELF position-independent code - -- pretty-prints as - -- call 1f - -- 1: popl %reg - -- addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg - | FETCHPC Reg -- pseudo-insn for Darwin position-independent code - -- pretty-prints as - -- call 1f - -- 1: popl %reg - - -- bit counting instructions - | POPCNT Format Operand Reg -- [SSE4.2] count number of bits set to 1 - | LZCNT Format Operand Reg -- [BMI2] count number of leading zeros - | TZCNT Format Operand Reg -- [BMI2] count number of trailing zeros - | BSF Format Operand Reg -- bit scan forward - | BSR Format Operand Reg -- bit scan reverse - - -- bit manipulation instructions - | PDEP Format Operand Operand Reg -- [BMI2] deposit bits to the specified mask - | PEXT Format Operand Operand Reg -- [BMI2] extract bits from the specified mask - - -- prefetch - | PREFETCH PrefetchVariant Format Operand -- prefetch Variant, addr size, address to prefetch - -- variant can be NTA, Lvl0, Lvl1, or Lvl2 - - | LOCK Instr -- lock prefix - | XADD Format Operand Operand -- src (r), dst (r/m) - | CMPXCHG Format Operand Operand -- src (r), dst (r/m), eax implicit - | MFENCE - -data PrefetchVariant = NTA | Lvl0 | Lvl1 | Lvl2 - - -data Operand - = OpReg Reg -- register - | OpImm Imm -- immediate value - | OpAddr AddrMode -- memory reference - - - --- | Returns which registers are read and written as a (read, written) --- pair. -x86_regUsageOfInstr :: Platform -> Instr -> RegUsage -x86_regUsageOfInstr platform instr - = case instr of - MOV _ src dst -> usageRW src dst - CMOV _ _ src dst -> mkRU (use_R src [dst]) [dst] - MOVZxL _ src dst -> usageRW src dst - MOVSxL _ src dst -> usageRW src dst - LEA _ src dst -> usageRW src dst - ADD _ src dst -> usageRM src dst - ADC _ src dst -> usageRM src dst - SUB _ src dst -> usageRM src dst - SBB _ src dst -> usageRM src dst - IMUL _ src dst -> usageRM src dst - - -- Result of IMULB will be in just in %ax - IMUL2 II8 src -> mkRU (eax:use_R src []) [eax] - -- Result of IMUL for wider values, will be split between %dx/%edx/%rdx and - -- %ax/%eax/%rax. - IMUL2 _ src -> mkRU (eax:use_R src []) [eax,edx] - - MUL _ src dst -> usageRM src dst - MUL2 _ src -> mkRU (eax:use_R src []) [eax,edx] - DIV _ op -> mkRU (eax:edx:use_R op []) [eax,edx] - IDIV _ op -> mkRU (eax:edx:use_R op []) [eax,edx] - ADD_CC _ src dst -> usageRM src dst - SUB_CC _ src dst -> usageRM src dst - AND _ src dst -> usageRM src dst - OR _ src dst -> usageRM src dst - - XOR _ (OpReg src) (OpReg dst) - | src == dst -> mkRU [] [dst] - - XOR _ src dst -> usageRM src dst - NOT _ op -> usageM op - BSWAP _ reg -> mkRU [reg] [reg] - NEGI _ op -> usageM op - SHL _ imm dst -> usageRM imm dst - SAR _ imm dst -> usageRM imm dst - SHR _ imm dst -> usageRM imm dst - BT _ _ src -> mkRUR (use_R src []) - - PUSH _ op -> mkRUR (use_R op []) - POP _ op -> mkRU [] (def_W op) - TEST _ src dst -> mkRUR (use_R src $! use_R dst []) - CMP _ src dst -> mkRUR (use_R src $! use_R dst []) - SETCC _ op -> mkRU [] (def_W op) - JXX _ _ -> mkRU [] [] - JXX_GBL _ _ -> mkRU [] [] - JMP op regs -> mkRUR (use_R op regs) - JMP_TBL op _ _ _ -> mkRUR (use_R op []) - CALL (Left _) params -> mkRU params (callClobberedRegs platform) - CALL (Right reg) params -> mkRU (reg:params) (callClobberedRegs platform) - CLTD _ -> mkRU [eax] [edx] - NOP -> mkRU [] [] - - X87Store _ dst -> mkRUR ( use_EA dst []) - - CVTSS2SD src dst -> mkRU [src] [dst] - CVTSD2SS src dst -> mkRU [src] [dst] - CVTTSS2SIQ _ src dst -> mkRU (use_R src []) [dst] - CVTTSD2SIQ _ src dst -> mkRU (use_R src []) [dst] - CVTSI2SS _ src dst -> mkRU (use_R src []) [dst] - CVTSI2SD _ src dst -> mkRU (use_R src []) [dst] - FDIV _ src dst -> usageRM src dst - SQRT _ src dst -> mkRU (use_R src []) [dst] - - FETCHGOT reg -> mkRU [] [reg] - FETCHPC reg -> mkRU [] [reg] - - COMMENT _ -> noUsage - LOCATION{} -> noUsage - UNWIND{} -> noUsage - DELTA _ -> noUsage - - POPCNT _ src dst -> mkRU (use_R src []) [dst] - LZCNT _ src dst -> mkRU (use_R src []) [dst] - TZCNT _ src dst -> mkRU (use_R src []) [dst] - BSF _ src dst -> mkRU (use_R src []) [dst] - BSR _ src dst -> mkRU (use_R src []) [dst] - - PDEP _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst] - PEXT _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst] - - -- note: might be a better way to do this - PREFETCH _ _ src -> mkRU (use_R src []) [] - LOCK i -> x86_regUsageOfInstr platform i - XADD _ src dst -> usageMM src dst - CMPXCHG _ src dst -> usageRMM src dst (OpReg eax) - MFENCE -> noUsage - - _other -> panic "regUsage: unrecognised instr" - where - -- # Definitions - -- - -- Written: If the operand is a register, it's written. If it's an - -- address, registers mentioned in the address are read. - -- - -- Modified: If the operand is a register, it's both read and - -- written. If it's an address, registers mentioned in the address - -- are read. - - -- 2 operand form; first operand Read; second Written - usageRW :: Operand -> Operand -> RegUsage - usageRW op (OpReg reg) = mkRU (use_R op []) [reg] - usageRW op (OpAddr ea) = mkRUR (use_R op $! use_EA ea []) - usageRW _ _ = panic "X86.RegInfo.usageRW: no match" - - -- 2 operand form; first operand Read; second Modified - usageRM :: Operand -> Operand -> RegUsage - usageRM op (OpReg reg) = mkRU (use_R op [reg]) [reg] - usageRM op (OpAddr ea) = mkRUR (use_R op $! use_EA ea []) - usageRM _ _ = panic "X86.RegInfo.usageRM: no match" - - -- 2 operand form; first operand Modified; second Modified - usageMM :: Operand -> Operand -> RegUsage - usageMM (OpReg src) (OpReg dst) = mkRU [src, dst] [src, dst] - usageMM (OpReg src) (OpAddr ea) = mkRU (use_EA ea [src]) [src] - usageMM _ _ = panic "X86.RegInfo.usageMM: no match" - - -- 3 operand form; first operand Read; second Modified; third Modified - usageRMM :: Operand -> Operand -> Operand -> RegUsage - usageRMM (OpReg src) (OpReg dst) (OpReg reg) = mkRU [src, dst, reg] [dst, reg] - usageRMM (OpReg src) (OpAddr ea) (OpReg reg) = mkRU (use_EA ea [src, reg]) [reg] - usageRMM _ _ _ = panic "X86.RegInfo.usageRMM: no match" - - -- 1 operand form; operand Modified - usageM :: Operand -> RegUsage - usageM (OpReg reg) = mkRU [reg] [reg] - usageM (OpAddr ea) = mkRUR (use_EA ea []) - usageM _ = panic "X86.RegInfo.usageM: no match" - - -- Registers defd when an operand is written. - def_W (OpReg reg) = [reg] - def_W (OpAddr _ ) = [] - def_W _ = panic "X86.RegInfo.def_W: no match" - - -- Registers used when an operand is read. - use_R (OpReg reg) tl = reg : tl - use_R (OpImm _) tl = tl - use_R (OpAddr ea) tl = use_EA ea tl - - -- Registers used to compute an effective address. - use_EA (ImmAddr _ _) tl = tl - use_EA (AddrBaseIndex base index _) tl = - use_base base $! use_index index tl - where use_base (EABaseReg r) tl = r : tl - use_base _ tl = tl - use_index EAIndexNone tl = tl - use_index (EAIndex i _) tl = i : tl - - mkRUR src = src' `seq` RU src' [] - where src' = filter (interesting platform) src - - mkRU src dst = src' `seq` dst' `seq` RU src' dst' - where src' = filter (interesting platform) src - dst' = filter (interesting platform) dst - --- | Is this register interesting for the register allocator? -interesting :: Platform -> Reg -> Bool -interesting _ (RegVirtual _) = True -interesting platform (RegReal (RealRegSingle i)) = freeReg platform i -interesting _ (RegReal (RealRegPair{})) = panic "X86.interesting: no reg pairs on this arch" - - - --- | Applies the supplied function to all registers in instructions. --- Typically used to change virtual registers to real registers. -x86_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr -x86_patchRegsOfInstr instr env - = case instr of - MOV fmt src dst -> patch2 (MOV fmt) src dst - CMOV cc fmt src dst -> CMOV cc fmt (patchOp src) (env dst) - MOVZxL fmt src dst -> patch2 (MOVZxL fmt) src dst - MOVSxL fmt src dst -> patch2 (MOVSxL fmt) src dst - LEA fmt src dst -> patch2 (LEA fmt) src dst - ADD fmt src dst -> patch2 (ADD fmt) src dst - ADC fmt src dst -> patch2 (ADC fmt) src dst - SUB fmt src dst -> patch2 (SUB fmt) src dst - SBB fmt src dst -> patch2 (SBB fmt) src dst - IMUL fmt src dst -> patch2 (IMUL fmt) src dst - IMUL2 fmt src -> patch1 (IMUL2 fmt) src - MUL fmt src dst -> patch2 (MUL fmt) src dst - MUL2 fmt src -> patch1 (MUL2 fmt) src - IDIV fmt op -> patch1 (IDIV fmt) op - DIV fmt op -> patch1 (DIV fmt) op - ADD_CC fmt src dst -> patch2 (ADD_CC fmt) src dst - SUB_CC fmt src dst -> patch2 (SUB_CC fmt) src dst - AND fmt src dst -> patch2 (AND fmt) src dst - OR fmt src dst -> patch2 (OR fmt) src dst - XOR fmt src dst -> patch2 (XOR fmt) src dst - NOT fmt op -> patch1 (NOT fmt) op - BSWAP fmt reg -> BSWAP fmt (env reg) - NEGI fmt op -> patch1 (NEGI fmt) op - SHL fmt imm dst -> patch1 (SHL fmt imm) dst - SAR fmt imm dst -> patch1 (SAR fmt imm) dst - SHR fmt imm dst -> patch1 (SHR fmt imm) dst - BT fmt imm src -> patch1 (BT fmt imm) src - TEST fmt src dst -> patch2 (TEST fmt) src dst - CMP fmt src dst -> patch2 (CMP fmt) src dst - PUSH fmt op -> patch1 (PUSH fmt) op - POP fmt op -> patch1 (POP fmt) op - SETCC cond op -> patch1 (SETCC cond) op - JMP op regs -> JMP (patchOp op) regs - JMP_TBL op ids s lbl -> JMP_TBL (patchOp op) ids s lbl - - -- literally only support storing the top x87 stack value st(0) - X87Store fmt dst -> X87Store fmt (lookupAddr dst) - - CVTSS2SD src dst -> CVTSS2SD (env src) (env dst) - CVTSD2SS src dst -> CVTSD2SS (env src) (env dst) - CVTTSS2SIQ fmt src dst -> CVTTSS2SIQ fmt (patchOp src) (env dst) - CVTTSD2SIQ fmt src dst -> CVTTSD2SIQ fmt (patchOp src) (env dst) - CVTSI2SS fmt src dst -> CVTSI2SS fmt (patchOp src) (env dst) - CVTSI2SD fmt src dst -> CVTSI2SD fmt (patchOp src) (env dst) - FDIV fmt src dst -> FDIV fmt (patchOp src) (patchOp dst) - SQRT fmt src dst -> SQRT fmt (patchOp src) (env dst) - - CALL (Left _) _ -> instr - CALL (Right reg) p -> CALL (Right (env reg)) p - - FETCHGOT reg -> FETCHGOT (env reg) - FETCHPC reg -> FETCHPC (env reg) - - NOP -> instr - COMMENT _ -> instr - LOCATION {} -> instr - UNWIND {} -> instr - DELTA _ -> instr - - JXX _ _ -> instr - JXX_GBL _ _ -> instr - CLTD _ -> instr - - POPCNT fmt src dst -> POPCNT fmt (patchOp src) (env dst) - LZCNT fmt src dst -> LZCNT fmt (patchOp src) (env dst) - TZCNT fmt src dst -> TZCNT fmt (patchOp src) (env dst) - PDEP fmt src mask dst -> PDEP fmt (patchOp src) (patchOp mask) (env dst) - PEXT fmt src mask dst -> PEXT fmt (patchOp src) (patchOp mask) (env dst) - BSF fmt src dst -> BSF fmt (patchOp src) (env dst) - BSR fmt src dst -> BSR fmt (patchOp src) (env dst) - - PREFETCH lvl format src -> PREFETCH lvl format (patchOp src) - - LOCK i -> LOCK (x86_patchRegsOfInstr i env) - XADD fmt src dst -> patch2 (XADD fmt) src dst - CMPXCHG fmt src dst -> patch2 (CMPXCHG fmt) src dst - MFENCE -> instr - - _other -> panic "patchRegs: unrecognised instr" - - where - patch1 :: (Operand -> a) -> Operand -> a - patch1 insn op = insn $! patchOp op - patch2 :: (Operand -> Operand -> a) -> Operand -> Operand -> a - patch2 insn src dst = (insn $! patchOp src) $! patchOp dst - - patchOp (OpReg reg) = OpReg $! env reg - patchOp (OpImm imm) = OpImm imm - patchOp (OpAddr ea) = OpAddr $! lookupAddr ea - - lookupAddr (ImmAddr imm off) = ImmAddr imm off - lookupAddr (AddrBaseIndex base index disp) - = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp - where - lookupBase EABaseNone = EABaseNone - lookupBase EABaseRip = EABaseRip - lookupBase (EABaseReg r) = EABaseReg $! env r - - lookupIndex EAIndexNone = EAIndexNone - lookupIndex (EAIndex r i) = (EAIndex $! env r) i - - --------------------------------------------------------------------------------- -x86_isJumpishInstr - :: Instr -> Bool - -x86_isJumpishInstr instr - = case instr of - JMP{} -> True - JXX{} -> True - JXX_GBL{} -> True - JMP_TBL{} -> True - CALL{} -> True - _ -> False - - -x86_jumpDestsOfInstr - :: Instr - -> [BlockId] - -x86_jumpDestsOfInstr insn - = case insn of - JXX _ id -> [id] - JMP_TBL _ ids _ _ -> [id | Just (DestBlockId id) <- ids] - _ -> [] - - -x86_patchJumpInstr - :: Instr -> (BlockId -> BlockId) -> Instr - -x86_patchJumpInstr insn patchF - = case insn of - JXX cc id -> JXX cc (patchF id) - JMP_TBL op ids section lbl - -> JMP_TBL op (map (fmap (patchJumpDest patchF)) ids) section lbl - _ -> insn - where - patchJumpDest f (DestBlockId id) = DestBlockId (f id) - patchJumpDest _ dest = dest - - - - - --- ----------------------------------------------------------------------------- --- | Make a spill instruction. -x86_mkSpillInstr - :: DynFlags - -> Reg -- register to spill - -> Int -- current stack delta - -> Int -- spill slot to use - -> Instr - -x86_mkSpillInstr dflags reg delta slot - = let off = spillSlotToOffset platform slot - delta - in - case targetClassOfReg platform reg of - RcInteger -> MOV (archWordFormat is32Bit) - (OpReg reg) (OpAddr (spRel dflags off)) - RcDouble -> MOV FF64 (OpReg reg) (OpAddr (spRel dflags off)) - _ -> panic "X86.mkSpillInstr: no match" - where platform = targetPlatform dflags - is32Bit = target32Bit platform - --- | Make a spill reload instruction. -x86_mkLoadInstr - :: DynFlags - -> Reg -- register to load - -> Int -- current stack delta - -> Int -- spill slot to use - -> Instr - -x86_mkLoadInstr dflags reg delta slot - = let off = spillSlotToOffset platform slot - delta - in - case targetClassOfReg platform reg of - RcInteger -> MOV (archWordFormat is32Bit) - (OpAddr (spRel dflags off)) (OpReg reg) - RcDouble -> MOV FF64 (OpAddr (spRel dflags off)) (OpReg reg) - _ -> panic "X86.x86_mkLoadInstr" - where platform = targetPlatform dflags - is32Bit = target32Bit platform - -spillSlotSize :: Platform -> Int -spillSlotSize dflags = if is32Bit then 12 else 8 - where is32Bit = target32Bit dflags - -maxSpillSlots :: DynFlags -> Int -maxSpillSlots dflags - = ((rESERVED_C_STACK_BYTES dflags - 64) `div` spillSlotSize (targetPlatform dflags)) - 1 --- = 0 -- useful for testing allocMoreStack - --- number of bytes that the stack pointer should be aligned to -stackAlign :: Int -stackAlign = 16 - --- convert a spill slot number to a *byte* offset, with no sign: --- decide on a per arch basis whether you are spilling above or below --- the C stack pointer. -spillSlotToOffset :: Platform -> Int -> Int -spillSlotToOffset platform slot - = 64 + spillSlotSize platform * slot - --------------------------------------------------------------------------------- - --- | See if this instruction is telling us the current C stack delta -x86_takeDeltaInstr - :: Instr - -> Maybe Int - -x86_takeDeltaInstr instr - = case instr of - DELTA i -> Just i - _ -> Nothing - - -x86_isMetaInstr - :: Instr - -> Bool - -x86_isMetaInstr instr - = case instr of - COMMENT{} -> True - LOCATION{} -> True - LDATA{} -> True - NEWBLOCK{} -> True - UNWIND{} -> True - DELTA{} -> True - _ -> False - - - ---- TODO: why is there --- | Make a reg-reg move instruction. --- On SPARC v8 there are no instructions to move directly between --- floating point and integer regs. If we need to do that then we --- have to go via memory. --- -x86_mkRegRegMoveInstr - :: Platform - -> Reg - -> Reg - -> Instr - -x86_mkRegRegMoveInstr platform src dst - = case targetClassOfReg platform src of - RcInteger -> case platformArch platform of - ArchX86 -> MOV II32 (OpReg src) (OpReg dst) - ArchX86_64 -> MOV II64 (OpReg src) (OpReg dst) - _ -> panic "x86_mkRegRegMoveInstr: Bad arch" - RcDouble -> MOV FF64 (OpReg src) (OpReg dst) - -- this code is the lie we tell ourselves because both float and double - -- use the same register class.on x86_64 and x86 32bit with SSE2, - -- more plainly, both use the XMM registers - _ -> panic "X86.RegInfo.mkRegRegMoveInstr: no match" - --- | Check whether an instruction represents a reg-reg move. --- The register allocator attempts to eliminate reg->reg moves whenever it can, --- by assigning the src and dest temporaries to the same real register. --- -x86_takeRegRegMoveInstr - :: Instr - -> Maybe (Reg,Reg) - -x86_takeRegRegMoveInstr (MOV _ (OpReg r1) (OpReg r2)) - = Just (r1,r2) - -x86_takeRegRegMoveInstr _ = Nothing - - --- | Make an unconditional branch instruction. -x86_mkJumpInstr - :: BlockId - -> [Instr] - -x86_mkJumpInstr id - = [JXX ALWAYS id] - --- Note [Windows stack layout] --- | On most OSes the kernel will place a guard page after the current stack --- page. If you allocate larger than a page worth you may jump over this --- guard page. Not only is this a security issue, but on certain OSes such --- as Windows a new page won't be allocated if you don't hit the guard. This --- will cause a segfault or access fault. --- --- This function defines if the current allocation amount requires a probe. --- On Windows (for now) we emit a call to _chkstk for this. For other OSes --- this is not yet implemented. --- See https://docs.microsoft.com/en-us/windows/desktop/DevNotes/-win32-chkstk --- The Windows stack looks like this: --- --- +-------------------+ --- | SP | --- +-------------------+ --- | | --- | GUARD PAGE | --- | | --- +-------------------+ --- | | --- | | --- | UNMAPPED | --- | | --- | | --- +-------------------+ --- --- In essence each allocation larger than a page size needs to be chunked and --- a probe emitted after each page allocation. You have to hit the guard --- page so the kernel can map in the next page, otherwise you'll segfault. --- -needs_probe_call :: Platform -> Int -> Bool -needs_probe_call platform amount - = case platformOS platform of - OSMinGW32 -> case platformArch platform of - ArchX86 -> amount > (4 * 1024) - ArchX86_64 -> amount > (8 * 1024) - _ -> False - _ -> False - -x86_mkStackAllocInstr - :: Platform - -> Int - -> [Instr] -x86_mkStackAllocInstr platform amount - = case platformOS platform of - OSMinGW32 -> - -- These will clobber AX but this should be ok because - -- - -- 1. It is the first thing we do when entering the closure and AX is - -- a caller saved registers on Windows both on x86_64 and x86. - -- - -- 2. The closures are only entered via a call or longjmp in which case - -- there are no expectations for volatile registers. - -- - -- 3. When the target is a local branch point it is re-targeted - -- after the dealloc, preserving #2. See note [extra spill slots]. - -- - -- We emit a call because the stack probes are quite involved and - -- would bloat code size a lot. GHC doesn't really have an -Os. - -- __chkstk is guaranteed to leave all nonvolatile registers and AX - -- untouched. It's part of the standard prologue code for any Windows - -- function dropping the stack more than a page. - -- See Note [Windows stack layout] - case platformArch platform of - ArchX86 | needs_probe_call platform amount -> - [ MOV II32 (OpImm (ImmInt amount)) (OpReg eax) - , CALL (Left $ strImmLit "___chkstk_ms") [eax] - , SUB II32 (OpReg eax) (OpReg esp) - ] - | otherwise -> - [ SUB II32 (OpImm (ImmInt amount)) (OpReg esp) - , TEST II32 (OpReg esp) (OpReg esp) - ] - ArchX86_64 | needs_probe_call platform amount -> - [ MOV II64 (OpImm (ImmInt amount)) (OpReg rax) - , CALL (Left $ strImmLit "___chkstk_ms") [rax] - , SUB II64 (OpReg rax) (OpReg rsp) - ] - | otherwise -> - [ SUB II64 (OpImm (ImmInt amount)) (OpReg rsp) - , TEST II64 (OpReg rsp) (OpReg rsp) - ] - _ -> panic "x86_mkStackAllocInstr" - _ -> - case platformArch platform of - ArchX86 -> [ SUB II32 (OpImm (ImmInt amount)) (OpReg esp) ] - ArchX86_64 -> [ SUB II64 (OpImm (ImmInt amount)) (OpReg rsp) ] - _ -> panic "x86_mkStackAllocInstr" - -x86_mkStackDeallocInstr - :: Platform - -> Int - -> [Instr] -x86_mkStackDeallocInstr platform amount - = case platformArch platform of - ArchX86 -> [ADD II32 (OpImm (ImmInt amount)) (OpReg esp)] - ArchX86_64 -> [ADD II64 (OpImm (ImmInt amount)) (OpReg rsp)] - _ -> panic "x86_mkStackDeallocInstr" - - --- --- Note [extra spill slots] --- --- If the register allocator used more spill slots than we have --- pre-allocated (rESERVED_C_STACK_BYTES), then we must allocate more --- C stack space on entry and exit from this proc. Therefore we --- insert a "sub $N, %rsp" at every entry point, and an "add $N, %rsp" --- before every non-local jump. --- --- This became necessary when the new codegen started bundling entire --- functions together into one proc, because the register allocator --- assigns a different stack slot to each virtual reg within a proc. --- To avoid using so many slots we could also: --- --- - split up the proc into connected components before code generator --- --- - rename the virtual regs, so that we re-use vreg names and hence --- stack slots for non-overlapping vregs. --- --- Note that when a block is both a non-local entry point (with an --- info table) and a local branch target, we have to split it into --- two, like so: --- --- <info table> --- L: --- <code> --- --- becomes --- --- <info table> --- L: --- subl $rsp, N --- jmp Lnew --- Lnew: --- <code> --- --- and all branches pointing to L are retargetted to point to Lnew. --- Otherwise, we would repeat the $rsp adjustment for each branch to --- L. --- --- Returns a list of (L,Lnew) pairs. --- -allocMoreStack - :: Platform - -> Int - -> NatCmmDecl statics X86.Instr.Instr - -> UniqSM (NatCmmDecl statics X86.Instr.Instr, [(BlockId,BlockId)]) - -allocMoreStack _ _ top@(CmmData _ _) = return (top,[]) -allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do - let entries = entryBlocks proc - - uniqs <- replicateM (length entries) getUniqueM - - let - delta = ((x + stackAlign - 1) `quot` stackAlign) * stackAlign -- round up - where x = slots * spillSlotSize platform -- sp delta - - alloc = mkStackAllocInstr platform delta - dealloc = mkStackDeallocInstr platform delta - - retargetList = (zip entries (map mkBlockId uniqs)) - - new_blockmap :: LabelMap BlockId - new_blockmap = mapFromList retargetList - - insert_stack_insns (BasicBlock id insns) - | Just new_blockid <- mapLookup id new_blockmap - = [ BasicBlock id $ alloc ++ [JXX ALWAYS new_blockid] - , BasicBlock new_blockid block' ] - | otherwise - = [ BasicBlock id block' ] - where - block' = foldr insert_dealloc [] insns - - insert_dealloc insn r = case insn of - JMP _ _ -> dealloc ++ (insn : r) - JXX_GBL _ _ -> panic "insert_dealloc: cannot handle JXX_GBL" - _other -> x86_patchJumpInstr insn retarget : r - where retarget b = fromMaybe b (mapLookup b new_blockmap) - - new_code = concatMap insert_stack_insns code - -- in - return (CmmProc info lbl live (ListGraph new_code), retargetList) - -data JumpDest = DestBlockId BlockId | DestImm Imm - --- Debug Instance -instance Outputable JumpDest where - ppr (DestBlockId bid) = text "jd<blk>:" <> ppr bid - ppr (DestImm _imm) = text "jd<imm>:noShow" - - -getJumpDestBlockId :: JumpDest -> Maybe BlockId -getJumpDestBlockId (DestBlockId bid) = Just bid -getJumpDestBlockId _ = Nothing - -canShortcut :: Instr -> Maybe JumpDest -canShortcut (JXX ALWAYS id) = Just (DestBlockId id) -canShortcut (JMP (OpImm imm) _) = Just (DestImm imm) -canShortcut _ = Nothing - - --- This helper shortcuts a sequence of branches. --- The blockset helps avoid following cycles. -shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr -shortcutJump fn insn = shortcutJump' fn (setEmpty :: LabelSet) insn - where - shortcutJump' :: (BlockId -> Maybe JumpDest) -> LabelSet -> Instr -> Instr - shortcutJump' fn seen insn@(JXX cc id) = - if setMember id seen then insn - else case fn id of - Nothing -> insn - Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id') - Just (DestImm imm) -> shortcutJump' fn seen' (JXX_GBL cc imm) - where seen' = setInsert id seen - shortcutJump' fn _ (JMP_TBL addr blocks section tblId) = - let updateBlock (Just (DestBlockId bid)) = - case fn bid of - Nothing -> Just (DestBlockId bid ) - Just dest -> Just dest - updateBlock dest = dest - blocks' = map updateBlock blocks - in JMP_TBL addr blocks' section tblId - shortcutJump' _ _ other = other - --- Here because it knows about JumpDest -shortcutStatics :: (BlockId -> Maybe JumpDest) -> (Alignment, RawCmmStatics) -> (Alignment, RawCmmStatics) -shortcutStatics fn (align, RawCmmStatics lbl statics) - = (align, RawCmmStatics lbl $ map (shortcutStatic fn) statics) - -- we need to get the jump tables, so apply the mapping to the entries - -- of a CmmData too. - -shortcutLabel :: (BlockId -> Maybe JumpDest) -> CLabel -> CLabel -shortcutLabel fn lab - | Just blkId <- maybeLocalBlockLabel lab = shortBlockId fn emptyUniqSet blkId - | otherwise = lab - -shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic -shortcutStatic fn (CmmStaticLit (CmmLabel lab)) - = CmmStaticLit (CmmLabel (shortcutLabel fn lab)) -shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off w)) - = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off w) - -- slightly dodgy, we're ignoring the second label, but this - -- works with the way we use CmmLabelDiffOff for jump tables now. -shortcutStatic _ other_static - = other_static - -shortBlockId - :: (BlockId -> Maybe JumpDest) - -> UniqSet Unique - -> BlockId - -> CLabel - -shortBlockId fn seen blockid = - case (elementOfUniqSet uq seen, fn blockid) of - (True, _) -> blockLbl blockid - (_, Nothing) -> blockLbl blockid - (_, Just (DestBlockId blockid')) -> shortBlockId fn (addOneToUniqSet seen uq) blockid' - (_, Just (DestImm (ImmCLbl lbl))) -> lbl - (_, _other) -> panic "shortBlockId" - where uq = getUnique blockid diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs deleted file mode 100644 index 4abc15cedd..0000000000 --- a/compiler/nativeGen/X86/Ppr.hs +++ /dev/null @@ -1,1014 +0,0 @@ -{-# LANGUAGE CPP #-} - ------------------------------------------------------------------------------ --- --- Pretty-printing assembly language --- --- (c) The University of Glasgow 1993-2005 --- ------------------------------------------------------------------------------ - -{-# OPTIONS_GHC -fno-warn-orphans #-} -module X86.Ppr ( - pprNatCmmDecl, - pprData, - pprInstr, - pprFormat, - pprImm, - pprDataItem, -) - -where - -#include "HsVersions.h" - -import GhcPrelude - -import X86.Regs -import X86.Instr -import X86.Cond -import Instruction -import Format -import Reg -import PprBase - - -import GHC.Cmm.Dataflow.Collections -import GHC.Cmm.Dataflow.Label -import BasicTypes (Alignment, mkAlignment, alignmentBytes) -import GHC.Driver.Session -import GHC.Cmm hiding (topInfoTable) -import GHC.Cmm.BlockId -import GHC.Cmm.CLabel -import Unique ( pprUniqueAlways ) -import GHC.Platform -import FastString -import Outputable - -import Data.Word -import Data.Bits - --- ----------------------------------------------------------------------------- --- Printing this stuff out --- --- --- Note [Subsections Via Symbols] --- --- If we are using the .subsections_via_symbols directive --- (available on recent versions of Darwin), --- we have to make sure that there is some kind of reference --- from the entry code to a label on the _top_ of of the info table, --- so that the linker will not think it is unreferenced and dead-strip --- it. That's why the label is called a DeadStripPreventer (_dsp). --- --- The LLVM code gen already creates `iTableSuf` symbols, where --- the X86 would generate the DeadStripPreventer (_dsp) symbol. --- Therefore all that is left for llvm code gen, is to ensure --- that all the `iTableSuf` symbols are marked as used. --- As of this writing the documentation regarding the --- .subsections_via_symbols and -dead_strip can be found at --- <https://developer.apple.com/library/mac/documentation/DeveloperTools/Reference/Assembler/040-Assembler_Directives/asm_directives.html#//apple_ref/doc/uid/TP30000823-TPXREF101> - -pprProcAlignment :: SDoc -pprProcAlignment = sdocWithDynFlags $ \dflags -> - (maybe empty (pprAlign . mkAlignment) (cmmProcAlignment dflags)) - -pprNatCmmDecl :: NatCmmDecl (Alignment, RawCmmStatics) Instr -> SDoc -pprNatCmmDecl (CmmData section dats) = - pprSectionAlign section $$ pprDatas dats - -pprNatCmmDecl proc@(CmmProc top_info lbl _ (ListGraph blocks)) = - sdocWithDynFlags $ \dflags -> - pprProcAlignment $$ - case topInfoTable proc of - Nothing -> - -- special case for code without info table: - pprSectionAlign (Section Text lbl) $$ - pprProcAlignment $$ - pprLabel lbl $$ -- blocks guaranteed not null, so label needed - vcat (map (pprBasicBlock top_info) blocks) $$ - (if debugLevel dflags > 0 - then ppr (mkAsmTempEndLabel lbl) <> char ':' else empty) $$ - pprSizeDecl lbl - - Just (RawCmmStatics info_lbl _) -> - sdocWithPlatform $ \platform -> - pprSectionAlign (Section Text info_lbl) $$ - pprProcAlignment $$ - (if platformHasSubsectionsViaSymbols platform - then ppr (mkDeadStripPreventer info_lbl) <> char ':' - else empty) $$ - vcat (map (pprBasicBlock top_info) blocks) $$ - -- above: Even the first block gets a label, because with branch-chain - -- elimination, it might be the target of a goto. - (if platformHasSubsectionsViaSymbols platform - then -- See Note [Subsections Via Symbols] - text "\t.long " - <+> ppr info_lbl - <+> char '-' - <+> ppr (mkDeadStripPreventer info_lbl) - else empty) $$ - pprSizeDecl info_lbl - --- | Output the ELF .size directive. -pprSizeDecl :: CLabel -> SDoc -pprSizeDecl lbl - = sdocWithPlatform $ \platform -> - if osElfTarget (platformOS platform) - then text "\t.size" <+> ppr lbl <> ptext (sLit ", .-") <> ppr lbl - else empty - -pprBasicBlock :: LabelMap RawCmmStatics -> NatBasicBlock Instr -> SDoc -pprBasicBlock info_env (BasicBlock blockid instrs) - = maybe_infotable $ - pprLabel asmLbl $$ - vcat (map pprInstr instrs) $$ - (sdocOption sdocDebugLevel $ \level -> - if level > 0 - then ppr (mkAsmTempEndLabel asmLbl) <> char ':' - else empty - ) - where - asmLbl = blockLbl blockid - maybe_infotable c = case mapLookup blockid info_env of - Nothing -> c - Just (RawCmmStatics infoLbl info) -> - pprAlignForSection Text $$ - infoTableLoc $$ - vcat (map pprData info) $$ - pprLabel infoLbl $$ - c $$ - (sdocOption sdocDebugLevel $ \level -> - if level > 0 - then ppr (mkAsmTempEndLabel infoLbl) <> char ':' - else empty - ) - -- Make sure the info table has the right .loc for the block - -- coming right after it. See [Note: Info Offset] - infoTableLoc = case instrs of - (l@LOCATION{} : _) -> pprInstr l - _other -> empty - - -pprDatas :: (Alignment, RawCmmStatics) -> SDoc --- See note [emit-time elimination of static indirections] in CLabel. -pprDatas (_, RawCmmStatics alias [CmmStaticLit (CmmLabel lbl), CmmStaticLit ind, _, _]) - | lbl == mkIndStaticInfoLabel - , let labelInd (CmmLabelOff l _) = Just l - labelInd (CmmLabel l) = Just l - labelInd _ = Nothing - , Just ind' <- labelInd ind - , alias `mayRedirectTo` ind' - = pprGloblDecl alias - $$ text ".equiv" <+> ppr alias <> comma <> ppr (CmmLabel ind') - -pprDatas (align, (RawCmmStatics lbl dats)) - = vcat (pprAlign align : pprLabel lbl : map pprData dats) - -pprData :: CmmStatic -> SDoc -pprData (CmmString str) = pprBytes str - -pprData (CmmUninitialised bytes) - = sdocWithPlatform $ \platform -> - if platformOS platform == OSDarwin then text ".space " <> int bytes - else text ".skip " <> int bytes - -pprData (CmmStaticLit lit) = pprDataItem lit - -pprGloblDecl :: CLabel -> SDoc -pprGloblDecl lbl - | not (externallyVisibleCLabel lbl) = empty - | otherwise = text ".globl " <> ppr lbl - -pprLabelType' :: DynFlags -> CLabel -> SDoc -pprLabelType' dflags lbl = - if isCFunctionLabel lbl || functionOkInfoTable then - text "@function" - else - text "@object" - where - {- - NOTE: This is a bit hacky. - - With the `tablesNextToCode` info tables look like this: - ``` - <info table data> - label_info: - <info table code> - ``` - So actually info table label points exactly to the code and we can mark - the label as @function. (This is required to make perf and potentially other - tools to work on Haskell binaries). - This usually works well but it can cause issues with a linker. - A linker uses different algorithms for the relocation depending on - the symbol type.For some reason, a linker will generate JUMP_SLOT relocation - when constructor info table is referenced from a data section. - This only happens with static constructor call so - we mark _con_info symbols as `@object` to avoid the issue with relocations. - - @SimonMarlow hack explanation: - "The reasoning goes like this: - - * The danger when we mark a symbol as `@function` is that the linker will - redirect it to point to the PLT and use a `JUMP_SLOT` relocation when - the symbol refers to something outside the current shared object. - A PLT / JUMP_SLOT reference only works for symbols that we jump to, not - for symbols representing data,, nor for info table symbol references which - we expect to point directly to the info table. - * GHC generates code that might refer to any info table symbol from the text - segment, but that's OK, because those will be explicit GOT references - generated by the code generator. - * When we refer to info tables from the data segment, it's either - * a FUN_STATIC/THUNK_STATIC local to this module - * a `con_info` that could be from anywhere - - So, the only info table symbols that we might refer to from the data segment - of another shared object are `con_info` symbols, so those are the ones we - need to exclude from getting the @function treatment. - " - - A good place to check for more - https://gitlab.haskell.org/ghc/ghc/wikis/commentary/position-independent-code - - Another possible hack is to create an extra local function symbol for - every code-like thing to give the needed information for to the tools - but mess up with the relocation. https://phabricator.haskell.org/D4730 - -} - functionOkInfoTable = tablesNextToCode dflags && - isInfoTableLabel lbl && not (isConInfoTableLabel lbl) - - -pprTypeDecl :: CLabel -> SDoc -pprTypeDecl lbl - = sdocWithPlatform $ \platform -> - if osElfTarget (platformOS platform) && externallyVisibleCLabel lbl - then - sdocWithDynFlags $ \df -> - text ".type " <> ppr lbl <> ptext (sLit ", ") <> pprLabelType' df lbl - else empty - -pprLabel :: CLabel -> SDoc -pprLabel lbl = pprGloblDecl lbl - $$ pprTypeDecl lbl - $$ (ppr lbl <> char ':') - -pprAlign :: Alignment -> SDoc -pprAlign alignment - = sdocWithPlatform $ \platform -> - text ".align " <> int (alignmentOn platform) - where - bytes = alignmentBytes alignment - alignmentOn platform = if platformOS platform == OSDarwin - then log2 bytes - else bytes - - log2 :: Int -> Int -- cache the common ones - log2 1 = 0 - log2 2 = 1 - log2 4 = 2 - log2 8 = 3 - log2 n = 1 + log2 (n `quot` 2) - --- ----------------------------------------------------------------------------- --- pprInstr: print an 'Instr' - -instance Outputable Instr where - ppr instr = pprInstr instr - - -pprReg :: Format -> Reg -> SDoc -pprReg f r - = case r of - RegReal (RealRegSingle i) -> - sdocWithPlatform $ \platform -> - if target32Bit platform then ppr32_reg_no f i - else ppr64_reg_no f i - RegReal (RealRegPair _ _) -> panic "X86.Ppr: no reg pairs on this arch" - RegVirtual (VirtualRegI u) -> text "%vI_" <> pprUniqueAlways u - RegVirtual (VirtualRegHi u) -> text "%vHi_" <> pprUniqueAlways u - RegVirtual (VirtualRegF u) -> text "%vF_" <> pprUniqueAlways u - RegVirtual (VirtualRegD u) -> text "%vD_" <> pprUniqueAlways u - - where - ppr32_reg_no :: Format -> Int -> SDoc - ppr32_reg_no II8 = ppr32_reg_byte - ppr32_reg_no II16 = ppr32_reg_word - ppr32_reg_no _ = ppr32_reg_long - - ppr32_reg_byte i = ptext - (case i of { - 0 -> sLit "%al"; 1 -> sLit "%bl"; - 2 -> sLit "%cl"; 3 -> sLit "%dl"; - _ -> sLit $ "very naughty I386 byte register: " ++ show i - }) - - ppr32_reg_word i = ptext - (case i of { - 0 -> sLit "%ax"; 1 -> sLit "%bx"; - 2 -> sLit "%cx"; 3 -> sLit "%dx"; - 4 -> sLit "%si"; 5 -> sLit "%di"; - 6 -> sLit "%bp"; 7 -> sLit "%sp"; - _ -> sLit "very naughty I386 word register" - }) - - ppr32_reg_long i = ptext - (case i of { - 0 -> sLit "%eax"; 1 -> sLit "%ebx"; - 2 -> sLit "%ecx"; 3 -> sLit "%edx"; - 4 -> sLit "%esi"; 5 -> sLit "%edi"; - 6 -> sLit "%ebp"; 7 -> sLit "%esp"; - _ -> ppr_reg_float i - }) - - ppr64_reg_no :: Format -> Int -> SDoc - ppr64_reg_no II8 = ppr64_reg_byte - ppr64_reg_no II16 = ppr64_reg_word - ppr64_reg_no II32 = ppr64_reg_long - ppr64_reg_no _ = ppr64_reg_quad - - ppr64_reg_byte i = ptext - (case i of { - 0 -> sLit "%al"; 1 -> sLit "%bl"; - 2 -> sLit "%cl"; 3 -> sLit "%dl"; - 4 -> sLit "%sil"; 5 -> sLit "%dil"; -- new 8-bit regs! - 6 -> sLit "%bpl"; 7 -> sLit "%spl"; - 8 -> sLit "%r8b"; 9 -> sLit "%r9b"; - 10 -> sLit "%r10b"; 11 -> sLit "%r11b"; - 12 -> sLit "%r12b"; 13 -> sLit "%r13b"; - 14 -> sLit "%r14b"; 15 -> sLit "%r15b"; - _ -> sLit $ "very naughty x86_64 byte register: " ++ show i - }) - - ppr64_reg_word i = ptext - (case i of { - 0 -> sLit "%ax"; 1 -> sLit "%bx"; - 2 -> sLit "%cx"; 3 -> sLit "%dx"; - 4 -> sLit "%si"; 5 -> sLit "%di"; - 6 -> sLit "%bp"; 7 -> sLit "%sp"; - 8 -> sLit "%r8w"; 9 -> sLit "%r9w"; - 10 -> sLit "%r10w"; 11 -> sLit "%r11w"; - 12 -> sLit "%r12w"; 13 -> sLit "%r13w"; - 14 -> sLit "%r14w"; 15 -> sLit "%r15w"; - _ -> sLit "very naughty x86_64 word register" - }) - - ppr64_reg_long i = ptext - (case i of { - 0 -> sLit "%eax"; 1 -> sLit "%ebx"; - 2 -> sLit "%ecx"; 3 -> sLit "%edx"; - 4 -> sLit "%esi"; 5 -> sLit "%edi"; - 6 -> sLit "%ebp"; 7 -> sLit "%esp"; - 8 -> sLit "%r8d"; 9 -> sLit "%r9d"; - 10 -> sLit "%r10d"; 11 -> sLit "%r11d"; - 12 -> sLit "%r12d"; 13 -> sLit "%r13d"; - 14 -> sLit "%r14d"; 15 -> sLit "%r15d"; - _ -> sLit "very naughty x86_64 register" - }) - - ppr64_reg_quad i = ptext - (case i of { - 0 -> sLit "%rax"; 1 -> sLit "%rbx"; - 2 -> sLit "%rcx"; 3 -> sLit "%rdx"; - 4 -> sLit "%rsi"; 5 -> sLit "%rdi"; - 6 -> sLit "%rbp"; 7 -> sLit "%rsp"; - 8 -> sLit "%r8"; 9 -> sLit "%r9"; - 10 -> sLit "%r10"; 11 -> sLit "%r11"; - 12 -> sLit "%r12"; 13 -> sLit "%r13"; - 14 -> sLit "%r14"; 15 -> sLit "%r15"; - _ -> ppr_reg_float i - }) - -ppr_reg_float :: Int -> PtrString -ppr_reg_float i = case i of - 16 -> sLit "%xmm0" ; 17 -> sLit "%xmm1" - 18 -> sLit "%xmm2" ; 19 -> sLit "%xmm3" - 20 -> sLit "%xmm4" ; 21 -> sLit "%xmm5" - 22 -> sLit "%xmm6" ; 23 -> sLit "%xmm7" - 24 -> sLit "%xmm8" ; 25 -> sLit "%xmm9" - 26 -> sLit "%xmm10"; 27 -> sLit "%xmm11" - 28 -> sLit "%xmm12"; 29 -> sLit "%xmm13" - 30 -> sLit "%xmm14"; 31 -> sLit "%xmm15" - _ -> sLit "very naughty x86 register" - -pprFormat :: Format -> SDoc -pprFormat x - = ptext (case x of - II8 -> sLit "b" - II16 -> sLit "w" - II32 -> sLit "l" - II64 -> sLit "q" - FF32 -> sLit "ss" -- "scalar single-precision float" (SSE2) - FF64 -> sLit "sd" -- "scalar double-precision float" (SSE2) - ) - -pprFormat_x87 :: Format -> SDoc -pprFormat_x87 x - = ptext $ case x of - FF32 -> sLit "s" - FF64 -> sLit "l" - _ -> panic "X86.Ppr.pprFormat_x87" - - -pprCond :: Cond -> SDoc -pprCond c - = ptext (case c of { - GEU -> sLit "ae"; LU -> sLit "b"; - EQQ -> sLit "e"; GTT -> sLit "g"; - GE -> sLit "ge"; GU -> sLit "a"; - LTT -> sLit "l"; LE -> sLit "le"; - LEU -> sLit "be"; NE -> sLit "ne"; - NEG -> sLit "s"; POS -> sLit "ns"; - CARRY -> sLit "c"; OFLO -> sLit "o"; - PARITY -> sLit "p"; NOTPARITY -> sLit "np"; - ALWAYS -> sLit "mp"}) - - -pprImm :: Imm -> SDoc -pprImm (ImmInt i) = int i -pprImm (ImmInteger i) = integer i -pprImm (ImmCLbl l) = ppr l -pprImm (ImmIndex l i) = ppr l <> char '+' <> int i -pprImm (ImmLit s) = s - -pprImm (ImmFloat _) = text "naughty float immediate" -pprImm (ImmDouble _) = text "naughty double immediate" - -pprImm (ImmConstantSum a b) = pprImm a <> char '+' <> pprImm b -pprImm (ImmConstantDiff a b) = pprImm a <> char '-' - <> lparen <> pprImm b <> rparen - - - -pprAddr :: AddrMode -> SDoc -pprAddr (ImmAddr imm off) - = let pp_imm = pprImm imm - in - if (off == 0) then - pp_imm - else if (off < 0) then - pp_imm <> int off - else - pp_imm <> char '+' <> int off - -pprAddr (AddrBaseIndex base index displacement) - = sdocWithPlatform $ \platform -> - let - pp_disp = ppr_disp displacement - pp_off p = pp_disp <> char '(' <> p <> char ')' - pp_reg r = pprReg (archWordFormat (target32Bit platform)) r - in - case (base, index) of - (EABaseNone, EAIndexNone) -> pp_disp - (EABaseReg b, EAIndexNone) -> pp_off (pp_reg b) - (EABaseRip, EAIndexNone) -> pp_off (text "%rip") - (EABaseNone, EAIndex r i) -> pp_off (comma <> pp_reg r <> comma <> int i) - (EABaseReg b, EAIndex r i) -> pp_off (pp_reg b <> comma <> pp_reg r - <> comma <> int i) - _ -> panic "X86.Ppr.pprAddr: no match" - - where - ppr_disp (ImmInt 0) = empty - ppr_disp imm = pprImm imm - --- | Print section header and appropriate alignment for that section. -pprSectionAlign :: Section -> SDoc -pprSectionAlign (Section (OtherSection _) _) = - panic "X86.Ppr.pprSectionAlign: unknown section" -pprSectionAlign sec@(Section seg _) = - sdocWithPlatform $ \platform -> - pprSectionHeader platform sec $$ - pprAlignForSection seg - --- | Print appropriate alignment for the given section type. -pprAlignForSection :: SectionType -> SDoc -pprAlignForSection seg = - sdocWithPlatform $ \platform -> - text ".align " <> - case platformOS platform of - -- Darwin: alignments are given as shifts. - OSDarwin - | target32Bit platform -> - case seg of - ReadOnlyData16 -> int 4 - CString -> int 1 - _ -> int 2 - | otherwise -> - case seg of - ReadOnlyData16 -> int 4 - CString -> int 1 - _ -> int 3 - -- Other: alignments are given as bytes. - _ - | target32Bit platform -> - case seg of - Text -> text "4,0x90" - ReadOnlyData16 -> int 16 - CString -> int 1 - _ -> int 4 - | otherwise -> - case seg of - ReadOnlyData16 -> int 16 - CString -> int 1 - _ -> int 8 - -pprDataItem :: CmmLit -> SDoc -pprDataItem lit = sdocWithDynFlags $ \dflags -> pprDataItem' dflags lit - -pprDataItem' :: DynFlags -> CmmLit -> SDoc -pprDataItem' dflags lit - = vcat (ppr_item (cmmTypeFormat $ cmmLitType dflags lit) lit) - where - platform = targetPlatform dflags - imm = litToImm lit - - -- These seem to be common: - ppr_item II8 _ = [text "\t.byte\t" <> pprImm imm] - ppr_item II16 _ = [text "\t.word\t" <> pprImm imm] - ppr_item II32 _ = [text "\t.long\t" <> pprImm imm] - - ppr_item FF32 (CmmFloat r _) - = let bs = floatToBytes (fromRational r) - in map (\b -> text "\t.byte\t" <> pprImm (ImmInt b)) bs - - ppr_item FF64 (CmmFloat r _) - = let bs = doubleToBytes (fromRational r) - in map (\b -> text "\t.byte\t" <> pprImm (ImmInt b)) bs - - ppr_item II64 _ - = case platformOS platform of - OSDarwin - | target32Bit platform -> - case lit of - CmmInt x _ -> - [text "\t.long\t" - <> int (fromIntegral (fromIntegral x :: Word32)), - text "\t.long\t" - <> int (fromIntegral - (fromIntegral (x `shiftR` 32) :: Word32))] - _ -> panic "X86.Ppr.ppr_item: no match for II64" - | otherwise -> - [text "\t.quad\t" <> pprImm imm] - _ - | target32Bit platform -> - [text "\t.quad\t" <> pprImm imm] - | otherwise -> - -- x86_64: binutils can't handle the R_X86_64_PC64 - -- relocation type, which means we can't do - -- pc-relative 64-bit addresses. Fortunately we're - -- assuming the small memory model, in which all such - -- offsets will fit into 32 bits, so we have to stick - -- to 32-bit offset fields and modify the RTS - -- appropriately - -- - -- See Note [x86-64-relative] in includes/rts/storage/InfoTables.h - -- - case lit of - -- A relative relocation: - CmmLabelDiffOff _ _ _ _ -> - [text "\t.long\t" <> pprImm imm, - text "\t.long\t0"] - _ -> - [text "\t.quad\t" <> pprImm imm] - - ppr_item _ _ - = panic "X86.Ppr.ppr_item: no match" - - -asmComment :: SDoc -> SDoc -asmComment c = whenPprDebug $ text "# " <> c - -pprInstr :: Instr -> SDoc - -pprInstr (COMMENT s) - = asmComment (ftext s) - -pprInstr (LOCATION file line col _name) - = text "\t.loc " <> ppr file <+> ppr line <+> ppr col - -pprInstr (DELTA d) - = asmComment $ text ("\tdelta = " ++ show d) - -pprInstr (NEWBLOCK _) - = panic "PprMach.pprInstr: NEWBLOCK" - -pprInstr (UNWIND lbl d) - = asmComment (text "\tunwind = " <> ppr d) - $$ ppr lbl <> colon - -pprInstr (LDATA _ _) - = panic "PprMach.pprInstr: LDATA" - -{- -pprInstr (SPILL reg slot) - = hcat [ - text "\tSPILL", - char ' ', - pprUserReg reg, - comma, - text "SLOT" <> parens (int slot)] - -pprInstr (RELOAD slot reg) - = hcat [ - text "\tRELOAD", - char ' ', - text "SLOT" <> parens (int slot), - comma, - pprUserReg reg] --} - --- Replace 'mov $0x0,%reg' by 'xor %reg,%reg', which is smaller and cheaper. --- The code generator catches most of these already, but not all. -pprInstr (MOV format (OpImm (ImmInt 0)) dst@(OpReg _)) - = pprInstr (XOR format' dst dst) - where format' = case format of - II64 -> II32 -- 32-bit version is equivalent, and smaller - _ -> format -pprInstr (MOV format src dst) - = pprFormatOpOp (sLit "mov") format src dst - -pprInstr (CMOV cc format src dst) - = pprCondOpReg (sLit "cmov") format cc src dst - -pprInstr (MOVZxL II32 src dst) = pprFormatOpOp (sLit "mov") II32 src dst - -- 32-to-64 bit zero extension on x86_64 is accomplished by a simple - -- movl. But we represent it as a MOVZxL instruction, because - -- the reg alloc would tend to throw away a plain reg-to-reg - -- move, and we still want it to do that. - -pprInstr (MOVZxL formats src dst) - = pprFormatOpOpCoerce (sLit "movz") formats II32 src dst - -- zero-extension only needs to extend to 32 bits: on x86_64, - -- the remaining zero-extension to 64 bits is automatic, and the 32-bit - -- instruction is shorter. - -pprInstr (MOVSxL formats src dst) - = sdocWithPlatform $ \platform -> - pprFormatOpOpCoerce (sLit "movs") formats (archWordFormat (target32Bit platform)) src dst - --- here we do some patching, since the physical registers are only set late --- in the code generation. -pprInstr (LEA format (OpAddr (AddrBaseIndex (EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3)) - | reg1 == reg3 - = pprFormatOpOp (sLit "add") format (OpReg reg2) dst - -pprInstr (LEA format (OpAddr (AddrBaseIndex (EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3)) - | reg2 == reg3 - = pprFormatOpOp (sLit "add") format (OpReg reg1) dst - -pprInstr (LEA format (OpAddr (AddrBaseIndex (EABaseReg reg1) EAIndexNone displ)) dst@(OpReg reg3)) - | reg1 == reg3 - = pprInstr (ADD format (OpImm displ) dst) - -pprInstr (LEA format src dst) = pprFormatOpOp (sLit "lea") format src dst - -pprInstr (ADD format (OpImm (ImmInt (-1))) dst) - = pprFormatOp (sLit "dec") format dst -pprInstr (ADD format (OpImm (ImmInt 1)) dst) - = pprFormatOp (sLit "inc") format dst -pprInstr (ADD format src dst) = pprFormatOpOp (sLit "add") format src dst -pprInstr (ADC format src dst) = pprFormatOpOp (sLit "adc") format src dst -pprInstr (SUB format src dst) = pprFormatOpOp (sLit "sub") format src dst -pprInstr (SBB format src dst) = pprFormatOpOp (sLit "sbb") format src dst -pprInstr (IMUL format op1 op2) = pprFormatOpOp (sLit "imul") format op1 op2 - -pprInstr (ADD_CC format src dst) - = pprFormatOpOp (sLit "add") format src dst -pprInstr (SUB_CC format src dst) - = pprFormatOpOp (sLit "sub") format src dst - -{- A hack. The Intel documentation says that "The two and three - operand forms [of IMUL] may also be used with unsigned operands - because the lower half of the product is the same regardless if - (sic) the operands are signed or unsigned. The CF and OF flags, - however, cannot be used to determine if the upper half of the - result is non-zero." So there. --} - --- Use a 32-bit instruction when possible as it saves a byte. --- Notably, extracting the tag bits of a pointer has this form. --- TODO: we could save a byte in a subsequent CMP instruction too, --- but need something like a peephole pass for this -pprInstr (AND II64 src@(OpImm (ImmInteger mask)) dst) - | 0 <= mask && mask < 0xffffffff - = pprInstr (AND II32 src dst) -pprInstr (AND FF32 src dst) = pprOpOp (sLit "andps") FF32 src dst -pprInstr (AND FF64 src dst) = pprOpOp (sLit "andpd") FF64 src dst -pprInstr (AND format src dst) = pprFormatOpOp (sLit "and") format src dst -pprInstr (OR format src dst) = pprFormatOpOp (sLit "or") format src dst - -pprInstr (XOR FF32 src dst) = pprOpOp (sLit "xorps") FF32 src dst -pprInstr (XOR FF64 src dst) = pprOpOp (sLit "xorpd") FF64 src dst -pprInstr (XOR format src dst) = pprFormatOpOp (sLit "xor") format src dst - -pprInstr (POPCNT format src dst) = pprOpOp (sLit "popcnt") format src (OpReg dst) -pprInstr (LZCNT format src dst) = pprOpOp (sLit "lzcnt") format src (OpReg dst) -pprInstr (TZCNT format src dst) = pprOpOp (sLit "tzcnt") format src (OpReg dst) -pprInstr (BSF format src dst) = pprOpOp (sLit "bsf") format src (OpReg dst) -pprInstr (BSR format src dst) = pprOpOp (sLit "bsr") format src (OpReg dst) - -pprInstr (PDEP format src mask dst) = pprFormatOpOpReg (sLit "pdep") format src mask dst -pprInstr (PEXT format src mask dst) = pprFormatOpOpReg (sLit "pext") format src mask dst - -pprInstr (PREFETCH NTA format src ) = pprFormatOp_ (sLit "prefetchnta") format src -pprInstr (PREFETCH Lvl0 format src) = pprFormatOp_ (sLit "prefetcht0") format src -pprInstr (PREFETCH Lvl1 format src) = pprFormatOp_ (sLit "prefetcht1") format src -pprInstr (PREFETCH Lvl2 format src) = pprFormatOp_ (sLit "prefetcht2") format src - -pprInstr (NOT format op) = pprFormatOp (sLit "not") format op -pprInstr (BSWAP format op) = pprFormatOp (sLit "bswap") format (OpReg op) -pprInstr (NEGI format op) = pprFormatOp (sLit "neg") format op - -pprInstr (SHL format src dst) = pprShift (sLit "shl") format src dst -pprInstr (SAR format src dst) = pprShift (sLit "sar") format src dst -pprInstr (SHR format src dst) = pprShift (sLit "shr") format src dst - -pprInstr (BT format imm src) = pprFormatImmOp (sLit "bt") format imm src - -pprInstr (CMP format src dst) - | isFloatFormat format = pprFormatOpOp (sLit "ucomi") format src dst -- SSE2 - | otherwise = pprFormatOpOp (sLit "cmp") format src dst - -pprInstr (TEST format src dst) = sdocWithPlatform $ \platform -> - let format' = case (src,dst) of - -- Match instructions like 'test $0x3,%esi' or 'test $0x7,%rbx'. - -- We can replace them by equivalent, but smaller instructions - -- by reducing the size of the immediate operand as far as possible. - -- (We could handle masks larger than a single byte too, - -- but it would complicate the code considerably - -- and tag checks are by far the most common case.) - -- The mask must have the high bit clear for this smaller encoding - -- to be completely equivalent to the original; in particular so - -- that the signed comparison condition bits are the same as they - -- would be if doing a full word comparison. See #13425. - (OpImm (ImmInteger mask), OpReg dstReg) - | 0 <= mask && mask < 128 -> minSizeOfReg platform dstReg - _ -> format - in pprFormatOpOp (sLit "test") format' src dst - where - minSizeOfReg platform (RegReal (RealRegSingle i)) - | target32Bit platform && i <= 3 = II8 -- al, bl, cl, dl - | target32Bit platform && i <= 7 = II16 -- si, di, bp, sp - | not (target32Bit platform) && i <= 15 = II8 -- al .. r15b - minSizeOfReg _ _ = format -- other - -pprInstr (PUSH format op) = pprFormatOp (sLit "push") format op -pprInstr (POP format op) = pprFormatOp (sLit "pop") format op - --- both unused (SDM): --- pprInstr PUSHA = text "\tpushal" --- pprInstr POPA = text "\tpopal" - -pprInstr NOP = text "\tnop" -pprInstr (CLTD II8) = text "\tcbtw" -pprInstr (CLTD II16) = text "\tcwtd" -pprInstr (CLTD II32) = text "\tcltd" -pprInstr (CLTD II64) = text "\tcqto" -pprInstr (CLTD x) = panic $ "pprInstr: " ++ show x - -pprInstr (SETCC cond op) = pprCondInstr (sLit "set") cond (pprOperand II8 op) - -pprInstr (JXX cond blockid) - = pprCondInstr (sLit "j") cond (ppr lab) - where lab = blockLbl blockid - -pprInstr (JXX_GBL cond imm) = pprCondInstr (sLit "j") cond (pprImm imm) - -pprInstr (JMP (OpImm imm) _) = text "\tjmp " <> pprImm imm -pprInstr (JMP op _) = sdocWithPlatform $ \platform -> - text "\tjmp *" - <> pprOperand (archWordFormat (target32Bit platform)) op -pprInstr (JMP_TBL op _ _ _) = pprInstr (JMP op []) -pprInstr (CALL (Left imm) _) = text "\tcall " <> pprImm imm -pprInstr (CALL (Right reg) _) = sdocWithPlatform $ \platform -> - text "\tcall *" - <> pprReg (archWordFormat (target32Bit platform)) reg - -pprInstr (IDIV fmt op) = pprFormatOp (sLit "idiv") fmt op -pprInstr (DIV fmt op) = pprFormatOp (sLit "div") fmt op -pprInstr (IMUL2 fmt op) = pprFormatOp (sLit "imul") fmt op - --- x86_64 only -pprInstr (MUL format op1 op2) = pprFormatOpOp (sLit "mul") format op1 op2 -pprInstr (MUL2 format op) = pprFormatOp (sLit "mul") format op - -pprInstr (FDIV format op1 op2) = pprFormatOpOp (sLit "div") format op1 op2 -pprInstr (SQRT format op1 op2) = pprFormatOpReg (sLit "sqrt") format op1 op2 - -pprInstr (CVTSS2SD from to) = pprRegReg (sLit "cvtss2sd") from to -pprInstr (CVTSD2SS from to) = pprRegReg (sLit "cvtsd2ss") from to -pprInstr (CVTTSS2SIQ fmt from to) = pprFormatFormatOpReg (sLit "cvttss2si") FF32 fmt from to -pprInstr (CVTTSD2SIQ fmt from to) = pprFormatFormatOpReg (sLit "cvttsd2si") FF64 fmt from to -pprInstr (CVTSI2SS fmt from to) = pprFormatOpReg (sLit "cvtsi2ss") fmt from to -pprInstr (CVTSI2SD fmt from to) = pprFormatOpReg (sLit "cvtsi2sd") fmt from to - - -- FETCHGOT for PIC on ELF platforms -pprInstr (FETCHGOT reg) - = vcat [ text "\tcall 1f", - hcat [ text "1:\tpopl\t", pprReg II32 reg ], - hcat [ text "\taddl\t$_GLOBAL_OFFSET_TABLE_+(.-1b), ", - pprReg II32 reg ] - ] - - -- FETCHPC for PIC on Darwin/x86 - -- get the instruction pointer into a register - -- (Terminology note: the IP is called Program Counter on PPC, - -- and it's a good thing to use the same name on both platforms) -pprInstr (FETCHPC reg) - = vcat [ text "\tcall 1f", - hcat [ text "1:\tpopl\t", pprReg II32 reg ] - ] - - --- the --- GST fmt src addr ==> FLD dst ; FSTPsz addr -pprInstr g@(X87Store fmt addr) - = pprX87 g (hcat [gtab, - text "fstp", pprFormat_x87 fmt, gsp, pprAddr addr]) - - --- Atomics - -pprInstr (LOCK i) = text "\tlock" $$ pprInstr i - -pprInstr MFENCE = text "\tmfence" - -pprInstr (XADD format src dst) = pprFormatOpOp (sLit "xadd") format src dst - -pprInstr (CMPXCHG format src dst) - = pprFormatOpOp (sLit "cmpxchg") format src dst - - - --------------------------- --- some left over - - - -gtab :: SDoc -gtab = char '\t' - -gsp :: SDoc -gsp = char ' ' - - - -pprX87 :: Instr -> SDoc -> SDoc -pprX87 fake actual - = (char '#' <> pprX87Instr fake) $$ actual - -pprX87Instr :: Instr -> SDoc -pprX87Instr (X87Store fmt dst) = pprFormatAddr (sLit "gst") fmt dst -pprX87Instr _ = panic "X86.Ppr.pprX87Instr: no match" - -pprDollImm :: Imm -> SDoc -pprDollImm i = text "$" <> pprImm i - - -pprOperand :: Format -> Operand -> SDoc -pprOperand f (OpReg r) = pprReg f r -pprOperand _ (OpImm i) = pprDollImm i -pprOperand _ (OpAddr ea) = pprAddr ea - - -pprMnemonic_ :: PtrString -> SDoc -pprMnemonic_ name = - char '\t' <> ptext name <> space - - -pprMnemonic :: PtrString -> Format -> SDoc -pprMnemonic name format = - char '\t' <> ptext name <> pprFormat format <> space - - -pprFormatImmOp :: PtrString -> Format -> Imm -> Operand -> SDoc -pprFormatImmOp name format imm op1 - = hcat [ - pprMnemonic name format, - char '$', - pprImm imm, - comma, - pprOperand format op1 - ] - - -pprFormatOp_ :: PtrString -> Format -> Operand -> SDoc -pprFormatOp_ name format op1 - = hcat [ - pprMnemonic_ name , - pprOperand format op1 - ] - -pprFormatOp :: PtrString -> Format -> Operand -> SDoc -pprFormatOp name format op1 - = hcat [ - pprMnemonic name format, - pprOperand format op1 - ] - - -pprFormatOpOp :: PtrString -> Format -> Operand -> Operand -> SDoc -pprFormatOpOp name format op1 op2 - = hcat [ - pprMnemonic name format, - pprOperand format op1, - comma, - pprOperand format op2 - ] - - -pprOpOp :: PtrString -> Format -> Operand -> Operand -> SDoc -pprOpOp name format op1 op2 - = hcat [ - pprMnemonic_ name, - pprOperand format op1, - comma, - pprOperand format op2 - ] - - - -pprRegReg :: PtrString -> Reg -> Reg -> SDoc -pprRegReg name reg1 reg2 - = sdocWithPlatform $ \platform -> - hcat [ - pprMnemonic_ name, - pprReg (archWordFormat (target32Bit platform)) reg1, - comma, - pprReg (archWordFormat (target32Bit platform)) reg2 - ] - - -pprFormatOpReg :: PtrString -> Format -> Operand -> Reg -> SDoc -pprFormatOpReg name format op1 reg2 - = sdocWithPlatform $ \platform -> - hcat [ - pprMnemonic name format, - pprOperand format op1, - comma, - pprReg (archWordFormat (target32Bit platform)) reg2 - ] - -pprCondOpReg :: PtrString -> Format -> Cond -> Operand -> Reg -> SDoc -pprCondOpReg name format cond op1 reg2 - = hcat [ - char '\t', - ptext name, - pprCond cond, - space, - pprOperand format op1, - comma, - pprReg format reg2 - ] - -pprFormatFormatOpReg :: PtrString -> Format -> Format -> Operand -> Reg -> SDoc -pprFormatFormatOpReg name format1 format2 op1 reg2 - = hcat [ - pprMnemonic name format2, - pprOperand format1 op1, - comma, - pprReg format2 reg2 - ] - -pprFormatOpOpReg :: PtrString -> Format -> Operand -> Operand -> Reg -> SDoc -pprFormatOpOpReg name format op1 op2 reg3 - = hcat [ - pprMnemonic name format, - pprOperand format op1, - comma, - pprOperand format op2, - comma, - pprReg format reg3 - ] - - - -pprFormatAddr :: PtrString -> Format -> AddrMode -> SDoc -pprFormatAddr name format op - = hcat [ - pprMnemonic name format, - comma, - pprAddr op - ] - -pprShift :: PtrString -> Format -> Operand -> Operand -> SDoc -pprShift name format src dest - = hcat [ - pprMnemonic name format, - pprOperand II8 src, -- src is 8-bit sized - comma, - pprOperand format dest - ] - - -pprFormatOpOpCoerce :: PtrString -> Format -> Format -> Operand -> Operand -> SDoc -pprFormatOpOpCoerce name format1 format2 op1 op2 - = hcat [ char '\t', ptext name, pprFormat format1, pprFormat format2, space, - pprOperand format1 op1, - comma, - pprOperand format2 op2 - ] - - -pprCondInstr :: PtrString -> Cond -> SDoc -> SDoc -pprCondInstr name cond arg - = hcat [ char '\t', ptext name, pprCond cond, space, arg] diff --git a/compiler/nativeGen/X86/RegInfo.hs b/compiler/nativeGen/X86/RegInfo.hs deleted file mode 100644 index eb5e9bc7fc..0000000000 --- a/compiler/nativeGen/X86/RegInfo.hs +++ /dev/null @@ -1,73 +0,0 @@ -{-# LANGUAGE CPP #-} -module X86.RegInfo ( - mkVirtualReg, - regDotColor -) - -where - -#include "HsVersions.h" - -import GhcPrelude - -import Format -import Reg - -import Outputable -import GHC.Platform -import Unique - -import UniqFM -import X86.Regs - - -mkVirtualReg :: Unique -> Format -> VirtualReg -mkVirtualReg u format - = case format of - FF32 -> VirtualRegD u - -- for scalar F32, we use the same xmm as F64! - -- this is a hack that needs some improvement. - -- For now we map both to being allocated as "Double" Registers - -- on X86/X86_64 - FF64 -> VirtualRegD u - _other -> VirtualRegI u - -regDotColor :: Platform -> RealReg -> SDoc -regDotColor platform reg - = case (lookupUFM (regColors platform) reg) of - Just str -> text str - _ -> panic "Register not assigned a color" - -regColors :: Platform -> UniqFM [Char] -regColors platform = listToUFM (normalRegColors platform) - -normalRegColors :: Platform -> [(Reg,String)] -normalRegColors platform = - zip (map regSingle [0..lastint platform]) colors - ++ zip (map regSingle [firstxmm..lastxmm platform]) greys - where - -- 16 colors - enough for amd64 gp regs - colors = ["#800000","#ff0000","#808000","#ffff00","#008000" - ,"#00ff00","#008080","#00ffff","#000080","#0000ff" - ,"#800080","#ff00ff","#87005f","#875f00","#87af00" - ,"#ff00af"] - - -- 16 shades of grey, enough for the currently supported - -- SSE extensions. - greys = ["#0e0e0e","#1c1c1c","#2a2a2a","#383838","#464646" - ,"#545454","#626262","#707070","#7e7e7e","#8c8c8c" - ,"#9a9a9a","#a8a8a8","#b6b6b6","#c4c4c4","#d2d2d2" - ,"#e0e0e0"] - - - --- 32 shades of grey - use for avx 512 if we ever need it --- greys = ["#070707","#0e0e0e","#151515","#1c1c1c" --- ,"#232323","#2a2a2a","#313131","#383838","#3f3f3f" --- ,"#464646","#4d4d4d","#545454","#5b5b5b","#626262" --- ,"#696969","#707070","#777777","#7e7e7e","#858585" --- ,"#8c8c8c","#939393","#9a9a9a","#a1a1a1","#a8a8a8" --- ,"#afafaf","#b6b6b6","#bdbdbd","#c4c4c4","#cbcbcb" --- ,"#d2d2d2","#d9d9d9","#e0e0e0"] - - diff --git a/compiler/nativeGen/X86/Regs.hs b/compiler/nativeGen/X86/Regs.hs deleted file mode 100644 index 4c8943a284..0000000000 --- a/compiler/nativeGen/X86/Regs.hs +++ /dev/null @@ -1,442 +0,0 @@ -{-# LANGUAGE CPP #-} - -module X86.Regs ( - -- squeese functions for the graph allocator - virtualRegSqueeze, - realRegSqueeze, - - -- immediates - Imm(..), - strImmLit, - litToImm, - - -- addressing modes - AddrMode(..), - addrOffset, - - -- registers - spRel, - argRegs, - allArgRegs, - allIntArgRegs, - callClobberedRegs, - instrClobberedRegs, - allMachRegNos, - classOfRealReg, - showReg, - - -- machine specific - EABase(..), EAIndex(..), addrModeRegs, - - eax, ebx, ecx, edx, esi, edi, ebp, esp, - - - rax, rbx, rcx, rdx, rsi, rdi, rbp, rsp, - r8, r9, r10, r11, r12, r13, r14, r15, - lastint, - xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, - xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, - xmm, - firstxmm, lastxmm, - - ripRel, - allFPArgRegs, - - allocatableRegs -) - -where - -#include "HsVersions.h" - -import GhcPrelude - -import GHC.Platform.Regs -import Reg -import RegClass - -import GHC.Cmm -import GHC.Cmm.CLabel ( CLabel ) -import GHC.Driver.Session -import Outputable -import GHC.Platform - -import qualified Data.Array as A - --- | regSqueeze_class reg --- Calculate the maximum number of register colors that could be --- denied to a node of this class due to having this reg --- as a neighbour. --- -{-# INLINE virtualRegSqueeze #-} -virtualRegSqueeze :: RegClass -> VirtualReg -> Int - -virtualRegSqueeze cls vr - = case cls of - RcInteger - -> case vr of - VirtualRegI{} -> 1 - VirtualRegHi{} -> 1 - _other -> 0 - - RcDouble - -> case vr of - VirtualRegD{} -> 1 - VirtualRegF{} -> 0 - _other -> 0 - - - _other -> 0 - -{-# INLINE realRegSqueeze #-} -realRegSqueeze :: RegClass -> RealReg -> Int -realRegSqueeze cls rr - = case cls of - RcInteger - -> case rr of - RealRegSingle regNo - | regNo < firstxmm -> 1 - | otherwise -> 0 - - RealRegPair{} -> 0 - - RcDouble - -> case rr of - RealRegSingle regNo - | regNo >= firstxmm -> 1 - | otherwise -> 0 - - RealRegPair{} -> 0 - - - _other -> 0 - --- ----------------------------------------------------------------------------- --- Immediates - -data Imm - = ImmInt Int - | ImmInteger Integer -- Sigh. - | ImmCLbl CLabel -- AbstractC Label (with baggage) - | ImmLit SDoc -- Simple string - | ImmIndex CLabel Int - | ImmFloat Rational - | ImmDouble Rational - | ImmConstantSum Imm Imm - | ImmConstantDiff Imm Imm - -strImmLit :: String -> Imm -strImmLit s = ImmLit (text s) - - -litToImm :: CmmLit -> Imm -litToImm (CmmInt i w) = ImmInteger (narrowS w i) - -- narrow to the width: a CmmInt might be out of - -- range, but we assume that ImmInteger only contains - -- in-range values. A signed value should be fine here. -litToImm (CmmFloat f W32) = ImmFloat f -litToImm (CmmFloat f W64) = ImmDouble f -litToImm (CmmLabel l) = ImmCLbl l -litToImm (CmmLabelOff l off) = ImmIndex l off -litToImm (CmmLabelDiffOff l1 l2 off _) - = ImmConstantSum - (ImmConstantDiff (ImmCLbl l1) (ImmCLbl l2)) - (ImmInt off) -litToImm _ = panic "X86.Regs.litToImm: no match" - --- addressing modes ------------------------------------------------------------ - -data AddrMode - = AddrBaseIndex EABase EAIndex Displacement - | ImmAddr Imm Int - -data EABase = EABaseNone | EABaseReg Reg | EABaseRip -data EAIndex = EAIndexNone | EAIndex Reg Int -type Displacement = Imm - - -addrOffset :: AddrMode -> Int -> Maybe AddrMode -addrOffset addr off - = case addr of - ImmAddr i off0 -> Just (ImmAddr i (off0 + off)) - - AddrBaseIndex r i (ImmInt n) -> Just (AddrBaseIndex r i (ImmInt (n + off))) - AddrBaseIndex r i (ImmInteger n) - -> Just (AddrBaseIndex r i (ImmInt (fromInteger (n + toInteger off)))) - - AddrBaseIndex r i (ImmCLbl lbl) - -> Just (AddrBaseIndex r i (ImmIndex lbl off)) - - AddrBaseIndex r i (ImmIndex lbl ix) - -> Just (AddrBaseIndex r i (ImmIndex lbl (ix+off))) - - _ -> Nothing -- in theory, shouldn't happen - - -addrModeRegs :: AddrMode -> [Reg] -addrModeRegs (AddrBaseIndex b i _) = b_regs ++ i_regs - where - b_regs = case b of { EABaseReg r -> [r]; _ -> [] } - i_regs = case i of { EAIndex r _ -> [r]; _ -> [] } -addrModeRegs _ = [] - - --- registers ------------------------------------------------------------------- - --- @spRel@ gives us a stack relative addressing mode for volatile --- temporaries and for excess call arguments. @fpRel@, where --- applicable, is the same but for the frame pointer. - - -spRel :: DynFlags - -> Int -- ^ desired stack offset in bytes, positive or negative - -> AddrMode -spRel dflags n - | target32Bit (targetPlatform dflags) - = AddrBaseIndex (EABaseReg esp) EAIndexNone (ImmInt n) - | otherwise - = AddrBaseIndex (EABaseReg rsp) EAIndexNone (ImmInt n) - --- The register numbers must fit into 32 bits on x86, so that we can --- use a Word32 to represent the set of free registers in the register --- allocator. - - - -firstxmm :: RegNo -firstxmm = 16 - --- on 32bit platformOSs, only the first 8 XMM/YMM/ZMM registers are available -lastxmm :: Platform -> RegNo -lastxmm platform - | target32Bit platform = firstxmm + 7 -- xmm0 - xmmm7 - | otherwise = firstxmm + 15 -- xmm0 -xmm15 - -lastint :: Platform -> RegNo -lastint platform - | target32Bit platform = 7 -- not %r8..%r15 - | otherwise = 15 - -intregnos :: Platform -> [RegNo] -intregnos platform = [0 .. lastint platform] - - - -xmmregnos :: Platform -> [RegNo] -xmmregnos platform = [firstxmm .. lastxmm platform] - -floatregnos :: Platform -> [RegNo] -floatregnos platform = xmmregnos platform - --- argRegs is the set of regs which are read for an n-argument call to C. --- For archs which pass all args on the stack (x86), is empty. --- Sparc passes up to the first 6 args in regs. -argRegs :: RegNo -> [Reg] -argRegs _ = panic "MachRegs.argRegs(x86): should not be used!" - --- | The complete set of machine registers. -allMachRegNos :: Platform -> [RegNo] -allMachRegNos platform = intregnos platform ++ floatregnos platform - --- | Take the class of a register. -{-# INLINE classOfRealReg #-} -classOfRealReg :: Platform -> RealReg -> RegClass --- On x86, we might want to have an 8-bit RegClass, which would --- contain just regs 1-4 (the others don't have 8-bit versions). --- However, we can get away without this at the moment because the --- only allocatable integer regs are also 8-bit compatible (1, 3, 4). -classOfRealReg platform reg - = case reg of - RealRegSingle i - | i <= lastint platform -> RcInteger - | i <= lastxmm platform -> RcDouble - | otherwise -> panic "X86.Reg.classOfRealReg registerSingle too high" - _ -> panic "X86.Regs.classOfRealReg: RegPairs on this arch" - --- | Get the name of the register with this number. --- NOTE: fixme, we dont track which "way" the XMM registers are used -showReg :: Platform -> RegNo -> String -showReg platform n - | n >= firstxmm && n <= lastxmm platform = "%xmm" ++ show (n-firstxmm) - | n >= 8 && n < firstxmm = "%r" ++ show n - | otherwise = regNames platform A.! n - -regNames :: Platform -> A.Array Int String -regNames platform - = if target32Bit platform - then A.listArray (0,8) ["%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi", "%ebp", "%esp"] - else A.listArray (0,8) ["%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi", "%rbp", "%rsp"] - - - --- machine specific ------------------------------------------------------------ - - -{- -Intel x86 architecture: -- All registers except 7 (esp) are available for use. -- Only ebx, esi, edi and esp are available across a C call (they are callee-saves). -- Registers 0-7 have 16-bit counterparts (ax, bx etc.) -- Registers 0-3 have 8 bit counterparts (ah, bh etc.) - -The fp registers are all Double registers; we don't have any RcFloat class -regs. @regClass@ barfs if you give it a VirtualRegF, and mkVReg above should -never generate them. - -TODO: cleanup modelling float vs double registers and how they are the same class. --} - - -eax, ebx, ecx, edx, esp, ebp, esi, edi :: Reg - -eax = regSingle 0 -ebx = regSingle 1 -ecx = regSingle 2 -edx = regSingle 3 -esi = regSingle 4 -edi = regSingle 5 -ebp = regSingle 6 -esp = regSingle 7 - - - - -{- -AMD x86_64 architecture: -- All 16 integer registers are addressable as 8, 16, 32 and 64-bit values: - - 8 16 32 64 - --------------------- - al ax eax rax - bl bx ebx rbx - cl cx ecx rcx - dl dx edx rdx - sil si esi rsi - dil si edi rdi - bpl bp ebp rbp - spl sp esp rsp - r10b r10w r10d r10 - r11b r11w r11d r11 - r12b r12w r12d r12 - r13b r13w r13d r13 - r14b r14w r14d r14 - r15b r15w r15d r15 --} - -rax, rbx, rcx, rdx, rsp, rbp, rsi, rdi, - r8, r9, r10, r11, r12, r13, r14, r15, - xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, - xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15 :: Reg - -rax = regSingle 0 -rbx = regSingle 1 -rcx = regSingle 2 -rdx = regSingle 3 -rsi = regSingle 4 -rdi = regSingle 5 -rbp = regSingle 6 -rsp = regSingle 7 -r8 = regSingle 8 -r9 = regSingle 9 -r10 = regSingle 10 -r11 = regSingle 11 -r12 = regSingle 12 -r13 = regSingle 13 -r14 = regSingle 14 -r15 = regSingle 15 -xmm0 = regSingle 16 -xmm1 = regSingle 17 -xmm2 = regSingle 18 -xmm3 = regSingle 19 -xmm4 = regSingle 20 -xmm5 = regSingle 21 -xmm6 = regSingle 22 -xmm7 = regSingle 23 -xmm8 = regSingle 24 -xmm9 = regSingle 25 -xmm10 = regSingle 26 -xmm11 = regSingle 27 -xmm12 = regSingle 28 -xmm13 = regSingle 29 -xmm14 = regSingle 30 -xmm15 = regSingle 31 - -ripRel :: Displacement -> AddrMode -ripRel imm = AddrBaseIndex EABaseRip EAIndexNone imm - - - -- so we can re-use some x86 code: -{- -eax = rax -ebx = rbx -ecx = rcx -edx = rdx -esi = rsi -edi = rdi -ebp = rbp -esp = rsp --} - -xmm :: RegNo -> Reg -xmm n = regSingle (firstxmm+n) - - - - --- | these are the regs which we cannot assume stay alive over a C call. -callClobberedRegs :: Platform -> [Reg] --- caller-saves registers -callClobberedRegs platform - | target32Bit platform = [eax,ecx,edx] ++ map regSingle (floatregnos platform) - | platformOS platform == OSMinGW32 - = [rax,rcx,rdx,r8,r9,r10,r11] - -- Only xmm0-5 are caller-saves registers on 64bit windows. - -- ( https://docs.microsoft.com/en-us/cpp/build/register-usage ) - -- For details check the Win64 ABI. - ++ map xmm [0 .. 5] - | otherwise - -- all xmm regs are caller-saves - -- caller-saves registers - = [rax,rcx,rdx,rsi,rdi,r8,r9,r10,r11] - ++ map regSingle (floatregnos platform) - -allArgRegs :: Platform -> [(Reg, Reg)] -allArgRegs platform - | platformOS platform == OSMinGW32 = zip [rcx,rdx,r8,r9] - (map regSingle [firstxmm ..]) - | otherwise = panic "X86.Regs.allArgRegs: not defined for this arch" - -allIntArgRegs :: Platform -> [Reg] -allIntArgRegs platform - | (platformOS platform == OSMinGW32) || target32Bit platform - = panic "X86.Regs.allIntArgRegs: not defined for this platform" - | otherwise = [rdi,rsi,rdx,rcx,r8,r9] - - --- | on 64bit platforms we pass the first 8 float/double arguments --- in the xmm registers. -allFPArgRegs :: Platform -> [Reg] -allFPArgRegs platform - | platformOS platform == OSMinGW32 - = panic "X86.Regs.allFPArgRegs: not defined for this platform" - | otherwise = map regSingle [firstxmm .. firstxmm + 7 ] - - --- Machine registers which might be clobbered by instructions that --- generate results into fixed registers, or need arguments in a fixed --- register. -instrClobberedRegs :: Platform -> [Reg] -instrClobberedRegs platform - | target32Bit platform = [ eax, ecx, edx ] - | otherwise = [ rax, rcx, rdx ] - --- - --- allocatableRegs is allMachRegNos with the fixed-use regs removed. --- i.e., these are the regs for which we are prepared to allow the --- register allocator to attempt to map VRegs to. -allocatableRegs :: Platform -> [RealReg] -allocatableRegs platform - = let isFree i = freeReg platform i - in map RealRegSingle $ filter isFree (allMachRegNos platform) - |