summaryrefslogtreecommitdiff
path: root/compiler/nativeGen
diff options
context:
space:
mode:
authorSylvain Henry <sylvain@haskus.fr>2020-02-22 15:05:20 +0100
committerMarge Bot <ben+marge-bot@smart-cactus.org>2020-02-24 20:55:25 -0500
commit1b1067d14b656bbbfa7c47f156ec2700c9751549 (patch)
tree32346e3c4c3f89117190b36364144d85dc260e05 /compiler/nativeGen
parent354e2787be08fb6d973de1a39e58080ff8e107f8 (diff)
downloadhaskell-1b1067d14b656bbbfa7c47f156ec2700c9751549.tar.gz
Modules: CmmToAsm (#13009)
Diffstat (limited to 'compiler/nativeGen')
-rw-r--r--compiler/nativeGen/AsmCodeGen.hs1236
-rw-r--r--compiler/nativeGen/BlockLayout.hs895
-rw-r--r--compiler/nativeGen/CFG.hs1320
-rw-r--r--compiler/nativeGen/CPrim.hs133
-rw-r--r--compiler/nativeGen/Dwarf.hs269
-rw-r--r--compiler/nativeGen/Dwarf/Constants.hs229
-rw-r--r--compiler/nativeGen/Dwarf/Types.hs612
-rw-r--r--compiler/nativeGen/Format.hs105
-rw-r--r--compiler/nativeGen/Instruction.hs202
-rw-r--r--compiler/nativeGen/NCGMonad.hs294
-rw-r--r--compiler/nativeGen/NOTES41
-rw-r--r--compiler/nativeGen/PIC.hs838
-rw-r--r--compiler/nativeGen/PPC/CodeGen.hs2453
-rw-r--r--compiler/nativeGen/PPC/Cond.hs63
-rw-r--r--compiler/nativeGen/PPC/Instr.hs713
-rw-r--r--compiler/nativeGen/PPC/Ppr.hs994
-rw-r--r--compiler/nativeGen/PPC/RegInfo.hs80
-rw-r--r--compiler/nativeGen/PPC/Regs.hs333
-rw-r--r--compiler/nativeGen/PprBase.hs275
-rw-r--r--compiler/nativeGen/Reg.hs241
-rw-r--r--compiler/nativeGen/RegAlloc/Graph/ArchBase.hs163
-rw-r--r--compiler/nativeGen/RegAlloc/Graph/ArchX86.hs161
-rw-r--r--compiler/nativeGen/RegAlloc/Graph/Coalesce.hs99
-rw-r--r--compiler/nativeGen/RegAlloc/Graph/Main.hs472
-rw-r--r--compiler/nativeGen/RegAlloc/Graph/Spill.hs382
-rw-r--r--compiler/nativeGen/RegAlloc/Graph/SpillClean.hs616
-rw-r--r--compiler/nativeGen/RegAlloc/Graph/SpillCost.hs317
-rw-r--r--compiler/nativeGen/RegAlloc/Graph/Stats.hs346
-rw-r--r--compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs274
-rw-r--r--compiler/nativeGen/RegAlloc/Linear/Base.hs141
-rw-r--r--compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs89
-rw-r--r--compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs378
-rw-r--r--compiler/nativeGen/RegAlloc/Linear/Main.hs920
-rw-r--r--compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs61
-rw-r--r--compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs189
-rw-r--r--compiler/nativeGen/RegAlloc/Linear/StackMap.hs61
-rw-r--r--compiler/nativeGen/RegAlloc/Linear/State.hs184
-rw-r--r--compiler/nativeGen/RegAlloc/Linear/Stats.hs87
-rw-r--r--compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs53
-rw-r--r--compiler/nativeGen/RegAlloc/Linear/X86_64/FreeRegs.hs54
-rw-r--r--compiler/nativeGen/RegAlloc/Liveness.hs1025
-rw-r--r--compiler/nativeGen/RegClass.hs32
-rw-r--r--compiler/nativeGen/SPARC/AddrMode.hs44
-rw-r--r--compiler/nativeGen/SPARC/Base.hs77
-rw-r--r--compiler/nativeGen/SPARC/CodeGen.hs700
-rw-r--r--compiler/nativeGen/SPARC/CodeGen/Amode.hs74
-rw-r--r--compiler/nativeGen/SPARC/CodeGen/Base.hs119
-rw-r--r--compiler/nativeGen/SPARC/CodeGen/CondCode.hs110
-rw-r--r--compiler/nativeGen/SPARC/CodeGen/Expand.hs156
-rw-r--r--compiler/nativeGen/SPARC/CodeGen/Gen32.hs692
-rw-r--r--compiler/nativeGen/SPARC/CodeGen/Gen32.hs-boot16
-rw-r--r--compiler/nativeGen/SPARC/CodeGen/Gen64.hs216
-rw-r--r--compiler/nativeGen/SPARC/CodeGen/Sanity.hs69
-rw-r--r--compiler/nativeGen/SPARC/Cond.hs54
-rw-r--r--compiler/nativeGen/SPARC/Imm.hs67
-rw-r--r--compiler/nativeGen/SPARC/Instr.hs481
-rw-r--r--compiler/nativeGen/SPARC/Ppr.hs645
-rw-r--r--compiler/nativeGen/SPARC/Regs.hs259
-rw-r--r--compiler/nativeGen/SPARC/ShortcutJump.hs74
-rw-r--r--compiler/nativeGen/SPARC/Stack.hs59
-rw-r--r--compiler/nativeGen/TargetReg.hs137
-rw-r--r--compiler/nativeGen/X86/CodeGen.hs3743
-rw-r--r--compiler/nativeGen/X86/Cond.hs109
-rw-r--r--compiler/nativeGen/X86/Instr.hs1054
-rw-r--r--compiler/nativeGen/X86/Ppr.hs1014
-rw-r--r--compiler/nativeGen/X86/RegInfo.hs73
-rw-r--r--compiler/nativeGen/X86/Regs.hs442
67 files changed, 0 insertions, 27914 deletions
diff --git a/compiler/nativeGen/AsmCodeGen.hs b/compiler/nativeGen/AsmCodeGen.hs
deleted file mode 100644
index 9aa6933757..0000000000
--- a/compiler/nativeGen/AsmCodeGen.hs
+++ /dev/null
@@ -1,1236 +0,0 @@
--- -----------------------------------------------------------------------------
---
--- (c) The University of Glasgow 1993-2004
---
--- This is the top-level module in the native code generator.
---
--- -----------------------------------------------------------------------------
-
-{-# LANGUAGE BangPatterns, CPP, GADTs, ScopedTypeVariables, PatternSynonyms,
- DeriveFunctor #-}
-
-#if !defined(GHC_LOADED_INTO_GHCI)
-{-# LANGUAGE UnboxedTuples #-}
-#endif
-
-{-# OPTIONS_GHC -Wno-incomplete-record-updates #-}
-
-module AsmCodeGen (
- -- * Module entry point
- nativeCodeGen
-
- -- * Test-only exports: see trac #12744
- -- used by testGraphNoSpills, which needs to access
- -- the register allocator intermediate data structures
- -- cmmNativeGen emits
- , cmmNativeGen
- , NcgImpl(..)
- , x86NcgImpl
- ) where
-
-#include "HsVersions.h"
-
-import GhcPrelude
-
-import qualified X86.CodeGen
-import qualified X86.Regs
-import qualified X86.Instr
-import qualified X86.Ppr
-
-import qualified SPARC.CodeGen
-import qualified SPARC.Regs
-import qualified SPARC.Instr
-import qualified SPARC.Ppr
-import qualified SPARC.ShortcutJump
-import qualified SPARC.CodeGen.Expand
-
-import qualified PPC.CodeGen
-import qualified PPC.Regs
-import qualified PPC.RegInfo
-import qualified PPC.Instr
-import qualified PPC.Ppr
-
-import RegAlloc.Liveness
-import qualified RegAlloc.Linear.Main as Linear
-
-import qualified GraphColor as Color
-import qualified RegAlloc.Graph.Main as Color
-import qualified RegAlloc.Graph.Stats as Color
-import qualified RegAlloc.Graph.TrivColorable as Color
-
-import AsmUtils
-import TargetReg
-import GHC.Platform
-import BlockLayout
-import Config
-import Instruction
-import PIC
-import Reg
-import NCGMonad
-import CFG
-import Dwarf
-import GHC.Cmm.DebugBlock
-
-import GHC.Cmm.BlockId
-import GHC.StgToCmm.CgUtils ( fixStgRegisters )
-import GHC.Cmm
-import GHC.Cmm.Utils
-import GHC.Cmm.Dataflow.Collections
-import GHC.Cmm.Dataflow.Label
-import GHC.Cmm.Dataflow.Block
-import GHC.Cmm.Opt ( cmmMachOpFold )
-import GHC.Cmm.Ppr
-import GHC.Cmm.CLabel
-
-import UniqFM
-import UniqSupply
-import GHC.Driver.Session
-import Util
-
-import BasicTypes ( Alignment )
-import qualified Pretty
-import BufWrite
-import Outputable
-import FastString
-import UniqSet
-import ErrUtils
-import Module
-import Stream (Stream)
-import qualified Stream
-
--- DEBUGGING ONLY
---import OrdList
-
-import Data.List
-import Data.Maybe
-import Data.Ord ( comparing )
-import Control.Exception
-import Control.Monad
-import System.IO
-
-{-
-The native-code generator has machine-independent and
-machine-dependent modules.
-
-This module ("AsmCodeGen") is the top-level machine-independent
-module. Before entering machine-dependent land, we do some
-machine-independent optimisations (defined below) on the
-'CmmStmts's.
-
-We convert to the machine-specific 'Instr' datatype with
-'cmmCodeGen', assuming an infinite supply of registers. We then use
-a machine-independent register allocator ('regAlloc') to rejoin
-reality. Obviously, 'regAlloc' has machine-specific helper
-functions (see about "RegAllocInfo" below).
-
-Finally, we order the basic blocks of the function so as to minimise
-the number of jumps between blocks, by utilising fallthrough wherever
-possible.
-
-The machine-dependent bits break down as follows:
-
- * ["MachRegs"] Everything about the target platform's machine
- registers (and immediate operands, and addresses, which tend to
- intermingle/interact with registers).
-
- * ["MachInstrs"] Includes the 'Instr' datatype (possibly should
- have a module of its own), plus a miscellany of other things
- (e.g., 'targetDoubleSize', 'smStablePtrTable', ...)
-
- * ["MachCodeGen"] is where 'Cmm' stuff turns into
- machine instructions.
-
- * ["PprMach"] 'pprInstr' turns an 'Instr' into text (well, really
- a 'SDoc').
-
- * ["RegAllocInfo"] In the register allocator, we manipulate
- 'MRegsState's, which are 'BitSet's, one bit per machine register.
- When we want to say something about a specific machine register
- (e.g., ``it gets clobbered by this instruction''), we set/unset
- its bit. Obviously, we do this 'BitSet' thing for efficiency
- reasons.
-
- The 'RegAllocInfo' module collects together the machine-specific
- info needed to do register allocation.
-
- * ["RegisterAlloc"] The (machine-independent) register allocator.
--}
-
---------------------
-nativeCodeGen :: forall a . DynFlags -> Module -> ModLocation -> Handle -> UniqSupply
- -> Stream IO RawCmmGroup a
- -> IO a
-nativeCodeGen dflags this_mod modLoc h us cmms
- = let platform = targetPlatform dflags
- nCG' :: ( Outputable statics, Outputable instr
- , Outputable jumpDest, Instruction instr)
- => NcgImpl statics instr jumpDest -> IO a
- nCG' ncgImpl = nativeCodeGen' dflags this_mod modLoc ncgImpl h us cmms
- in case platformArch platform of
- ArchX86 -> nCG' (x86NcgImpl dflags)
- ArchX86_64 -> nCG' (x86_64NcgImpl dflags)
- ArchPPC -> nCG' (ppcNcgImpl dflags)
- ArchS390X -> panic "nativeCodeGen: No NCG for S390X"
- ArchSPARC -> nCG' (sparcNcgImpl dflags)
- ArchSPARC64 -> panic "nativeCodeGen: No NCG for SPARC64"
- ArchARM {} -> panic "nativeCodeGen: No NCG for ARM"
- ArchARM64 -> panic "nativeCodeGen: No NCG for ARM64"
- ArchPPC_64 _ -> nCG' (ppcNcgImpl dflags)
- ArchAlpha -> panic "nativeCodeGen: No NCG for Alpha"
- ArchMipseb -> panic "nativeCodeGen: No NCG for mipseb"
- ArchMipsel -> panic "nativeCodeGen: No NCG for mipsel"
- ArchUnknown -> panic "nativeCodeGen: No NCG for unknown arch"
- ArchJavaScript-> panic "nativeCodeGen: No NCG for JavaScript"
-
-x86NcgImpl :: DynFlags -> NcgImpl (Alignment, RawCmmStatics)
- X86.Instr.Instr X86.Instr.JumpDest
-x86NcgImpl dflags
- = (x86_64NcgImpl dflags)
-
-x86_64NcgImpl :: DynFlags -> NcgImpl (Alignment, RawCmmStatics)
- X86.Instr.Instr X86.Instr.JumpDest
-x86_64NcgImpl dflags
- = NcgImpl {
- cmmTopCodeGen = X86.CodeGen.cmmTopCodeGen
- ,generateJumpTableForInstr = X86.CodeGen.generateJumpTableForInstr dflags
- ,getJumpDestBlockId = X86.Instr.getJumpDestBlockId
- ,canShortcut = X86.Instr.canShortcut
- ,shortcutStatics = X86.Instr.shortcutStatics
- ,shortcutJump = X86.Instr.shortcutJump
- ,pprNatCmmDecl = X86.Ppr.pprNatCmmDecl
- ,maxSpillSlots = X86.Instr.maxSpillSlots dflags
- ,allocatableRegs = X86.Regs.allocatableRegs platform
- ,ncgAllocMoreStack = X86.Instr.allocMoreStack platform
- ,ncgExpandTop = id
- ,ncgMakeFarBranches = const id
- ,extractUnwindPoints = X86.CodeGen.extractUnwindPoints
- ,invertCondBranches = X86.CodeGen.invertCondBranches
- }
- where platform = targetPlatform dflags
-
-ppcNcgImpl :: DynFlags -> NcgImpl RawCmmStatics PPC.Instr.Instr PPC.RegInfo.JumpDest
-ppcNcgImpl dflags
- = NcgImpl {
- cmmTopCodeGen = PPC.CodeGen.cmmTopCodeGen
- ,generateJumpTableForInstr = PPC.CodeGen.generateJumpTableForInstr dflags
- ,getJumpDestBlockId = PPC.RegInfo.getJumpDestBlockId
- ,canShortcut = PPC.RegInfo.canShortcut
- ,shortcutStatics = PPC.RegInfo.shortcutStatics
- ,shortcutJump = PPC.RegInfo.shortcutJump
- ,pprNatCmmDecl = PPC.Ppr.pprNatCmmDecl
- ,maxSpillSlots = PPC.Instr.maxSpillSlots dflags
- ,allocatableRegs = PPC.Regs.allocatableRegs platform
- ,ncgAllocMoreStack = PPC.Instr.allocMoreStack platform
- ,ncgExpandTop = id
- ,ncgMakeFarBranches = PPC.Instr.makeFarBranches
- ,extractUnwindPoints = const []
- ,invertCondBranches = \_ _ -> id
- }
- where platform = targetPlatform dflags
-
-sparcNcgImpl :: DynFlags -> NcgImpl RawCmmStatics SPARC.Instr.Instr SPARC.ShortcutJump.JumpDest
-sparcNcgImpl dflags
- = NcgImpl {
- cmmTopCodeGen = SPARC.CodeGen.cmmTopCodeGen
- ,generateJumpTableForInstr = SPARC.CodeGen.generateJumpTableForInstr dflags
- ,getJumpDestBlockId = SPARC.ShortcutJump.getJumpDestBlockId
- ,canShortcut = SPARC.ShortcutJump.canShortcut
- ,shortcutStatics = SPARC.ShortcutJump.shortcutStatics
- ,shortcutJump = SPARC.ShortcutJump.shortcutJump
- ,pprNatCmmDecl = SPARC.Ppr.pprNatCmmDecl
- ,maxSpillSlots = SPARC.Instr.maxSpillSlots dflags
- ,allocatableRegs = SPARC.Regs.allocatableRegs
- ,ncgAllocMoreStack = noAllocMoreStack
- ,ncgExpandTop = map SPARC.CodeGen.Expand.expandTop
- ,ncgMakeFarBranches = const id
- ,extractUnwindPoints = const []
- ,invertCondBranches = \_ _ -> id
- }
-
---
--- Allocating more stack space for spilling is currently only
--- supported for the linear register allocator on x86/x86_64, the rest
--- default to the panic below. To support allocating extra stack on
--- more platforms provide a definition of ncgAllocMoreStack.
---
-noAllocMoreStack :: Int -> NatCmmDecl statics instr
- -> UniqSM (NatCmmDecl statics instr, [(BlockId,BlockId)])
-noAllocMoreStack amount _
- = panic $ "Register allocator: out of stack slots (need " ++ show amount ++ ")\n"
- ++ " If you are trying to compile SHA1.hs from the crypto library then this\n"
- ++ " is a known limitation in the linear allocator.\n"
- ++ "\n"
- ++ " Try enabling the graph colouring allocator with -fregs-graph instead."
- ++ " You can still file a bug report if you like.\n"
-
-
--- | Data accumulated during code generation. Mostly about statistics,
--- but also collects debug data for DWARF generation.
-data NativeGenAcc statics instr
- = NGS { ngs_imports :: ![[CLabel]]
- , ngs_natives :: ![[NatCmmDecl statics instr]]
- -- ^ Native code generated, for statistics. This might
- -- hold a lot of data, so it is important to clear this
- -- field as early as possible if it isn't actually
- -- required.
- , ngs_colorStats :: ![[Color.RegAllocStats statics instr]]
- , ngs_linearStats :: ![[Linear.RegAllocStats]]
- , ngs_labels :: ![Label]
- , ngs_debug :: ![DebugBlock]
- , ngs_dwarfFiles :: !DwarfFiles
- , ngs_unwinds :: !(LabelMap [UnwindPoint])
- -- ^ see Note [Unwinding information in the NCG]
- -- and Note [What is this unwinding business?] in Debug.
- }
-
-{-
-Note [Unwinding information in the NCG]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Unwind information is a type of metadata which allows a debugging tool
-to reconstruct the values of machine registers at the time a procedure was
-entered. For the most part, the production of unwind information is handled by
-the Cmm stage, where it is represented by CmmUnwind nodes.
-
-Unfortunately, the Cmm stage doesn't know everything necessary to produce
-accurate unwinding information. For instance, the x86-64 calling convention
-requires that the stack pointer be aligned to 16 bytes, which in turn means that
-GHC must sometimes add padding to $sp prior to performing a foreign call. When
-this happens unwind information must be updated accordingly.
-For this reason, we make the NCG backends responsible for producing
-unwinding tables (with the extractUnwindPoints function in NcgImpl).
-
-We accumulate the produced unwind tables over CmmGroups in the ngs_unwinds
-field of NativeGenAcc. This is a label map which contains an entry for each
-procedure, containing a list of unwinding points (e.g. a label and an associated
-unwinding table).
-
-See also Note [What is this unwinding business?] in Debug.
--}
-
-nativeCodeGen' :: (Outputable statics, Outputable instr,Outputable jumpDest,
- Instruction instr)
- => DynFlags
- -> Module -> ModLocation
- -> NcgImpl statics instr jumpDest
- -> Handle
- -> UniqSupply
- -> Stream IO RawCmmGroup a
- -> IO a
-nativeCodeGen' dflags this_mod modLoc ncgImpl h us cmms
- = do
- -- BufHandle is a performance hack. We could hide it inside
- -- Pretty if it weren't for the fact that we do lots of little
- -- printDocs here (in order to do codegen in constant space).
- bufh <- newBufHandle h
- let ngs0 = NGS [] [] [] [] [] [] emptyUFM mapEmpty
- (ngs, us', a) <- cmmNativeGenStream dflags this_mod modLoc ncgImpl bufh us
- cmms ngs0
- _ <- finishNativeGen dflags modLoc bufh us' ngs
- return a
-
-finishNativeGen :: Instruction instr
- => DynFlags
- -> ModLocation
- -> BufHandle
- -> UniqSupply
- -> NativeGenAcc statics instr
- -> IO UniqSupply
-finishNativeGen dflags modLoc bufh@(BufHandle _ _ h) us ngs
- = withTimingSilent dflags (text "NCG") (`seq` ()) $ do
- -- Write debug data and finish
- let emitDw = debugLevel dflags > 0
- us' <- if not emitDw then return us else do
- (dwarf, us') <- dwarfGen dflags modLoc us (ngs_debug ngs)
- emitNativeCode dflags bufh dwarf
- return us'
- bFlush bufh
-
- -- dump global NCG stats for graph coloring allocator
- let stats = concat (ngs_colorStats ngs)
- unless (null stats) $ do
-
- -- build the global register conflict graph
- let graphGlobal
- = foldl' Color.union Color.initGraph
- $ [ Color.raGraph stat
- | stat@Color.RegAllocStatsStart{} <- stats]
-
- dump_stats (Color.pprStats stats graphGlobal)
-
- let platform = targetPlatform dflags
- dumpIfSet_dyn dflags
- Opt_D_dump_asm_conflicts "Register conflict graph"
- FormatText
- $ Color.dotGraph
- (targetRegDotColor platform)
- (Color.trivColorable platform
- (targetVirtualRegSqueeze platform)
- (targetRealRegSqueeze platform))
- $ graphGlobal
-
-
- -- dump global NCG stats for linear allocator
- let linearStats = concat (ngs_linearStats ngs)
- unless (null linearStats) $
- dump_stats (Linear.pprStats (concat (ngs_natives ngs)) linearStats)
-
- -- write out the imports
- printSDocLn Pretty.LeftMode dflags h (mkCodeStyle AsmStyle)
- $ makeImportsDoc dflags (concat (ngs_imports ngs))
- return us'
- where
- dump_stats = dumpAction dflags (mkDumpStyle dflags alwaysQualify)
- (dumpOptionsFromFlag Opt_D_dump_asm_stats) "NCG stats"
- FormatText
-
-cmmNativeGenStream :: (Outputable statics, Outputable instr
- ,Outputable jumpDest, Instruction instr)
- => DynFlags
- -> Module -> ModLocation
- -> NcgImpl statics instr jumpDest
- -> BufHandle
- -> UniqSupply
- -> Stream IO RawCmmGroup a
- -> NativeGenAcc statics instr
- -> IO (NativeGenAcc statics instr, UniqSupply, a)
-
-cmmNativeGenStream dflags this_mod modLoc ncgImpl h us cmm_stream ngs
- = do r <- Stream.runStream cmm_stream
- case r of
- Left a ->
- return (ngs { ngs_imports = reverse $ ngs_imports ngs
- , ngs_natives = reverse $ ngs_natives ngs
- , ngs_colorStats = reverse $ ngs_colorStats ngs
- , ngs_linearStats = reverse $ ngs_linearStats ngs
- },
- us,
- a)
- Right (cmms, cmm_stream') -> do
- (us', ngs'') <-
- withTimingSilent
- dflags
- ncglabel (\(a, b) -> a `seq` b `seq` ()) $ do
- -- Generate debug information
- let debugFlag = debugLevel dflags > 0
- !ndbgs | debugFlag = cmmDebugGen modLoc cmms
- | otherwise = []
- dbgMap = debugToMap ndbgs
-
- -- Generate native code
- (ngs',us') <- cmmNativeGens dflags this_mod modLoc ncgImpl h
- dbgMap us cmms ngs 0
-
- -- Link native code information into debug blocks
- -- See Note [What is this unwinding business?] in Debug.
- let !ldbgs = cmmDebugLink (ngs_labels ngs') (ngs_unwinds ngs') ndbgs
- unless (null ldbgs) $
- dumpIfSet_dyn dflags Opt_D_dump_debug "Debug Infos" FormatText
- (vcat $ map ppr ldbgs)
-
- -- Accumulate debug information for emission in finishNativeGen.
- let ngs'' = ngs' { ngs_debug = ngs_debug ngs' ++ ldbgs, ngs_labels = [] }
- return (us', ngs'')
-
- cmmNativeGenStream dflags this_mod modLoc ncgImpl h us'
- cmm_stream' ngs''
-
- where ncglabel = text "NCG"
-
--- | Do native code generation on all these cmms.
---
-cmmNativeGens :: forall statics instr jumpDest.
- (Outputable statics, Outputable instr
- ,Outputable jumpDest, Instruction instr)
- => DynFlags
- -> Module -> ModLocation
- -> NcgImpl statics instr jumpDest
- -> BufHandle
- -> LabelMap DebugBlock
- -> UniqSupply
- -> [RawCmmDecl]
- -> NativeGenAcc statics instr
- -> Int
- -> IO (NativeGenAcc statics instr, UniqSupply)
-
-cmmNativeGens dflags this_mod modLoc ncgImpl h dbgMap = go
- where
- go :: UniqSupply -> [RawCmmDecl]
- -> NativeGenAcc statics instr -> Int
- -> IO (NativeGenAcc statics instr, UniqSupply)
-
- go us [] ngs !_ =
- return (ngs, us)
-
- go us (cmm : cmms) ngs count = do
- let fileIds = ngs_dwarfFiles ngs
- (us', fileIds', native, imports, colorStats, linearStats, unwinds)
- <- {-# SCC "cmmNativeGen" #-}
- cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap
- cmm count
-
- -- Generate .file directives for every new file that has been
- -- used. Note that it is important that we generate these in
- -- ascending order, as Clang's 3.6 assembler complains.
- let newFileIds = sortBy (comparing snd) $
- nonDetEltsUFM $ fileIds' `minusUFM` fileIds
- -- See Note [Unique Determinism and code generation]
- pprDecl (f,n) = text "\t.file " <> ppr n <+>
- pprFilePathString (unpackFS f)
-
- emitNativeCode dflags h $ vcat $
- map pprDecl newFileIds ++
- map (pprNatCmmDecl ncgImpl) native
-
- -- force evaluation all this stuff to avoid space leaks
- {-# SCC "seqString" #-} evaluate $ seqList (showSDoc dflags $ vcat $ map ppr imports) ()
-
- let !labels' = if debugLevel dflags > 0
- then cmmDebugLabels isMetaInstr native else []
- !natives' = if dopt Opt_D_dump_asm_stats dflags
- then native : ngs_natives ngs else []
-
- mCon = maybe id (:)
- ngs' = ngs{ ngs_imports = imports : ngs_imports ngs
- , ngs_natives = natives'
- , ngs_colorStats = colorStats `mCon` ngs_colorStats ngs
- , ngs_linearStats = linearStats `mCon` ngs_linearStats ngs
- , ngs_labels = ngs_labels ngs ++ labels'
- , ngs_dwarfFiles = fileIds'
- , ngs_unwinds = ngs_unwinds ngs `mapUnion` unwinds
- }
- go us' cmms ngs' (count + 1)
-
-
-emitNativeCode :: DynFlags -> BufHandle -> SDoc -> IO ()
-emitNativeCode dflags h sdoc = do
-
- {-# SCC "pprNativeCode" #-} bufLeftRenderSDoc dflags h
- (mkCodeStyle AsmStyle) sdoc
-
- -- dump native code
- dumpIfSet_dyn dflags
- Opt_D_dump_asm "Asm code" FormatASM
- sdoc
-
--- | Complete native code generation phase for a single top-level chunk of Cmm.
--- Dumping the output of each stage along the way.
--- Global conflict graph and NGC stats
-cmmNativeGen
- :: forall statics instr jumpDest. (Instruction instr,
- Outputable statics, Outputable instr, Outputable jumpDest)
- => DynFlags
- -> Module -> ModLocation
- -> NcgImpl statics instr jumpDest
- -> UniqSupply
- -> DwarfFiles
- -> LabelMap DebugBlock
- -> RawCmmDecl -- ^ the cmm to generate code for
- -> Int -- ^ sequence number of this top thing
- -> IO ( UniqSupply
- , DwarfFiles
- , [NatCmmDecl statics instr] -- native code
- , [CLabel] -- things imported by this cmm
- , Maybe [Color.RegAllocStats statics instr] -- stats for the coloring register allocator
- , Maybe [Linear.RegAllocStats] -- stats for the linear register allocators
- , LabelMap [UnwindPoint] -- unwinding information for blocks
- )
-
-cmmNativeGen dflags this_mod modLoc ncgImpl us fileIds dbgMap cmm count
- = do
- let platform = targetPlatform dflags
-
- let proc_name = case cmm of
- (CmmProc _ entry_label _ _) -> ppr entry_label
- _ -> text "DataChunk"
-
- -- rewrite assignments to global regs
- let fixed_cmm =
- {-# SCC "fixStgRegisters" #-}
- fixStgRegisters dflags cmm
-
- -- cmm to cmm optimisations
- let (opt_cmm, imports) =
- {-# SCC "cmmToCmm" #-}
- cmmToCmm dflags this_mod fixed_cmm
-
- dumpIfSet_dyn dflags
- Opt_D_dump_opt_cmm "Optimised Cmm" FormatCMM
- (pprCmmGroup [opt_cmm])
-
- let cmmCfg = {-# SCC "getCFG" #-}
- getCfgProc (cfgWeightInfo dflags) opt_cmm
-
- -- generate native code from cmm
- let ((native, lastMinuteImports, fileIds', nativeCfgWeights), usGen) =
- {-# SCC "genMachCode" #-}
- initUs us $ genMachCode dflags this_mod modLoc
- (cmmTopCodeGen ncgImpl)
- fileIds dbgMap opt_cmm cmmCfg
-
- dumpIfSet_dyn dflags
- Opt_D_dump_asm_native "Native code" FormatASM
- (vcat $ map (pprNatCmmDecl ncgImpl) native)
-
- maybeDumpCfg dflags (Just nativeCfgWeights) "CFG Weights - Native" proc_name
-
- -- tag instructions with register liveness information
- -- also drops dead code. We don't keep the cfg in sync on
- -- some backends, so don't use it there.
- let livenessCfg = if (backendMaintainsCfg dflags)
- then Just nativeCfgWeights
- else Nothing
- let (withLiveness, usLive) =
- {-# SCC "regLiveness" #-}
- initUs usGen
- $ mapM (cmmTopLiveness livenessCfg platform) native
-
- dumpIfSet_dyn dflags
- Opt_D_dump_asm_liveness "Liveness annotations added"
- FormatCMM
- (vcat $ map ppr withLiveness)
-
- -- allocate registers
- (alloced, usAlloc, ppr_raStatsColor, ppr_raStatsLinear, raStats, stack_updt_blks) <-
- if ( gopt Opt_RegsGraph dflags
- || gopt Opt_RegsIterative dflags )
- then do
- -- the regs usable for allocation
- let (alloc_regs :: UniqFM (UniqSet RealReg))
- = foldr (\r -> plusUFM_C unionUniqSets
- $ unitUFM (targetClassOfRealReg platform r) (unitUniqSet r))
- emptyUFM
- $ allocatableRegs ncgImpl
-
- -- do the graph coloring register allocation
- let ((alloced, maybe_more_stack, regAllocStats), usAlloc)
- = {-# SCC "RegAlloc-color" #-}
- initUs usLive
- $ Color.regAlloc
- dflags
- alloc_regs
- (mkUniqSet [0 .. maxSpillSlots ncgImpl])
- (maxSpillSlots ncgImpl)
- withLiveness
- livenessCfg
-
- let ((alloced', stack_updt_blks), usAlloc')
- = initUs usAlloc $
- case maybe_more_stack of
- Nothing -> return (alloced, [])
- Just amount -> do
- (alloced',stack_updt_blks) <- unzip <$>
- (mapM ((ncgAllocMoreStack ncgImpl) amount) alloced)
- return (alloced', concat stack_updt_blks )
-
-
- -- dump out what happened during register allocation
- dumpIfSet_dyn dflags
- Opt_D_dump_asm_regalloc "Registers allocated"
- FormatCMM
- (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
-
- dumpIfSet_dyn dflags
- Opt_D_dump_asm_regalloc_stages "Build/spill stages"
- FormatText
- (vcat $ map (\(stage, stats)
- -> text "# --------------------------"
- $$ text "# cmm " <> int count <> text " Stage " <> int stage
- $$ ppr stats)
- $ zip [0..] regAllocStats)
-
- let mPprStats =
- if dopt Opt_D_dump_asm_stats dflags
- then Just regAllocStats else Nothing
-
- -- force evaluation of the Maybe to avoid space leak
- mPprStats `seq` return ()
-
- return ( alloced', usAlloc'
- , mPprStats
- , Nothing
- , [], stack_updt_blks)
-
- else do
- -- do linear register allocation
- let reg_alloc proc = do
- (alloced, maybe_more_stack, ra_stats) <-
- Linear.regAlloc dflags proc
- case maybe_more_stack of
- Nothing -> return ( alloced, ra_stats, [] )
- Just amount -> do
- (alloced',stack_updt_blks) <-
- ncgAllocMoreStack ncgImpl amount alloced
- return (alloced', ra_stats, stack_updt_blks )
-
- let ((alloced, regAllocStats, stack_updt_blks), usAlloc)
- = {-# SCC "RegAlloc-linear" #-}
- initUs usLive
- $ liftM unzip3
- $ mapM reg_alloc withLiveness
-
- dumpIfSet_dyn dflags
- Opt_D_dump_asm_regalloc "Registers allocated"
- FormatCMM
- (vcat $ map (pprNatCmmDecl ncgImpl) alloced)
-
- let mPprStats =
- if dopt Opt_D_dump_asm_stats dflags
- then Just (catMaybes regAllocStats) else Nothing
-
- -- force evaluation of the Maybe to avoid space leak
- mPprStats `seq` return ()
-
- return ( alloced, usAlloc
- , Nothing
- , mPprStats, (catMaybes regAllocStats)
- , concat stack_updt_blks )
-
- -- Fixupblocks the register allocator inserted (from, regMoves, to)
- let cfgRegAllocUpdates :: [(BlockId,BlockId,BlockId)]
- cfgRegAllocUpdates = (concatMap Linear.ra_fixupList raStats)
-
- let cfgWithFixupBlks =
- (\cfg -> addNodesBetween cfg cfgRegAllocUpdates) <$> livenessCfg
-
- -- Insert stack update blocks
- let postRegCFG =
- pure (foldl' (\m (from,to) -> addImmediateSuccessor from to m ))
- <*> cfgWithFixupBlks
- <*> pure stack_updt_blks
-
- ---- generate jump tables
- let tabled =
- {-# SCC "generateJumpTables" #-}
- generateJumpTables ncgImpl alloced
-
- when (not $ null nativeCfgWeights) $ dumpIfSet_dyn dflags
- Opt_D_dump_cfg_weights "CFG Update information"
- FormatText
- ( text "stack:" <+> ppr stack_updt_blks $$
- text "linearAlloc:" <+> ppr cfgRegAllocUpdates )
-
- ---- shortcut branches
- let (shorted, postShortCFG) =
- {-# SCC "shortcutBranches" #-}
- shortcutBranches dflags ncgImpl tabled postRegCFG
-
- let optimizedCFG :: Maybe CFG
- optimizedCFG =
- optimizeCFG (cfgWeightInfo dflags) cmm <$!> postShortCFG
-
- maybeDumpCfg dflags optimizedCFG "CFG Weights - Final" proc_name
-
- --TODO: Partially check validity of the cfg.
- let getBlks (CmmProc _info _lbl _live (ListGraph blocks)) = blocks
- getBlks _ = []
-
- when ( backendMaintainsCfg dflags &&
- (gopt Opt_DoAsmLinting dflags || debugIsOn )) $ do
- let blocks = concatMap getBlks shorted
- let labels = setFromList $ fmap blockId blocks :: LabelSet
- let cfg = fromJust optimizedCFG
- return $! seq (sanityCheckCfg cfg labels $
- text "cfg not in lockstep") ()
-
- ---- sequence blocks
- let sequenced :: [NatCmmDecl statics instr]
- sequenced =
- checkLayout shorted $
- {-# SCC "sequenceBlocks" #-}
- map (BlockLayout.sequenceTop
- dflags
- ncgImpl optimizedCFG)
- shorted
-
- let branchOpt :: [NatCmmDecl statics instr]
- branchOpt =
- {-# SCC "invertCondBranches" #-}
- map invert sequenced
- where
- invertConds :: LabelMap RawCmmStatics -> [NatBasicBlock instr]
- -> [NatBasicBlock instr]
- invertConds = invertCondBranches ncgImpl optimizedCFG
- invert top@CmmData {} = top
- invert (CmmProc info lbl live (ListGraph blocks)) =
- CmmProc info lbl live (ListGraph $ invertConds info blocks)
-
- ---- expansion of SPARC synthetic instrs
- let expanded =
- {-# SCC "sparc_expand" #-}
- ncgExpandTop ncgImpl branchOpt
- --ncgExpandTop ncgImpl sequenced
-
- dumpIfSet_dyn dflags
- Opt_D_dump_asm_expanded "Synthetic instructions expanded"
- FormatCMM
- (vcat $ map (pprNatCmmDecl ncgImpl) expanded)
-
- -- generate unwinding information from cmm
- let unwinds :: BlockMap [UnwindPoint]
- unwinds =
- {-# SCC "unwindingInfo" #-}
- foldl' addUnwind mapEmpty expanded
- where
- addUnwind acc proc =
- acc `mapUnion` computeUnwinding dflags ncgImpl proc
-
- return ( usAlloc
- , fileIds'
- , expanded
- , lastMinuteImports ++ imports
- , ppr_raStatsColor
- , ppr_raStatsLinear
- , unwinds )
-
-maybeDumpCfg :: DynFlags -> Maybe CFG -> String -> SDoc -> IO ()
-maybeDumpCfg _dflags Nothing _ _ = return ()
-maybeDumpCfg dflags (Just cfg) msg proc_name
- | null cfg = return ()
- | otherwise
- = dumpIfSet_dyn
- dflags Opt_D_dump_cfg_weights msg
- FormatText
- (proc_name <> char ':' $$ pprEdgeWeights cfg)
-
--- | Make sure all blocks we want the layout algorithm to place have been placed.
-checkLayout :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
- -> [NatCmmDecl statics instr]
-checkLayout procsUnsequenced procsSequenced =
- ASSERT2(setNull diff,
- ppr "Block sequencing dropped blocks:" <> ppr diff)
- procsSequenced
- where
- blocks1 = foldl' (setUnion) setEmpty $
- map getBlockIds procsUnsequenced :: LabelSet
- blocks2 = foldl' (setUnion) setEmpty $
- map getBlockIds procsSequenced
- diff = setDifference blocks1 blocks2
-
- getBlockIds (CmmData _ _) = setEmpty
- getBlockIds (CmmProc _ _ _ (ListGraph blocks)) =
- setFromList $ map blockId blocks
-
--- | Compute unwinding tables for the blocks of a procedure
-computeUnwinding :: Instruction instr
- => DynFlags -> NcgImpl statics instr jumpDest
- -> NatCmmDecl statics instr
- -- ^ the native code generated for the procedure
- -> LabelMap [UnwindPoint]
- -- ^ unwinding tables for all points of all blocks of the
- -- procedure
-computeUnwinding dflags _ _
- | debugLevel dflags == 0 = mapEmpty
-computeUnwinding _ _ (CmmData _ _) = mapEmpty
-computeUnwinding _ ncgImpl (CmmProc _ _ _ (ListGraph blks)) =
- -- In general we would need to push unwinding information down the
- -- block-level call-graph to ensure that we fully account for all
- -- relevant register writes within a procedure.
- --
- -- However, the only unwinding information that we care about in GHC is for
- -- Sp. The fact that GHC.Cmm.LayoutStack already ensures that we have unwind
- -- information at the beginning of every block means that there is no need
- -- to perform this sort of push-down.
- mapFromList [ (blk_lbl, extractUnwindPoints ncgImpl instrs)
- | BasicBlock blk_lbl instrs <- blks ]
-
--- | Build a doc for all the imports.
---
-makeImportsDoc :: DynFlags -> [CLabel] -> SDoc
-makeImportsDoc dflags imports
- = dyld_stubs imports
- $$
- -- On recent versions of Darwin, the linker supports
- -- dead-stripping of code and data on a per-symbol basis.
- -- There's a hack to make this work in PprMach.pprNatCmmDecl.
- (if platformHasSubsectionsViaSymbols platform
- then text ".subsections_via_symbols"
- else Outputable.empty)
- $$
- -- On recent GNU ELF systems one can mark an object file
- -- as not requiring an executable stack. If all objects
- -- linked into a program have this note then the program
- -- will not use an executable stack, which is good for
- -- security. GHC generated code does not need an executable
- -- stack so add the note in:
- (if platformHasGnuNonexecStack platform
- then text ".section .note.GNU-stack,\"\"," <> sectionType "progbits"
- else Outputable.empty)
- $$
- -- And just because every other compiler does, let's stick in
- -- an identifier directive: .ident "GHC x.y.z"
- (if platformHasIdentDirective platform
- then let compilerIdent = text "GHC" <+> text cProjectVersion
- in text ".ident" <+> doubleQuotes compilerIdent
- else Outputable.empty)
-
- where
- platform = targetPlatform dflags
- arch = platformArch platform
- os = platformOS platform
-
- -- Generate "symbol stubs" for all external symbols that might
- -- come from a dynamic library.
- dyld_stubs :: [CLabel] -> SDoc
-{- dyld_stubs imps = vcat $ map pprDyldSymbolStub $
- map head $ group $ sort imps-}
- -- (Hack) sometimes two Labels pretty-print the same, but have
- -- different uniques; so we compare their text versions...
- dyld_stubs imps
- | needImportedSymbols dflags arch os
- = vcat $
- (pprGotDeclaration dflags arch os :) $
- map ( pprImportedSymbol dflags platform . fst . head) $
- groupBy (\(_,a) (_,b) -> a == b) $
- sortBy (\(_,a) (_,b) -> compare a b) $
- map doPpr $
- imps
- | otherwise
- = Outputable.empty
-
- doPpr lbl = (lbl, renderWithStyle
- (initSDocContext dflags astyle)
- (pprCLabel dflags lbl))
- astyle = mkCodeStyle AsmStyle
-
--- -----------------------------------------------------------------------------
--- Generate jump tables
-
--- Analyzes all native code and generates data sections for all jump
--- table instructions.
-generateJumpTables
- :: NcgImpl statics instr jumpDest
- -> [NatCmmDecl statics instr] -> [NatCmmDecl statics instr]
-generateJumpTables ncgImpl xs = concatMap f xs
- where f p@(CmmProc _ _ _ (ListGraph xs)) = p : concatMap g xs
- f p = [p]
- g (BasicBlock _ xs) = catMaybes (map (generateJumpTableForInstr ncgImpl) xs)
-
--- -----------------------------------------------------------------------------
--- Shortcut branches
-
-shortcutBranches
- :: forall statics instr jumpDest. (Outputable jumpDest) => DynFlags
- -> NcgImpl statics instr jumpDest
- -> [NatCmmDecl statics instr]
- -> Maybe CFG
- -> ([NatCmmDecl statics instr],Maybe CFG)
-
-shortcutBranches dflags ncgImpl tops weights
- | gopt Opt_AsmShortcutting dflags
- = ( map (apply_mapping ncgImpl mapping) tops'
- , shortcutWeightMap mappingBid <$!> weights )
- | otherwise
- = (tops, weights)
- where
- (tops', mappings) = mapAndUnzip (build_mapping ncgImpl) tops
- mapping = mapUnions mappings :: LabelMap jumpDest
- mappingBid = fmap (getJumpDestBlockId ncgImpl) mapping
-
-build_mapping :: forall instr t d statics jumpDest.
- NcgImpl statics instr jumpDest
- -> GenCmmDecl d (LabelMap t) (ListGraph instr)
- -> (GenCmmDecl d (LabelMap t) (ListGraph instr)
- ,LabelMap jumpDest)
-build_mapping _ top@(CmmData _ _) = (top, mapEmpty)
-build_mapping _ (CmmProc info lbl live (ListGraph []))
- = (CmmProc info lbl live (ListGraph []), mapEmpty)
-build_mapping ncgImpl (CmmProc info lbl live (ListGraph (head:blocks)))
- = (CmmProc info lbl live (ListGraph (head:others)), mapping)
- -- drop the shorted blocks, but don't ever drop the first one,
- -- because it is pointed to by a global label.
- where
- -- find all the blocks that just consist of a jump that can be
- -- shorted.
- -- Don't completely eliminate loops here -- that can leave a dangling jump!
- shortcut_blocks :: [(BlockId, jumpDest)]
- (_, shortcut_blocks, others) =
- foldl' split (setEmpty :: LabelSet, [], []) blocks
- split (s, shortcut_blocks, others) b@(BasicBlock id [insn])
- | Just jd <- canShortcut ncgImpl insn
- , Just dest <- getJumpDestBlockId ncgImpl jd
- , not (has_info id)
- , (setMember dest s) || dest == id -- loop checks
- = (s, shortcut_blocks, b : others)
- split (s, shortcut_blocks, others) (BasicBlock id [insn])
- | Just dest <- canShortcut ncgImpl insn
- , not (has_info id)
- = (setInsert id s, (id,dest) : shortcut_blocks, others)
- split (s, shortcut_blocks, others) other = (s, shortcut_blocks, other : others)
-
- -- do not eliminate blocks that have an info table
- has_info l = mapMember l info
-
- -- build a mapping from BlockId to JumpDest for shorting branches
- mapping = mapFromList shortcut_blocks
-
-apply_mapping :: NcgImpl statics instr jumpDest
- -> LabelMap jumpDest
- -> GenCmmDecl statics h (ListGraph instr)
- -> GenCmmDecl statics h (ListGraph instr)
-apply_mapping ncgImpl ufm (CmmData sec statics)
- = CmmData sec (shortcutStatics ncgImpl (\bid -> mapLookup bid ufm) statics)
-apply_mapping ncgImpl ufm (CmmProc info lbl live (ListGraph blocks))
- = CmmProc info lbl live (ListGraph $ map short_bb blocks)
- where
- short_bb (BasicBlock id insns) = BasicBlock id $! map short_insn insns
- short_insn i = shortcutJump ncgImpl (\bid -> mapLookup bid ufm) i
- -- shortcutJump should apply the mapping repeatedly,
- -- just in case we can short multiple branches.
-
--- -----------------------------------------------------------------------------
--- Instruction selection
-
--- Native code instruction selection for a chunk of stix code. For
--- this part of the computation, we switch from the UniqSM monad to
--- the NatM monad. The latter carries not only a Unique, but also an
--- Int denoting the current C stack pointer offset in the generated
--- code; this is needed for creating correct spill offsets on
--- architectures which don't offer, or for which it would be
--- prohibitively expensive to employ, a frame pointer register. Viz,
--- x86.
-
--- The offset is measured in bytes, and indicates the difference
--- between the current (simulated) C stack-ptr and the value it was at
--- the beginning of the block. For stacks which grow down, this value
--- should be either zero or negative.
-
--- Along with the stack pointer offset, we also carry along a LabelMap of
--- DebugBlocks, which we read to generate .location directives.
---
--- Switching between the two monads whilst carrying along the same
--- Unique supply breaks abstraction. Is that bad?
-
-genMachCode
- :: DynFlags
- -> Module -> ModLocation
- -> (RawCmmDecl -> NatM [NatCmmDecl statics instr])
- -> DwarfFiles
- -> LabelMap DebugBlock
- -> RawCmmDecl
- -> CFG
- -> UniqSM
- ( [NatCmmDecl statics instr]
- , [CLabel]
- , DwarfFiles
- , CFG
- )
-
-genMachCode dflags this_mod modLoc cmmTopCodeGen fileIds dbgMap cmm_top cmm_cfg
- = do { initial_us <- getUniqueSupplyM
- ; let initial_st = mkNatM_State initial_us 0 dflags this_mod
- modLoc fileIds dbgMap cmm_cfg
- (new_tops, final_st) = initNat initial_st (cmmTopCodeGen cmm_top)
- final_delta = natm_delta final_st
- final_imports = natm_imports final_st
- final_cfg = natm_cfg final_st
- ; if final_delta == 0
- then return (new_tops, final_imports
- , natm_fileid final_st, final_cfg)
- else pprPanic "genMachCode: nonzero final delta" (int final_delta)
- }
-
--- -----------------------------------------------------------------------------
--- Generic Cmm optimiser
-
-{-
-Here we do:
-
- (a) Constant folding
- (c) Position independent code and dynamic linking
- (i) introduce the appropriate indirections
- and position independent refs
- (ii) compile a list of imported symbols
- (d) Some arch-specific optimizations
-
-(a) will be moving to the new Hoopl pipeline, however, (c) and
-(d) are only needed by the native backend and will continue to live
-here.
-
-Ideas for other things we could do (put these in Hoopl please!):
-
- - shortcut jumps-to-jumps
- - simple CSE: if an expr is assigned to a temp, then replace later occs of
- that expr with the temp, until the expr is no longer valid (can push through
- temp assignments, and certain assigns to mem...)
--}
-
-cmmToCmm :: DynFlags -> Module -> RawCmmDecl -> (RawCmmDecl, [CLabel])
-cmmToCmm _ _ top@(CmmData _ _) = (top, [])
-cmmToCmm dflags this_mod (CmmProc info lbl live graph)
- = runCmmOpt dflags this_mod $
- do blocks' <- mapM cmmBlockConFold (toBlockList graph)
- return $ CmmProc info lbl live (ofBlockList (g_entry graph) blocks')
-
--- Avoids using unboxed tuples when loading into GHCi
-#if !defined(GHC_LOADED_INTO_GHCI)
-
-type OptMResult a = (# a, [CLabel] #)
-
-pattern OptMResult :: a -> b -> (# a, b #)
-pattern OptMResult x y = (# x, y #)
-{-# COMPLETE OptMResult #-}
-#else
-
-data OptMResult a = OptMResult !a ![CLabel] deriving (Functor)
-#endif
-
-newtype CmmOptM a = CmmOptM (DynFlags -> Module -> [CLabel] -> OptMResult a)
- deriving (Functor)
-
-instance Applicative CmmOptM where
- pure x = CmmOptM $ \_ _ imports -> OptMResult x imports
- (<*>) = ap
-
-instance Monad CmmOptM where
- (CmmOptM f) >>= g =
- CmmOptM $ \dflags this_mod imports0 ->
- case f dflags this_mod imports0 of
- OptMResult x imports1 ->
- case g x of
- CmmOptM g' -> g' dflags this_mod imports1
-
-instance CmmMakeDynamicReferenceM CmmOptM where
- addImport = addImportCmmOpt
- getThisModule = CmmOptM $ \_ this_mod imports -> OptMResult this_mod imports
-
-addImportCmmOpt :: CLabel -> CmmOptM ()
-addImportCmmOpt lbl = CmmOptM $ \_ _ imports -> OptMResult () (lbl:imports)
-
-instance HasDynFlags CmmOptM where
- getDynFlags = CmmOptM $ \dflags _ imports -> OptMResult dflags imports
-
-runCmmOpt :: DynFlags -> Module -> CmmOptM a -> (a, [CLabel])
-runCmmOpt dflags this_mod (CmmOptM f) =
- case f dflags this_mod [] of
- OptMResult result imports -> (result, imports)
-
-cmmBlockConFold :: CmmBlock -> CmmOptM CmmBlock
-cmmBlockConFold block = do
- let (entry, middle, last) = blockSplit block
- stmts = blockToList middle
- stmts' <- mapM cmmStmtConFold stmts
- last' <- cmmStmtConFold last
- return $ blockJoin entry (blockFromList stmts') last'
-
--- This does three optimizations, but they're very quick to check, so we don't
--- bother turning them off even when the Hoopl code is active. Since
--- this is on the old Cmm representation, we can't reuse the code either:
--- * reg = reg --> nop
--- * if 0 then jump --> nop
--- * if 1 then jump --> jump
--- We might be tempted to skip this step entirely of not Opt_PIC, but
--- there is some PowerPC code for the non-PIC case, which would also
--- have to be separated.
-cmmStmtConFold :: CmmNode e x -> CmmOptM (CmmNode e x)
-cmmStmtConFold stmt
- = case stmt of
- CmmAssign reg src
- -> do src' <- cmmExprConFold DataReference src
- return $ case src' of
- CmmReg reg' | reg == reg' -> CmmComment (fsLit "nop")
- new_src -> CmmAssign reg new_src
-
- CmmStore addr src
- -> do addr' <- cmmExprConFold DataReference addr
- src' <- cmmExprConFold DataReference src
- return $ CmmStore addr' src'
-
- CmmCall { cml_target = addr }
- -> do addr' <- cmmExprConFold JumpReference addr
- return $ stmt { cml_target = addr' }
-
- CmmUnsafeForeignCall target regs args
- -> do target' <- case target of
- ForeignTarget e conv -> do
- e' <- cmmExprConFold CallReference e
- return $ ForeignTarget e' conv
- PrimTarget _ ->
- return target
- args' <- mapM (cmmExprConFold DataReference) args
- return $ CmmUnsafeForeignCall target' regs args'
-
- CmmCondBranch test true false likely
- -> do test' <- cmmExprConFold DataReference test
- return $ case test' of
- CmmLit (CmmInt 0 _) -> CmmBranch false
- CmmLit (CmmInt _ _) -> CmmBranch true
- _other -> CmmCondBranch test' true false likely
-
- CmmSwitch expr ids
- -> do expr' <- cmmExprConFold DataReference expr
- return $ CmmSwitch expr' ids
-
- other
- -> return other
-
-cmmExprConFold :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr
-cmmExprConFold referenceKind expr = do
- dflags <- getDynFlags
-
- -- With -O1 and greater, the cmmSink pass does constant-folding, so
- -- we don't need to do it again here.
- let expr' = if optLevel dflags >= 1
- then expr
- else cmmExprCon dflags expr
-
- cmmExprNative referenceKind expr'
-
-cmmExprCon :: DynFlags -> CmmExpr -> CmmExpr
-cmmExprCon dflags (CmmLoad addr rep) = CmmLoad (cmmExprCon dflags addr) rep
-cmmExprCon dflags (CmmMachOp mop args)
- = cmmMachOpFold dflags mop (map (cmmExprCon dflags) args)
-cmmExprCon _ other = other
-
--- handles both PIC and non-PIC cases... a very strange mixture
--- of things to do.
-cmmExprNative :: ReferenceKind -> CmmExpr -> CmmOptM CmmExpr
-cmmExprNative referenceKind expr = do
- dflags <- getDynFlags
- let platform = targetPlatform dflags
- arch = platformArch platform
- case expr of
- CmmLoad addr rep
- -> do addr' <- cmmExprNative DataReference addr
- return $ CmmLoad addr' rep
-
- CmmMachOp mop args
- -> do args' <- mapM (cmmExprNative DataReference) args
- return $ CmmMachOp mop args'
-
- CmmLit (CmmBlock id)
- -> cmmExprNative referenceKind (CmmLit (CmmLabel (infoTblLbl id)))
- -- we must convert block Ids to CLabels here, because we
- -- might have to do the PIC transformation. Hence we must
- -- not modify BlockIds beyond this point.
-
- CmmLit (CmmLabel lbl)
- -> do
- cmmMakeDynamicReference dflags referenceKind lbl
- CmmLit (CmmLabelOff lbl off)
- -> do
- dynRef <- cmmMakeDynamicReference dflags referenceKind lbl
- -- need to optimize here, since it's late
- return $ cmmMachOpFold dflags (MO_Add (wordWidth dflags)) [
- dynRef,
- (CmmLit $ CmmInt (fromIntegral off) (wordWidth dflags))
- ]
-
- -- On powerpc (non-PIC), it's easier to jump directly to a label than
- -- to use the register table, so we replace these registers
- -- with the corresponding labels:
- CmmReg (CmmGlobal EagerBlackholeInfo)
- | arch == ArchPPC && not (positionIndependent dflags)
- -> cmmExprNative referenceKind $
- CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_EAGER_BLACKHOLE_info")))
- CmmReg (CmmGlobal GCEnter1)
- | arch == ArchPPC && not (positionIndependent dflags)
- -> cmmExprNative referenceKind $
- CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_gc_enter_1")))
- CmmReg (CmmGlobal GCFun)
- | arch == ArchPPC && not (positionIndependent dflags)
- -> cmmExprNative referenceKind $
- CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit "__stg_gc_fun")))
-
- other
- -> return other
diff --git a/compiler/nativeGen/BlockLayout.hs b/compiler/nativeGen/BlockLayout.hs
deleted file mode 100644
index e488f0908f..0000000000
--- a/compiler/nativeGen/BlockLayout.hs
+++ /dev/null
@@ -1,895 +0,0 @@
---
--- Copyright (c) 2018 Andreas Klebinger
---
-
-{-# LANGUAGE CPP #-}
-{-# LANGUAGE DataKinds #-}
-{-# LANGUAGE ScopedTypeVariables #-}
-{-# LANGUAGE TypeFamilies #-}
-{-# LANGUAGE GeneralizedNewtypeDeriving #-}
-{-# LANGUAGE FlexibleContexts #-}
-
-module BlockLayout
- ( sequenceTop )
-where
-
-#include "HsVersions.h"
-import GhcPrelude
-
-import Instruction
-import NCGMonad
-import CFG
-
-import GHC.Cmm.BlockId
-import GHC.Cmm
-import GHC.Cmm.Dataflow.Collections
-import GHC.Cmm.Dataflow.Label
-
-import GHC.Driver.Session (gopt, GeneralFlag(..), DynFlags, backendMaintainsCfg)
-import UniqFM
-import Util
-import Unique
-
-import Digraph
-import Outputable
-import Maybes
-
--- DEBUGGING ONLY
---import GHC.Cmm.DebugBlock
---import Debug.Trace
-import ListSetOps (removeDups)
-
-import OrdList
-import Data.List
-import Data.Foldable (toList)
-
-import qualified Data.Set as Set
-import Data.STRef
-import Control.Monad.ST.Strict
-import Control.Monad (foldM)
-
-{-
- Note [CFG based code layout]
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
- The major steps in placing blocks are as follow:
- * Compute a CFG based on the Cmm AST, see getCfgProc.
- This CFG will have edge weights representing a guess
- on how important they are.
- * After we convert Cmm to Asm we run `optimizeCFG` which
- adds a few more "educated guesses" to the equation.
- * Then we run loop analysis on the CFG (`loopInfo`) which tells us
- about loop headers, loop nesting levels and the sort.
- * Based on the CFG and loop information refine the edge weights
- in the CFG and normalize them relative to the most often visited
- node. (See `mkGlobalWeights`)
- * Feed this CFG into the block layout code (`sequenceTop`) in this
- module. Which will then produce a code layout based on the input weights.
-
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- ~~~ Note [Chain based CFG serialization]
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
- For additional information also look at
- https://gitlab.haskell.org/ghc/ghc/wikis/commentary/compiler/code-layout
-
- We have a CFG with edge weights based on which we try to place blocks next to
- each other.
-
- Edge weights not only represent likelihood of control transfer between blocks
- but also how much a block would benefit from being placed sequentially after
- it's predecessor.
- For example blocks which are preceded by an info table are more likely to end
- up in a different cache line than their predecessor and we can't eliminate the jump
- so there is less benefit to placing them sequentially.
-
- For example consider this example:
-
- A: ...
- jmp cond D (weak successor)
- jmp B
- B: ...
- jmp C
- C: ...
- jmp X
- D: ...
- jmp B (weak successor)
-
- We determine a block layout by building up chunks (calling them chains) of
- possible control flows for which blocks will be placed sequentially.
-
- Eg for our example we might end up with two chains like:
- [A->B->C->X],[D]. Blocks inside chains will always be placed sequentially.
- However there is no particular order in which chains are placed since
- (hopefully) the blocks for which sequentiality is important have already
- been placed in the same chain.
-
- -----------------------------------------------------------------------------
- 1) First try to create a list of good chains.
- -----------------------------------------------------------------------------
-
- Good chains are these which allow us to eliminate jump instructions.
- Which further eliminate often executed jumps first.
-
- We do so by:
-
- *) Ignore edges which represent instructions which can not be replaced
- by fall through control flow. Primarily calls and edges to blocks which
- are prefixed by a info table we have to jump across.
-
- *) Then process remaining edges in order of frequency taken and:
-
- +) If source and target have not been placed build a new chain from them.
-
- +) If source and target have been placed, and are ends of differing chains
- try to merge the two chains.
-
- +) If one side of the edge is a end/front of a chain, add the other block of
- to edge to the same chain
-
- Eg if we look at edge (B -> C) and already have the chain (A -> B)
- then we extend the chain to (A -> B -> C).
-
- +) If the edge was used to modify or build a new chain remove the edge from
- our working list.
-
- *) If there any blocks not being placed into a chain after these steps we place
- them into a chain consisting of only this block.
-
- Ranking edges by their taken frequency, if
- two edges compete for fall through on the same target block, the one taken
- more often will automatically win out. Resulting in fewer instructions being
- executed.
-
- Creating singleton chains is required for situations where we have code of the
- form:
-
- A: goto B:
- <infoTable>
- B: goto C:
- <infoTable>
- C: ...
-
- As the code in block B is only connected to the rest of the program via edges
- which will be ignored in this step we make sure that B still ends up in a chain
- this way.
-
- -----------------------------------------------------------------------------
- 2) We also try to fuse chains.
- -----------------------------------------------------------------------------
-
- As a result from the above step we still end up with multiple chains which
- represent sequential control flow chunks. But they are not yet suitable for
- code layout as we need to place *all* blocks into a single sequence.
-
- In this step we combine chains result from the above step via these steps:
-
- *) Look at the ranked list of *all* edges, including calls/jumps across info tables
- and the like.
-
- *) Look at each edge and
-
- +) Given an edge (A -> B) try to find two chains for which
- * Block A is at the end of one chain
- * Block B is at the front of the other chain.
- +) If we find such a chain we "fuse" them into a single chain, remove the
- edge from working set and continue.
- +) If we can't find such chains we skip the edge and continue.
-
- -----------------------------------------------------------------------------
- 3) Place indirect successors (neighbours) after each other
- -----------------------------------------------------------------------------
-
- We might have chains [A,B,C,X],[E] in a CFG of the sort:
-
- A ---> B ---> C --------> X(exit)
- \- ->E- -/
-
- While E does not follow X it's still beneficial to place them near each other.
- This can be advantageous if eg C,X,E will end up in the same cache line.
-
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- ~~~ Note [Triangle Control Flow]
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
- Checking if an argument is already evaluated leads to a somewhat
- special case which looks like this:
-
- A:
- if (R1 & 7 != 0) goto Leval; else goto Lwork;
- Leval: // global
- call (I64[R1])(R1) returns to Lwork, args: 8, res: 8, upd: 8;
- Lwork: // global
- ...
-
- A
- |\
- | Leval
- |/ - (This edge can be missing because of optimizations)
- Lwork
-
- Once we hit the metal the call instruction is just 2-3 bytes large
- depending on the register used. So we lay out the assembly like this:
-
- movq %rbx,%rax
- andl $7,%eax
- cmpq $1,%rax
- jne Lwork
- Leval:
- jmp *(%rbx) # encoded in 2-3 bytes.
- <info table>
- Lwork:
- ...
-
- We could explicitly check for this control flow pattern.
-
- This is advantageous because:
- * It's optimal if the argument isn't evaluated.
- * If it's evaluated we only have the extra cost of jumping over
- the 2-3 bytes for the call.
- * Guarantees the smaller encoding for the conditional jump.
-
- However given that Lwork usually has an info table we
- penalize this edge. So Leval should get placed first
- either way and things work out for the best.
-
- Optimizing for the evaluated case instead would penalize
- the other code path. It adds an jump as we can't fall through
- to Lwork because of the info table.
- Assuming that Lwork is large the chance that the "call" ends up
- in the same cache line is also fairly small.
-
--}
-
-
--- | Look at X number of blocks in two chains to determine
--- if they are "neighbours".
-neighbourOverlapp :: Int
-neighbourOverlapp = 2
-
--- | Maps blocks near the end of a chain to it's chain AND
--- the other blocks near the end.
--- [A,B,C,D,E] Gives entries like (B -> ([A,B], [A,B,C,D,E]))
--- where [A,B] are blocks in the end region of a chain.
--- This is cheaper then recomputing the ends multiple times.
-type FrontierMap = LabelMap ([BlockId],BlockChain)
-
--- | A non empty ordered sequence of basic blocks.
--- It is suitable for serialization in this order.
---
--- We use OrdList instead of [] to allow fast append on both sides
--- when combining chains.
-newtype BlockChain
- = BlockChain { chainBlocks :: (OrdList BlockId) }
-
--- All chains are constructed the same way so comparison
--- including structure is faster.
-instance Eq BlockChain where
- BlockChain b1 == BlockChain b2 = strictlyEqOL b1 b2
-
--- Useful for things like sets and debugging purposes, sorts by blocks
--- in the chain.
-instance Ord (BlockChain) where
- (BlockChain lbls1) `compare` (BlockChain lbls2)
- = ASSERT(toList lbls1 /= toList lbls2 || lbls1 `strictlyEqOL` lbls2)
- strictlyOrdOL lbls1 lbls2
-
-instance Outputable (BlockChain) where
- ppr (BlockChain blks) =
- parens (text "Chain:" <+> ppr (fromOL $ blks) )
-
-chainFoldl :: (b -> BlockId -> b) -> b -> BlockChain -> b
-chainFoldl f z (BlockChain blocks) = foldl' f z blocks
-
-noDups :: [BlockChain] -> Bool
-noDups chains =
- let chainBlocks = concatMap chainToBlocks chains :: [BlockId]
- (_blocks, dups) = removeDups compare chainBlocks
- in if null dups then True
- else pprTrace "Duplicates:" (ppr (map toList dups) $$ text "chains" <+> ppr chains ) False
-
-inFront :: BlockId -> BlockChain -> Bool
-inFront bid (BlockChain seq)
- = headOL seq == bid
-
-chainSingleton :: BlockId -> BlockChain
-chainSingleton lbl
- = BlockChain (unitOL lbl)
-
-chainFromList :: [BlockId] -> BlockChain
-chainFromList = BlockChain . toOL
-
-chainSnoc :: BlockChain -> BlockId -> BlockChain
-chainSnoc (BlockChain blks) lbl
- = BlockChain (blks `snocOL` lbl)
-
-chainCons :: BlockId -> BlockChain -> BlockChain
-chainCons lbl (BlockChain blks)
- = BlockChain (lbl `consOL` blks)
-
-chainConcat :: BlockChain -> BlockChain -> BlockChain
-chainConcat (BlockChain blks1) (BlockChain blks2)
- = BlockChain (blks1 `appOL` blks2)
-
-chainToBlocks :: BlockChain -> [BlockId]
-chainToBlocks (BlockChain blks) = fromOL blks
-
--- | Given the Chain A -> B -> C -> D and we break at C
--- we get the two Chains (A -> B, C -> D) as result.
-breakChainAt :: BlockId -> BlockChain
- -> (BlockChain,BlockChain)
-breakChainAt bid (BlockChain blks)
- | not (bid == head rblks)
- = panic "Block not in chain"
- | otherwise
- = (BlockChain (toOL lblks),
- BlockChain (toOL rblks))
- where
- (lblks, rblks) = break (\lbl -> lbl == bid) (fromOL blks)
-
-takeR :: Int -> BlockChain -> [BlockId]
-takeR n (BlockChain blks) =
- take n . fromOLReverse $ blks
-
-takeL :: Int -> BlockChain -> [BlockId]
-takeL n (BlockChain blks) =
- take n . fromOL $ blks
-
--- Note [Combining neighborhood chains]
--- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
--- See also Note [Chain based CFG serialization]
--- We have the chains (A-B-C-D) and (E-F) and an Edge C->E.
---
--- While placing the latter after the former doesn't result in sequential
--- control flow it is still beneficial. As block C and E might end
--- up in the same cache line.
---
--- So we place these chains next to each other even if we can't fuse them.
---
--- A -> B -> C -> D
--- v
--- - -> E -> F ...
---
--- A simple heuristic to chose which chains we want to combine:
--- * Process edges in descending priority.
--- * Check if there is a edge near the end of one chain which goes
--- to a block near the start of another edge.
---
--- While we could take into account the space between the two blocks which
--- share an edge this blows up compile times quite a bit. It requires
--- us to find all edges between two chains, check the distance for all edges,
--- rank them based on the distance and only then we can select two chains
--- to combine. Which would add a lot of complexity for little gain.
---
--- So instead we just rank by the strength of the edge and use the first pair we
--- find.
-
--- | For a given list of chains and edges try to combine chains with strong
--- edges between them.
-combineNeighbourhood :: [CfgEdge] -- ^ Edges to consider
- -> [BlockChain] -- ^ Current chains of blocks
- -> ([BlockChain], Set.Set (BlockId,BlockId))
- -- ^ Resulting list of block chains, and a set of edges which
- -- were used to fuse chains and as such no longer need to be
- -- considered.
-combineNeighbourhood edges chains
- = -- pprTraceIt "Neighbours" $
- -- pprTrace "combineNeighbours" (ppr edges) $
- applyEdges edges endFrontier startFrontier (Set.empty)
- where
- --Build maps from chain ends to chains
- endFrontier, startFrontier :: FrontierMap
- endFrontier =
- mapFromList $ concatMap (\chain ->
- let ends = getEnds chain :: [BlockId]
- entry = (ends,chain)
- in map (\x -> (x,entry)) ends ) chains
- startFrontier =
- mapFromList $ concatMap (\chain ->
- let front = getFronts chain
- entry = (front,chain)
- in map (\x -> (x,entry)) front) chains
- applyEdges :: [CfgEdge] -> FrontierMap -> FrontierMap -> Set.Set (BlockId, BlockId)
- -> ([BlockChain], Set.Set (BlockId,BlockId))
- applyEdges [] chainEnds _chainFronts combined =
- (ordNub $ map snd $ mapElems chainEnds, combined)
- applyEdges ((CfgEdge from to _w):edges) chainEnds chainFronts combined
- | Just (c1_e,c1) <- mapLookup from chainEnds
- , Just (c2_f,c2) <- mapLookup to chainFronts
- , c1 /= c2 -- Avoid trying to concat a chain with itself.
- = let newChain = chainConcat c1 c2
- newChainFrontier = getFronts newChain
- newChainEnds = getEnds newChain
- newFronts :: FrontierMap
- newFronts =
- let withoutOld =
- foldl' (\m b -> mapDelete b m :: FrontierMap) chainFronts (c2_f ++ getFronts c1)
- entry =
- (newChainFrontier,newChain) --let bound to ensure sharing
- in foldl' (\m x -> mapInsert x entry m)
- withoutOld newChainFrontier
-
- newEnds =
- let withoutOld = foldl' (\m b -> mapDelete b m) chainEnds (c1_e ++ getEnds c2)
- entry = (newChainEnds,newChain) --let bound to ensure sharing
- in foldl' (\m x -> mapInsert x entry m)
- withoutOld newChainEnds
- in
- -- pprTrace "ApplyEdges"
- -- (text "before" $$
- -- text "fronts" <+> ppr chainFronts $$
- -- text "ends" <+> ppr chainEnds $$
-
- -- text "various" $$
- -- text "newChain" <+> ppr newChain $$
- -- text "newChainFrontier" <+> ppr newChainFrontier $$
- -- text "newChainEnds" <+> ppr newChainEnds $$
- -- text "drop" <+> ppr ((c2_f ++ getFronts c1) ++ (c1_e ++ getEnds c2)) $$
-
- -- text "after" $$
- -- text "fronts" <+> ppr newFronts $$
- -- text "ends" <+> ppr newEnds
- -- )
- applyEdges edges newEnds newFronts (Set.insert (from,to) combined)
- | otherwise
- = applyEdges edges chainEnds chainFronts combined
- where
-
- getFronts chain = takeL neighbourOverlapp chain
- getEnds chain = takeR neighbourOverlapp chain
-
--- In the last stop we combine all chains into a single one.
--- Trying to place chains with strong edges next to each other.
-mergeChains :: [CfgEdge] -> [BlockChain]
- -> (BlockChain)
-mergeChains edges chains
- = -- pprTrace "combine" (ppr edges) $
- runST $ do
- let addChain m0 chain = do
- ref <- newSTRef chain
- return $ chainFoldl (\m' b -> mapInsert b ref m') m0 chain
- chainMap' <- foldM (\m0 c -> addChain m0 c) mapEmpty chains
- merge edges chainMap'
- where
- -- We keep a map from ALL blocks to their respective chain (sigh)
- -- This is required since when looking at an edge we need to find
- -- the associated chains quickly.
- -- We use a map of STRefs, maintaining a invariant of one STRef per chain.
- -- When merging chains we can update the
- -- STRef of one chain once (instead of writing to the map for each block).
- -- We then overwrite the STRefs for the other chain so there is again only
- -- a single STRef for the combined chain.
- -- The difference in terms of allocations saved is ~0.2% with -O so actually
- -- significant compared to using a regular map.
-
- merge :: forall s. [CfgEdge] -> LabelMap (STRef s BlockChain) -> ST s BlockChain
- merge [] chains = do
- chains' <- ordNub <$> (mapM readSTRef $ mapElems chains) :: ST s [BlockChain]
- return $ foldl' chainConcat (head chains') (tail chains')
- merge ((CfgEdge from to _):edges) chains
- -- | pprTrace "merge" (ppr (from,to) <> ppr chains) False
- -- = undefined
- | cFrom == cTo
- = merge edges chains
- | otherwise
- = do
- chains' <- mergeComb cFrom cTo
- merge edges chains'
- where
- mergeComb :: STRef s BlockChain -> STRef s BlockChain -> ST s (LabelMap (STRef s BlockChain))
- mergeComb refFrom refTo = do
- cRight <- readSTRef refTo
- chain <- pure chainConcat <*> readSTRef refFrom <*> pure cRight
- writeSTRef refFrom chain
- return $ chainFoldl (\m b -> mapInsert b refFrom m) chains cRight
-
- cFrom = expectJust "mergeChains:chainMap:from" $ mapLookup from chains
- cTo = expectJust "mergeChains:chainMap:to" $ mapLookup to chains
-
-
--- See Note [Chain based CFG serialization] for the general idea.
--- This creates and fuses chains at the same time for performance reasons.
-
--- Try to build chains from a list of edges.
--- Edges must be sorted **descending** by their priority.
--- Returns the constructed chains, along with all edges which
--- are irrelevant past this point, this information doesn't need
--- to be complete - it's only used to speed up the process.
--- An Edge is irrelevant if the ends are part of the same chain.
--- We say these edges are already linked
-buildChains :: [CfgEdge] -> [BlockId]
- -> ( LabelMap BlockChain -- Resulting chains, indexd by end if chain.
- , Set.Set (BlockId, BlockId)) --List of fused edges.
-buildChains edges blocks
- = runST $ buildNext setEmpty mapEmpty mapEmpty edges Set.empty
- where
- -- buildNext builds up chains from edges one at a time.
-
- -- We keep a map from the ends of chains to the chains.
- -- This we we can easily check if an block should be appended to an
- -- existing chain!
- -- We store them using STRefs so we don't have to rebuild the spine of both
- -- maps every time we update a chain.
- buildNext :: forall s. LabelSet
- -> LabelMap (STRef s BlockChain) -- Map from end of chain to chain.
- -> LabelMap (STRef s BlockChain) -- Map from start of chain to chain.
- -> [CfgEdge] -- Edges to check - ordered by decreasing weight
- -> Set.Set (BlockId, BlockId) -- Used edges
- -> ST s ( LabelMap BlockChain -- Chains by end
- , Set.Set (BlockId, BlockId) --List of fused edges
- )
- buildNext placed _chainStarts chainEnds [] linked = do
- ends' <- sequence $ mapMap readSTRef chainEnds :: ST s (LabelMap BlockChain)
- -- Any remaining blocks have to be made to singleton chains.
- -- They might be combined with other chains later on outside this function.
- let unplaced = filter (\x -> not (setMember x placed)) blocks
- singletons = map (\x -> (x,chainSingleton x)) unplaced :: [(BlockId,BlockChain)]
- return (foldl' (\m (k,v) -> mapInsert k v m) ends' singletons , linked)
- buildNext placed chainStarts chainEnds (edge:todo) linked
- | from == to
- -- We skip self edges
- = buildNext placed chainStarts chainEnds todo (Set.insert (from,to) linked)
- | not (alreadyPlaced from) &&
- not (alreadyPlaced to)
- = do
- --pprTraceM "Edge-Chain:" (ppr edge)
- chain' <- newSTRef $ chainFromList [from,to]
- buildNext
- (setInsert to (setInsert from placed))
- (mapInsert from chain' chainStarts)
- (mapInsert to chain' chainEnds)
- todo
- (Set.insert (from,to) linked)
-
- | (alreadyPlaced from) &&
- (alreadyPlaced to)
- , Just predChain <- mapLookup from chainEnds
- , Just succChain <- mapLookup to chainStarts
- , predChain /= succChain -- Otherwise we try to create a cycle.
- = do
- -- pprTraceM "Fusing edge" (ppr edge)
- fuseChain predChain succChain
-
- | (alreadyPlaced from) &&
- (alreadyPlaced to)
- = --pprTraceM "Skipping:" (ppr edge) >>
- buildNext placed chainStarts chainEnds todo linked
-
- | otherwise
- = do -- pprTraceM "Finding chain for:" (ppr edge $$
- -- text "placed" <+> ppr placed)
- findChain
- where
- from = edgeFrom edge
- to = edgeTo edge
- alreadyPlaced blkId = (setMember blkId placed)
-
- -- Combine two chains into a single one.
- fuseChain :: STRef s BlockChain -> STRef s BlockChain
- -> ST s ( LabelMap BlockChain -- Chains by end
- , Set.Set (BlockId, BlockId) --List of fused edges
- )
- fuseChain fromRef toRef = do
- fromChain <- readSTRef fromRef
- toChain <- readSTRef toRef
- let newChain = chainConcat fromChain toChain
- ref <- newSTRef newChain
- let start = head $ takeL 1 newChain
- let end = head $ takeR 1 newChain
- -- chains <- sequence $ mapMap readSTRef chainStarts
- -- pprTraceM "pre-fuse chains:" $ ppr chains
- buildNext
- placed
- (mapInsert start ref $ mapDelete to $ chainStarts)
- (mapInsert end ref $ mapDelete from $ chainEnds)
- todo
- (Set.insert (from,to) linked)
-
-
- --Add the block to a existing chain or creates a new chain
- findChain :: ST s ( LabelMap BlockChain -- Chains by end
- , Set.Set (BlockId, BlockId) --List of fused edges
- )
- findChain
- -- We can attach the block to the end of a chain
- | alreadyPlaced from
- , Just predChain <- mapLookup from chainEnds
- = do
- chain <- readSTRef predChain
- let newChain = chainSnoc chain to
- writeSTRef predChain newChain
- let chainEnds' = mapInsert to predChain $ mapDelete from chainEnds
- -- chains <- sequence $ mapMap readSTRef chainStarts
- -- pprTraceM "from chains:" $ ppr chains
- buildNext (setInsert to placed) chainStarts chainEnds' todo (Set.insert (from,to) linked)
- -- We can attack it to the front of a chain
- | alreadyPlaced to
- , Just succChain <- mapLookup to chainStarts
- = do
- chain <- readSTRef succChain
- let newChain = from `chainCons` chain
- writeSTRef succChain newChain
- let chainStarts' = mapInsert from succChain $ mapDelete to chainStarts
- -- chains <- sequence $ mapMap readSTRef chainStarts'
- -- pprTraceM "to chains:" $ ppr chains
- buildNext (setInsert from placed) chainStarts' chainEnds todo (Set.insert (from,to) linked)
- -- The placed end of the edge is part of a chain already and not an end.
- | otherwise
- = do
- let block = if alreadyPlaced to then from else to
- --pprTraceM "Singleton" $ ppr block
- let newChain = chainSingleton block
- ref <- newSTRef newChain
- buildNext (setInsert block placed) (mapInsert block ref chainStarts)
- (mapInsert block ref chainEnds) todo (linked)
- where
- alreadyPlaced blkId = (setMember blkId placed)
-
--- | Place basic blocks based on the given CFG.
--- See Note [Chain based CFG serialization]
-sequenceChain :: forall a i. (Instruction i, Outputable i)
- => LabelMap a -- ^ Keys indicate an info table on the block.
- -> CFG -- ^ Control flow graph and some meta data.
- -> [GenBasicBlock i] -- ^ List of basic blocks to be placed.
- -> [GenBasicBlock i] -- ^ Blocks placed in sequence.
-sequenceChain _info _weights [] = []
-sequenceChain _info _weights [x] = [x]
-sequenceChain info weights' blocks@((BasicBlock entry _):_) =
- let weights :: CFG
- weights = --pprTrace "cfg'" (pprEdgeWeights cfg')
- cfg'
- where
- (_, globalEdgeWeights) = {-# SCC mkGlobalWeights #-} mkGlobalWeights entry weights'
- cfg' = {-# SCC rewriteEdges #-}
- mapFoldlWithKey
- (\cfg from m ->
- mapFoldlWithKey
- (\cfg to w -> setEdgeWeight cfg (EdgeWeight w) from to )
- cfg m )
- weights'
- globalEdgeWeights
-
- directEdges :: [CfgEdge]
- directEdges = sortBy (flip compare) $ catMaybes . map relevantWeight $ (infoEdgeList weights)
- where
- relevantWeight :: CfgEdge -> Maybe CfgEdge
- relevantWeight edge@(CfgEdge from to edgeInfo)
- | (EdgeInfo CmmSource { trans_cmmNode = CmmCall {} } _) <- edgeInfo
- -- Ignore edges across calls
- = Nothing
- | mapMember to info
- , w <- edgeWeight edgeInfo
- -- The payoff is small if we jump over an info table
- = Just (CfgEdge from to edgeInfo { edgeWeight = w/8 })
- | otherwise
- = Just edge
-
- blockMap :: LabelMap (GenBasicBlock i)
- blockMap
- = foldl' (\m blk@(BasicBlock lbl _ins) ->
- mapInsert lbl blk m)
- mapEmpty blocks
-
- (builtChains, builtEdges)
- = {-# SCC "buildChains" #-}
- --pprTraceIt "generatedChains" $
- --pprTrace "blocks" (ppr (mapKeys blockMap)) $
- buildChains directEdges (mapKeys blockMap)
-
- rankedEdges :: [CfgEdge]
- -- Sort descending by weight, remove fused edges
- rankedEdges =
- filter (\edge -> not (Set.member (edgeFrom edge,edgeTo edge) builtEdges)) $
- directEdges
-
- (neighbourChains, combined)
- = ASSERT(noDups $ mapElems builtChains)
- {-# SCC "groupNeighbourChains" #-}
- -- pprTraceIt "NeighbourChains" $
- combineNeighbourhood rankedEdges (mapElems builtChains)
-
-
- allEdges :: [CfgEdge]
- allEdges = {-# SCC allEdges #-}
- sortOn (relevantWeight) $ filter (not . deadEdge) $ (infoEdgeList weights)
- where
- deadEdge :: CfgEdge -> Bool
- deadEdge (CfgEdge from to _) = let e = (from,to) in Set.member e combined || Set.member e builtEdges
- relevantWeight :: CfgEdge -> EdgeWeight
- relevantWeight (CfgEdge _ _ edgeInfo)
- | EdgeInfo (CmmSource { trans_cmmNode = CmmCall {}}) _ <- edgeInfo
- -- Penalize edges across calls
- = weight/(64.0)
- | otherwise
- = weight
- where
- -- negate to sort descending
- weight = negate (edgeWeight edgeInfo)
-
- masterChain =
- {-# SCC "mergeChains" #-}
- -- pprTraceIt "MergedChains" $
- mergeChains allEdges neighbourChains
-
- --Make sure the first block stays first
- prepedChains
- | inFront entry masterChain
- = [masterChain]
- | (rest,entry) <- breakChainAt entry masterChain
- = [entry,rest]
-#if __GLASGOW_HASKELL__ <= 810
- | otherwise = pprPanic "Entry point eliminated" $
- ppr masterChain
-#endif
-
- blockList
- = ASSERT(noDups [masterChain])
- (concatMap fromOL $ map chainBlocks prepedChains)
-
- --chainPlaced = setFromList $ map blockId blockList :: LabelSet
- chainPlaced = setFromList $ blockList :: LabelSet
- unplaced =
- let blocks = mapKeys blockMap
- isPlaced b = setMember (b) chainPlaced
- in filter (\block -> not (isPlaced block)) blocks
-
- placedBlocks =
- -- We want debug builds to catch this as it's a good indicator for
- -- issues with CFG invariants. But we don't want to blow up production
- -- builds if something slips through.
- ASSERT(null unplaced)
- --pprTraceIt "placedBlocks" $
- -- ++ [] is stil kinda expensive
- if null unplaced then blockList else blockList ++ unplaced
- getBlock bid = expectJust "Block placement" $ mapLookup bid blockMap
- in
- --Assert we placed all blocks given as input
- ASSERT(all (\bid -> mapMember bid blockMap) placedBlocks)
- dropJumps info $ map getBlock placedBlocks
-
-{-# SCC dropJumps #-}
--- | Remove redundant jumps between blocks when we can rely on
--- fall through.
-dropJumps :: forall a i. Instruction i => LabelMap a -> [GenBasicBlock i]
- -> [GenBasicBlock i]
-dropJumps _ [] = []
-dropJumps info ((BasicBlock lbl ins):todo)
- | not . null $ ins --This can happen because of shortcutting
- , [dest] <- jumpDestsOfInstr (last ins)
- , ((BasicBlock nextLbl _) : _) <- todo
- , not (mapMember dest info)
- , nextLbl == dest
- = BasicBlock lbl (init ins) : dropJumps info todo
- | otherwise
- = BasicBlock lbl ins : dropJumps info todo
-
-
--- -----------------------------------------------------------------------------
--- Sequencing the basic blocks
-
--- Cmm BasicBlocks are self-contained entities: they always end in a
--- jump, either non-local or to another basic block in the same proc.
--- In this phase, we attempt to place the basic blocks in a sequence
--- such that as many of the local jumps as possible turn into
--- fallthroughs.
-
-sequenceTop
- :: (Instruction instr, Outputable instr)
- => DynFlags -- Determine which layout algo to use
- -> NcgImpl statics instr jumpDest
- -> Maybe CFG -- ^ CFG if we have one.
- -> NatCmmDecl statics instr -- ^ Function to serialize
- -> NatCmmDecl statics instr
-
-sequenceTop _ _ _ top@(CmmData _ _) = top
-sequenceTop dflags ncgImpl edgeWeights
- (CmmProc info lbl live (ListGraph blocks))
- | (gopt Opt_CfgBlocklayout dflags) && backendMaintainsCfg dflags
- --Use chain based algorithm
- , Just cfg <- edgeWeights
- = CmmProc info lbl live ( ListGraph $ ncgMakeFarBranches ncgImpl info $
- {-# SCC layoutBlocks #-}
- sequenceChain info cfg blocks )
- | otherwise
- --Use old algorithm
- = let cfg = if dontUseCfg then Nothing else edgeWeights
- in CmmProc info lbl live ( ListGraph $ ncgMakeFarBranches ncgImpl info $
- {-# SCC layoutBlocks #-}
- sequenceBlocks cfg info blocks)
- where
- dontUseCfg = gopt Opt_WeightlessBlocklayout dflags ||
- (not $ backendMaintainsCfg dflags)
-
--- The old algorithm:
--- It is very simple (and stupid): We make a graph out of
--- the blocks where there is an edge from one block to another iff the
--- first block ends by jumping to the second. Then we topologically
--- sort this graph. Then traverse the list: for each block, we first
--- output the block, then if it has an out edge, we move the
--- destination of the out edge to the front of the list, and continue.
-
--- FYI, the classic layout for basic blocks uses postorder DFS; this
--- algorithm is implemented in Hoopl.
-
-sequenceBlocks :: Instruction inst => Maybe CFG -> LabelMap a
- -> [GenBasicBlock inst] -> [GenBasicBlock inst]
-sequenceBlocks _edgeWeight _ [] = []
-sequenceBlocks edgeWeights infos (entry:blocks) =
- let entryNode = mkNode edgeWeights entry
- bodyNodes = reverse
- (flattenSCCs (sccBlocks edgeWeights blocks))
- in dropJumps infos . seqBlocks infos $ ( entryNode : bodyNodes)
- -- the first block is the entry point ==> it must remain at the start.
-
-sccBlocks
- :: Instruction instr
- => Maybe CFG -> [NatBasicBlock instr]
- -> [SCC (Node BlockId (NatBasicBlock instr))]
-sccBlocks edgeWeights blocks =
- stronglyConnCompFromEdgedVerticesUniqR
- (map (mkNode edgeWeights) blocks)
-
-mkNode :: (Instruction t)
- => Maybe CFG -> GenBasicBlock t
- -> Node BlockId (GenBasicBlock t)
-mkNode edgeWeights block@(BasicBlock id instrs) =
- DigraphNode block id outEdges
- where
- outEdges :: [BlockId]
- outEdges
- --Select the heaviest successor, ignore weights <= zero
- = successor
- where
- successor
- | Just successors <- fmap (`getSuccEdgesSorted` id)
- edgeWeights -- :: Maybe [(Label, EdgeInfo)]
- = case successors of
- [] -> []
- ((target,info):_)
- | length successors > 2 || edgeWeight info <= 0 -> []
- | otherwise -> [target]
- | otherwise
- = case jumpDestsOfInstr (last instrs) of
- [one] -> [one]
- _many -> []
-
-
-seqBlocks :: LabelMap i -> [Node BlockId (GenBasicBlock t1)]
- -> [GenBasicBlock t1]
-seqBlocks infos blocks = placeNext pullable0 todo0
- where
- -- pullable: Blocks that are not yet placed
- -- todo: Original order of blocks, to be followed if we have no good
- -- reason not to;
- -- may include blocks that have already been placed, but then
- -- these are not in pullable
- pullable0 = listToUFM [ (i,(b,n)) | DigraphNode b i n <- blocks ]
- todo0 = map node_key blocks
-
- placeNext _ [] = []
- placeNext pullable (i:rest)
- | Just (block, pullable') <- lookupDeleteUFM pullable i
- = place pullable' rest block
- | otherwise
- -- We already placed this block, so ignore
- = placeNext pullable rest
-
- place pullable todo (block,[])
- = block : placeNext pullable todo
- place pullable todo (block@(BasicBlock id instrs),[next])
- | mapMember next infos
- = block : placeNext pullable todo
- | Just (nextBlock, pullable') <- lookupDeleteUFM pullable next
- = BasicBlock id instrs : place pullable' todo nextBlock
- | otherwise
- = block : placeNext pullable todo
- place _ _ (_,tooManyNextNodes)
- = pprPanic "seqBlocks" (ppr tooManyNextNodes)
-
-
-lookupDeleteUFM :: Uniquable key => UniqFM elt -> key
- -> Maybe (elt, UniqFM elt)
-lookupDeleteUFM m k = do -- Maybe monad
- v <- lookupUFM m k
- return (v, delFromUFM m k)
diff --git a/compiler/nativeGen/CFG.hs b/compiler/nativeGen/CFG.hs
deleted file mode 100644
index 7e2c2de095..0000000000
--- a/compiler/nativeGen/CFG.hs
+++ /dev/null
@@ -1,1320 +0,0 @@
---
--- Copyright (c) 2018 Andreas Klebinger
---
-
-{-# LANGUAGE TypeFamilies, ScopedTypeVariables #-}
-{-# LANGUAGE TupleSections #-}
-{-# LANGUAGE GeneralizedNewtypeDeriving #-}
-{-# LANGUAGE CPP #-}
-{-# LANGUAGE Rank2Types #-}
-{-# LANGUAGE BangPatterns #-}
-{-# LANGUAGE DataKinds #-}
-
-module CFG
- ( CFG, CfgEdge(..), EdgeInfo(..), EdgeWeight(..)
- , TransitionSource(..)
-
- --Modify the CFG
- , addWeightEdge, addEdge
- , delEdge, delNode
- , addNodesBetween, shortcutWeightMap
- , reverseEdges, filterEdges
- , addImmediateSuccessor
- , mkWeightInfo, adjustEdgeWeight, setEdgeWeight
-
- --Query the CFG
- , infoEdgeList, edgeList
- , getSuccessorEdges, getSuccessors
- , getSuccEdgesSorted
- , getEdgeInfo
- , getCfgNodes, hasNode
-
- -- Loop Information
- , loopMembers, loopLevels, loopInfo
-
- --Construction/Misc
- , getCfg, getCfgProc, pprEdgeWeights, sanityCheckCfg
-
- --Find backedges and update their weight
- , optimizeCFG
- , mkGlobalWeights
-
- )
-where
-
-#include "HsVersions.h"
-
-import GhcPrelude
-
-import GHC.Cmm.BlockId
-import GHC.Cmm as Cmm
-
-import GHC.Cmm.Utils
-import GHC.Cmm.Switch
-import GHC.Cmm.Dataflow.Collections
-import GHC.Cmm.Dataflow.Label
-import GHC.Cmm.Dataflow.Block
-import qualified GHC.Cmm.Dataflow.Graph as G
-
-import Util
-import Digraph
-import Maybes
-
-import Unique
-import qualified Dominators as Dom
-import Data.IntMap.Strict (IntMap)
-import Data.IntSet (IntSet)
-
-import qualified Data.IntMap.Strict as IM
-import qualified Data.Map as M
-import qualified Data.IntSet as IS
-import qualified Data.Set as S
-import Data.Tree
-import Data.Bifunctor
-
-import Outputable
--- DEBUGGING ONLY
---import GHC.Cmm.DebugBlock
---import OrdList
---import GHC.Cmm.DebugBlock.Trace
-import GHC.Cmm.Ppr () -- For Outputable instances
-import qualified GHC.Driver.Session as D
-
-import Data.List (sort, nub, partition)
-import Data.STRef.Strict
-import Control.Monad.ST
-
-import Data.Array.MArray
-import Data.Array.ST
-import Data.Array.IArray
-import Data.Array.Unsafe (unsafeFreeze)
-import Data.Array.Base (unsafeRead, unsafeWrite)
-
-import Control.Monad
-
-type Prob = Double
-
-type Edge = (BlockId, BlockId)
-type Edges = [Edge]
-
-newtype EdgeWeight
- = EdgeWeight { weightToDouble :: Double }
- deriving (Eq,Ord,Enum,Num,Real,Fractional)
-
-instance Outputable EdgeWeight where
- ppr (EdgeWeight w) = doublePrec 5 w
-
-type EdgeInfoMap edgeInfo = LabelMap (LabelMap edgeInfo)
-
--- | A control flow graph where edges have been annotated with a weight.
--- Implemented as IntMap (IntMap <edgeData>)
--- We must uphold the invariant that for each edge A -> B we must have:
--- A entry B in the outer map.
--- A entry B in the map we get when looking up A.
--- Maintaining this invariant is useful as any failed lookup now indicates
--- an actual error in code which might go unnoticed for a while
--- otherwise.
-type CFG = EdgeInfoMap EdgeInfo
-
-data CfgEdge
- = CfgEdge
- { edgeFrom :: !BlockId
- , edgeTo :: !BlockId
- , edgeInfo :: !EdgeInfo
- }
-
--- | Careful! Since we assume there is at most one edge from A to B
--- the Eq instance does not consider weight.
-instance Eq CfgEdge where
- (==) (CfgEdge from1 to1 _) (CfgEdge from2 to2 _)
- = from1 == from2 && to1 == to2
-
--- | Edges are sorted ascending pointwise by weight, source and destination
-instance Ord CfgEdge where
- compare (CfgEdge from1 to1 (EdgeInfo {edgeWeight = weight1}))
- (CfgEdge from2 to2 (EdgeInfo {edgeWeight = weight2}))
- | weight1 < weight2 || weight1 == weight2 && from1 < from2 ||
- weight1 == weight2 && from1 == from2 && to1 < to2
- = LT
- | from1 == from2 && to1 == to2 && weight1 == weight2
- = EQ
- | otherwise
- = GT
-
-instance Outputable CfgEdge where
- ppr (CfgEdge from1 to1 edgeInfo)
- = parens (ppr from1 <+> text "-(" <> ppr edgeInfo <> text ")->" <+> ppr to1)
-
--- | Can we trace back a edge to a specific Cmm Node
--- or has it been introduced during assembly codegen. We use this to maintain
--- some information which would otherwise be lost during the
--- Cmm <-> asm transition.
--- See also Note [Inverting Conditional Branches]
-data TransitionSource
- = CmmSource { trans_cmmNode :: (CmmNode O C)
- , trans_info :: BranchInfo }
- | AsmCodeGen
- deriving (Eq)
-
-data BranchInfo = NoInfo -- ^ Unknown, but not heap or stack check.
- | HeapStackCheck -- ^ Heap or stack check
- deriving Eq
-
-instance Outputable BranchInfo where
- ppr NoInfo = text "regular"
- ppr HeapStackCheck = text "heap/stack"
-
-isHeapOrStackCheck :: TransitionSource -> Bool
-isHeapOrStackCheck (CmmSource { trans_info = HeapStackCheck}) = True
-isHeapOrStackCheck _ = False
-
--- | Information about edges
-data EdgeInfo
- = EdgeInfo
- { transitionSource :: !TransitionSource
- , edgeWeight :: !EdgeWeight
- } deriving (Eq)
-
-instance Outputable EdgeInfo where
- ppr edgeInfo = text "weight:" <+> ppr (edgeWeight edgeInfo)
-
--- | Convenience function, generate edge info based
--- on weight not originating from cmm.
-mkWeightInfo :: EdgeWeight -> EdgeInfo
-mkWeightInfo = EdgeInfo AsmCodeGen
-
--- | Adjust the weight between the blocks using the given function.
--- If there is no such edge returns the original map.
-adjustEdgeWeight :: CFG -> (EdgeWeight -> EdgeWeight)
- -> BlockId -> BlockId -> CFG
-adjustEdgeWeight cfg f from to
- | Just info <- getEdgeInfo from to cfg
- , !weight <- edgeWeight info
- , !newWeight <- f weight
- = addEdge from to (info { edgeWeight = newWeight}) cfg
- | otherwise = cfg
-
--- | Set the weight between the blocks to the given weight.
--- If there is no such edge returns the original map.
-setEdgeWeight :: CFG -> EdgeWeight
- -> BlockId -> BlockId -> CFG
-setEdgeWeight cfg !weight from to
- | Just info <- getEdgeInfo from to cfg
- = addEdge from to (info { edgeWeight = weight}) cfg
- | otherwise = cfg
-
-
-getCfgNodes :: CFG -> [BlockId]
-getCfgNodes m =
- mapKeys m
-
--- | Is this block part of this graph?
-hasNode :: CFG -> BlockId -> Bool
-hasNode m node =
- -- Check the invariant that each node must exist in the first map or not at all.
- ASSERT( found || not (any (mapMember node) m))
- found
- where
- found = mapMember node m
-
-
-
--- | Check if the nodes in the cfg and the set of blocks are the same.
--- In a case of a missmatch we panic and show the difference.
-sanityCheckCfg :: CFG -> LabelSet -> SDoc -> Bool
-sanityCheckCfg m blockSet msg
- | blockSet == cfgNodes
- = True
- | otherwise =
- pprPanic "Block list and cfg nodes don't match" (
- text "difference:" <+> ppr diff $$
- text "blocks:" <+> ppr blockSet $$
- text "cfg:" <+> pprEdgeWeights m $$
- msg )
- False
- where
- cfgNodes = setFromList $ getCfgNodes m :: LabelSet
- diff = (setUnion cfgNodes blockSet) `setDifference` (setIntersection cfgNodes blockSet) :: LabelSet
-
--- | Filter the CFG with a custom function f.
--- Paramaeters are `f from to edgeInfo`
-filterEdges :: (BlockId -> BlockId -> EdgeInfo -> Bool) -> CFG -> CFG
-filterEdges f cfg =
- mapMapWithKey filterSources cfg
- where
- filterSources from m =
- mapFilterWithKey (\to w -> f from to w) m
-
-
-{- Note [Updating the CFG during shortcutting]
-
-See Note [What is shortcutting] in the control flow optimization
-code (GHC.Cmm.ContFlowOpt) for a slightly more in depth explanation on shortcutting.
-
-In the native backend we shortcut jumps at the assembly level. (AsmCodeGen.hs)
-This means we remove blocks containing only one jump from the code
-and instead redirecting all jumps targeting this block to the deleted
-blocks jump target.
-
-However we want to have an accurate representation of control
-flow in the CFG. So we add/remove edges accordingly to account
-for the eliminated blocks and new edges.
-
-If we shortcut A -> B -> C to A -> C:
-* We delete edges A -> B and B -> C
-* Replacing them with the edge A -> C
-
-We also try to preserve jump weights while doing so.
-
-Note that:
-* The edge B -> C can't have interesting weights since
- the block B consists of a single unconditional jump without branching.
-* We delete the edge A -> B and add the edge A -> C.
-* The edge A -> B can be one of many edges originating from A so likely
- has edge weights we want to preserve.
-
-For this reason we simply store the edge info from the original A -> B
-edge and apply this information to the new edge A -> C.
-
-Sometimes we have a scenario where jump target C is not represented by an
-BlockId but an immediate value. I'm only aware of this happening without
-tables next to code currently.
-
-Then we go from A ---> B - -> IMM to A - -> IMM where the dashed arrows
-are not stored in the CFG.
-
-In that case we simply delete the edge A -> B.
-
-In terms of implementation the native backend first builds a mapping
-from blocks suitable for shortcutting to their jump targets.
-Then it redirects all jump instructions to these blocks using the
-built up mapping.
-This function (shortcutWeightMap) takes the same mapping and
-applies the mapping to the CFG in the way laid out above.
-
--}
-shortcutWeightMap :: LabelMap (Maybe BlockId) -> CFG -> CFG
-shortcutWeightMap cuts cfg =
- foldl' applyMapping cfg $ mapToList cuts
- where
--- takes the tuple (B,C) from the notation in [Updating the CFG during shortcutting]
- applyMapping :: CFG -> (BlockId,Maybe BlockId) -> CFG
- --Shortcut immediate
- applyMapping m (from, Nothing) =
- mapDelete from .
- fmap (mapDelete from) $ m
- --Regular shortcut
- applyMapping m (from, Just to) =
- let updatedMap :: CFG
- updatedMap
- = fmap (shortcutEdge (from,to)) $
- (mapDelete from m :: CFG )
- --Sometimes we can shortcut multiple blocks like so:
- -- A -> B -> C -> D -> E => A -> E
- -- so we check for such chains.
- in case mapLookup to cuts of
- Nothing -> updatedMap
- Just dest -> applyMapping updatedMap (to, dest)
- --Redirect edge from B to C
- shortcutEdge :: (BlockId, BlockId) -> LabelMap EdgeInfo -> LabelMap EdgeInfo
- shortcutEdge (from, to) m =
- case mapLookup from m of
- Just info -> mapInsert to info $ mapDelete from m
- Nothing -> m
-
--- | Sometimes we insert a block which should unconditionally be executed
--- after a given block. This function updates the CFG for these cases.
--- So we get A -> B => A -> A' -> B
--- \ \
--- -> C => -> C
---
-addImmediateSuccessor :: BlockId -> BlockId -> CFG -> CFG
-addImmediateSuccessor node follower cfg
- = updateEdges . addWeightEdge node follower uncondWeight $ cfg
- where
- uncondWeight = fromIntegral . D.uncondWeight .
- D.cfgWeightInfo $ D.unsafeGlobalDynFlags
- targets = getSuccessorEdges cfg node
- successors = map fst targets :: [BlockId]
- updateEdges = addNewSuccs . remOldSuccs
- remOldSuccs m = foldl' (flip (delEdge node)) m successors
- addNewSuccs m =
- foldl' (\m' (t,info) -> addEdge follower t info m') m targets
-
--- | Adds a new edge, overwrites existing edges if present
-addEdge :: BlockId -> BlockId -> EdgeInfo -> CFG -> CFG
-addEdge from to info cfg =
- mapAlter addFromToEdge from $
- mapAlter addDestNode to cfg
- where
- -- Simply insert the edge into the edge list.
- addFromToEdge Nothing = Just $ mapSingleton to info
- addFromToEdge (Just wm) = Just $ mapInsert to info wm
- -- We must add the destination node explicitly
- addDestNode Nothing = Just $ mapEmpty
- addDestNode n@(Just _) = n
-
-
--- | Adds a edge with the given weight to the cfg
--- If there already existed an edge it is overwritten.
--- `addWeightEdge from to weight cfg`
-addWeightEdge :: BlockId -> BlockId -> EdgeWeight -> CFG -> CFG
-addWeightEdge from to weight cfg =
- addEdge from to (mkWeightInfo weight) cfg
-
-delEdge :: BlockId -> BlockId -> CFG -> CFG
-delEdge from to m =
- mapAlter remDest from m
- where
- remDest Nothing = Nothing
- remDest (Just wm) = Just $ mapDelete to wm
-
-delNode :: BlockId -> CFG -> CFG
-delNode node cfg =
- fmap (mapDelete node) -- < Edges to the node
- (mapDelete node cfg) -- < Edges from the node
-
--- | Destinations from bid ordered by weight (descending)
-getSuccEdgesSorted :: CFG -> BlockId -> [(BlockId,EdgeInfo)]
-getSuccEdgesSorted m bid =
- let destMap = mapFindWithDefault mapEmpty bid m
- cfgEdges = mapToList destMap
- sortedEdges = sortWith (negate . edgeWeight . snd) cfgEdges
- in --pprTrace "getSuccEdgesSorted" (ppr bid <+> text "map:" <+> ppr m)
- sortedEdges
-
--- | Get successors of a given node with edge weights.
-getSuccessorEdges :: HasDebugCallStack => CFG -> BlockId -> [(BlockId,EdgeInfo)]
-getSuccessorEdges m bid = maybe lookupError mapToList (mapLookup bid m)
- where
- lookupError = pprPanic "getSuccessorEdges: Block does not exist" $
- ppr bid <+> pprEdgeWeights m
-
-getEdgeInfo :: BlockId -> BlockId -> CFG -> Maybe EdgeInfo
-getEdgeInfo from to m
- | Just wm <- mapLookup from m
- , Just info <- mapLookup to wm
- = Just $! info
- | otherwise
- = Nothing
-
-getEdgeWeight :: CFG -> BlockId -> BlockId -> EdgeWeight
-getEdgeWeight cfg from to =
- edgeWeight $ expectJust "Edgeweight for noexisting block" $
- getEdgeInfo from to cfg
-
-getTransitionSource :: BlockId -> BlockId -> CFG -> TransitionSource
-getTransitionSource from to cfg = transitionSource $ expectJust "Source info for noexisting block" $
- getEdgeInfo from to cfg
-
-reverseEdges :: CFG -> CFG
-reverseEdges cfg = mapFoldlWithKey (\cfg from toMap -> go (addNode cfg from) from toMap) mapEmpty cfg
- where
- -- We must preserve nodes without outgoing edges!
- addNode :: CFG -> BlockId -> CFG
- addNode cfg b = mapInsertWith mapUnion b mapEmpty cfg
- go :: CFG -> BlockId -> (LabelMap EdgeInfo) -> CFG
- go cfg from toMap = mapFoldlWithKey (\cfg to info -> addEdge to from info cfg) cfg toMap :: CFG
-
-
--- | Returns a unordered list of all edges with info
-infoEdgeList :: CFG -> [CfgEdge]
-infoEdgeList m =
- go (mapToList m) []
- where
- -- We avoid foldMap to avoid thunk buildup
- go :: [(BlockId,LabelMap EdgeInfo)] -> [CfgEdge] -> [CfgEdge]
- go [] acc = acc
- go ((from,toMap):xs) acc
- = go' xs from (mapToList toMap) acc
- go' :: [(BlockId,LabelMap EdgeInfo)] -> BlockId -> [(BlockId,EdgeInfo)] -> [CfgEdge] -> [CfgEdge]
- go' froms _ [] acc = go froms acc
- go' froms from ((to,info):tos) acc
- = go' froms from tos (CfgEdge from to info : acc)
-
--- | Returns a unordered list of all edges without weights
-edgeList :: CFG -> [Edge]
-edgeList m =
- go (mapToList m) []
- where
- -- We avoid foldMap to avoid thunk buildup
- go :: [(BlockId,LabelMap EdgeInfo)] -> [Edge] -> [Edge]
- go [] acc = acc
- go ((from,toMap):xs) acc
- = go' xs from (mapKeys toMap) acc
- go' :: [(BlockId,LabelMap EdgeInfo)] -> BlockId -> [BlockId] -> [Edge] -> [Edge]
- go' froms _ [] acc = go froms acc
- go' froms from (to:tos) acc
- = go' froms from tos ((from,to) : acc)
-
--- | Get successors of a given node without edge weights.
-getSuccessors :: HasDebugCallStack => CFG -> BlockId -> [BlockId]
-getSuccessors m bid
- | Just wm <- mapLookup bid m
- = mapKeys wm
- | otherwise = lookupError
- where
- lookupError = pprPanic "getSuccessors: Block does not exist" $
- ppr bid <+> pprEdgeWeights m
-
-pprEdgeWeights :: CFG -> SDoc
-pprEdgeWeights m =
- let edges = sort $ infoEdgeList m :: [CfgEdge]
- printEdge (CfgEdge from to (EdgeInfo { edgeWeight = weight }))
- = text "\t" <> ppr from <+> text "->" <+> ppr to <>
- text "[label=\"" <> ppr weight <> text "\",weight=\"" <>
- ppr weight <> text "\"];\n"
- --for the case that there are no edges from/to this node.
- --This should rarely happen but it can save a lot of time
- --to immediately see it when it does.
- printNode node
- = text "\t" <> ppr node <> text ";\n"
- getEdgeNodes (CfgEdge from to _) = [from,to]
- edgeNodes = setFromList $ concatMap getEdgeNodes edges :: LabelSet
- nodes = filter (\n -> (not . setMember n) edgeNodes) . mapKeys $ mapFilter null m
- in
- text "digraph {\n" <>
- (foldl' (<>) empty (map printEdge edges)) <>
- (foldl' (<>) empty (map printNode nodes)) <>
- text "}\n"
-
-{-# INLINE updateEdgeWeight #-} --Allows eliminating the tuple when possible
--- | Invariant: The edge **must** exist already in the graph.
-updateEdgeWeight :: (EdgeWeight -> EdgeWeight) -> Edge -> CFG -> CFG
-updateEdgeWeight f (from, to) cfg
- | Just oldInfo <- getEdgeInfo from to cfg
- = let !oldWeight = edgeWeight oldInfo
- !newWeight = f oldWeight
- in addEdge from to (oldInfo {edgeWeight = newWeight}) cfg
- | otherwise
- = panic "Trying to update invalid edge"
-
--- from to oldWeight => newWeight
-mapWeights :: (BlockId -> BlockId -> EdgeWeight -> EdgeWeight) -> CFG -> CFG
-mapWeights f cfg =
- foldl' (\cfg (CfgEdge from to info) ->
- let oldWeight = edgeWeight info
- newWeight = f from to oldWeight
- in addEdge from to (info {edgeWeight = newWeight}) cfg)
- cfg (infoEdgeList cfg)
-
-
--- | Insert a block in the control flow between two other blocks.
--- We pass a list of tuples (A,B,C) where
--- * A -> C: Old edge
--- * A -> B -> C : New Arc, where B is the new block.
--- It's possible that a block has two jumps to the same block
--- in the assembly code. However we still only store a single edge for
--- these cases.
--- We assign the old edge info to the edge A -> B and assign B -> C the
--- weight of an unconditional jump.
-addNodesBetween :: CFG -> [(BlockId,BlockId,BlockId)] -> CFG
-addNodesBetween m updates =
- foldl' updateWeight m .
- weightUpdates $ updates
- where
- weight = fromIntegral . D.uncondWeight .
- D.cfgWeightInfo $ D.unsafeGlobalDynFlags
- -- We might add two blocks for different jumps along a single
- -- edge. So we end up with edges: A -> B -> C , A -> D -> C
- -- in this case after applying the first update the weight for A -> C
- -- is no longer available. So we calculate future weights before updates.
- weightUpdates = map getWeight
- getWeight :: (BlockId,BlockId,BlockId) -> (BlockId,BlockId,BlockId,EdgeInfo)
- getWeight (from,between,old)
- | Just edgeInfo <- getEdgeInfo from old m
- = (from,between,old,edgeInfo)
- | otherwise
- = pprPanic "Can't find weight for edge that should have one" (
- text "triple" <+> ppr (from,between,old) $$
- text "updates" <+> ppr updates $$
- text "cfg:" <+> pprEdgeWeights m )
- updateWeight :: CFG -> (BlockId,BlockId,BlockId,EdgeInfo) -> CFG
- updateWeight m (from,between,old,edgeInfo)
- = addEdge from between edgeInfo .
- addWeightEdge between old weight .
- delEdge from old $ m
-
-{-
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- ~~~ Note [CFG Edge Weights] ~~~
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
- Edge weights assigned do not currently represent a specific
- cost model and rather just a ranking of which blocks should
- be placed next to each other given their connection type in
- the CFG.
- This is especially relevant if we whenever two blocks will
- jump to the same target.
-
- A B
- \ /
- C
-
- Should A or B be placed in front of C? The block layout algorithm
- decides this based on which edge (A,C)/(B,C) is heavier. So we
- make a educated guess on which branch should be preferred.
-
- We rank edges in this order:
- * Unconditional Control Transfer - They will always
- transfer control to their target. Unless there is a info table
- we can turn the jump into a fallthrough as well.
- We use 20k as default, so it's easy to spot if values have been
- modified but unlikely that we run into issues with overflow.
- * If branches (likely) - We assume branches marked as likely
- are taken more than 80% of the time.
- By ranking them below unconditional jumps we make sure we
- prefer the unconditional if there is a conditional and
- unconditional edge towards a block.
- * If branches (regular) - The false branch can potentially be turned
- into a fallthrough so we prefer it slightly over the true branch.
- * Unlikely branches - These can be assumed to be taken less than 20%
- of the time. So we given them one of the lowest priorities.
- * Switches - Switches at this level are implemented as jump tables
- so have a larger number of successors. So without more information
- we can only say that each individual successor is unlikely to be
- jumped to and we rank them accordingly.
- * Calls - We currently ignore calls completely:
- * By the time we return from a call there is a good chance
- that the address we return to has already been evicted from
- cache eliminating a main advantage sequential placement brings.
- * Calls always require a info table in front of their return
- address. This reduces the chance that we return to the same
- cache line further.
-
--}
--- | Generate weights for a Cmm proc based on some simple heuristics.
-getCfgProc :: D.CfgWeights -> RawCmmDecl -> CFG
-getCfgProc _ (CmmData {}) = mapEmpty
-getCfgProc weights (CmmProc _info _lab _live graph) = getCfg weights graph
-
-getCfg :: D.CfgWeights -> CmmGraph -> CFG
-getCfg weights graph =
- foldl' insertEdge edgelessCfg $ concatMap getBlockEdges blocks
- where
- D.CFGWeights
- { D.uncondWeight = uncondWeight
- , D.condBranchWeight = condBranchWeight
- , D.switchWeight = switchWeight
- , D.callWeight = callWeight
- , D.likelyCondWeight = likelyCondWeight
- , D.unlikelyCondWeight = unlikelyCondWeight
- -- Last two are used in other places
- --, D.infoTablePenalty = infoTablePenalty
- --, D.backEdgeBonus = backEdgeBonus
- } = weights
- -- Explicitly add all nodes to the cfg to ensure they are part of the
- -- CFG.
- edgelessCfg = mapFromList $ zip (map G.entryLabel blocks) (repeat mapEmpty)
- insertEdge :: CFG -> ((BlockId,BlockId),EdgeInfo) -> CFG
- insertEdge m ((from,to),weight) =
- mapAlter f from m
- where
- f :: Maybe (LabelMap EdgeInfo) -> Maybe (LabelMap EdgeInfo)
- f Nothing = Just $ mapSingleton to weight
- f (Just destMap) = Just $ mapInsert to weight destMap
- getBlockEdges :: CmmBlock -> [((BlockId,BlockId),EdgeInfo)]
- getBlockEdges block =
- case branch of
- CmmBranch dest -> [mkEdge dest uncondWeight]
- CmmCondBranch cond t f l
- | l == Nothing ->
- [mkEdge f condBranchWeight, mkEdge t condBranchWeight]
- | l == Just True ->
- [mkEdge f unlikelyCondWeight, mkEdge t likelyCondWeight]
- | l == Just False ->
- [mkEdge f likelyCondWeight, mkEdge t unlikelyCondWeight]
- where
- mkEdgeInfo = -- pprTrace "Info" (ppr branchInfo <+> ppr cond)
- EdgeInfo (CmmSource branch branchInfo) . fromIntegral
- mkEdge target weight = ((bid,target), mkEdgeInfo weight)
- branchInfo =
- foldRegsUsed
- (panic "foldRegsDynFlags")
- (\info r -> if r == SpLim || r == HpLim || r == BaseReg
- then HeapStackCheck else info)
- NoInfo cond
-
- (CmmSwitch _e ids) ->
- let switchTargets = switchTargetsToList ids
- --Compiler performance hack - for very wide switches don't
- --consider targets for layout.
- adjustedWeight =
- if (length switchTargets > 10) then -1 else switchWeight
- in map (\x -> mkEdge x adjustedWeight) switchTargets
- (CmmCall { cml_cont = Just cont}) -> [mkEdge cont callWeight]
- (CmmForeignCall {Cmm.succ = cont}) -> [mkEdge cont callWeight]
- (CmmCall { cml_cont = Nothing }) -> []
- other ->
- panic "Foo" $
- ASSERT2(False, ppr "Unknown successor cause:" <>
- (ppr branch <+> text "=>" <> ppr (G.successors other)))
- map (\x -> ((bid,x),mkEdgeInfo 0)) $ G.successors other
- where
- bid = G.entryLabel block
- mkEdgeInfo = EdgeInfo (CmmSource branch NoInfo) . fromIntegral
- mkEdge target weight = ((bid,target), mkEdgeInfo weight)
- branch = lastNode block :: CmmNode O C
-
- blocks = revPostorder graph :: [CmmBlock]
-
---Find back edges by BFS
-findBackEdges :: HasDebugCallStack => BlockId -> CFG -> Edges
-findBackEdges root cfg =
- --pprTraceIt "Backedges:" $
- map fst .
- filter (\x -> snd x == Backward) $ typedEdges
- where
- edges = edgeList cfg :: [(BlockId,BlockId)]
- getSuccs = getSuccessors cfg :: BlockId -> [BlockId]
- typedEdges =
- classifyEdges root getSuccs edges :: [((BlockId,BlockId),EdgeType)]
-
-
-optimizeCFG :: D.CfgWeights -> RawCmmDecl -> CFG -> CFG
-optimizeCFG _ (CmmData {}) cfg = cfg
-optimizeCFG weights (CmmProc info _lab _live graph) cfg =
- {-# SCC optimizeCFG #-}
- -- pprTrace "Initial:" (pprEdgeWeights cfg) $
- -- pprTrace "Initial:" (ppr $ mkGlobalWeights (g_entry graph) cfg) $
-
- -- pprTrace "LoopInfo:" (ppr $ loopInfo cfg (g_entry graph)) $
- favourFewerPreds .
- penalizeInfoTables info .
- increaseBackEdgeWeight (g_entry graph) $ cfg
- where
-
- -- | Increase the weight of all backedges in the CFG
- -- this helps to make loop jumpbacks the heaviest edges
- increaseBackEdgeWeight :: BlockId -> CFG -> CFG
- increaseBackEdgeWeight root cfg =
- let backedges = findBackEdges root cfg
- update weight
- --Keep irrelevant edges irrelevant
- | weight <= 0 = 0
- | otherwise
- = weight + fromIntegral (D.backEdgeBonus weights)
- in foldl' (\cfg edge -> updateEdgeWeight update edge cfg)
- cfg backedges
-
- -- | Since we cant fall through info tables we penalize these.
- penalizeInfoTables :: LabelMap a -> CFG -> CFG
- penalizeInfoTables info cfg =
- mapWeights fupdate cfg
- where
- fupdate :: BlockId -> BlockId -> EdgeWeight -> EdgeWeight
- fupdate _ to weight
- | mapMember to info
- = weight - (fromIntegral $ D.infoTablePenalty weights)
- | otherwise = weight
-
- -- | If a block has two successors, favour the one with fewer
- -- predecessors and/or the one allowing fall through.
- favourFewerPreds :: CFG -> CFG
- favourFewerPreds cfg =
- let
- revCfg =
- reverseEdges $ filterEdges
- (\_from -> fallthroughTarget) cfg
-
- predCount n = length $ getSuccessorEdges revCfg n
- nodes = getCfgNodes cfg
-
- modifiers :: Int -> Int -> (EdgeWeight, EdgeWeight)
- modifiers preds1 preds2
- | preds1 < preds2 = ( 1,-1)
- | preds1 == preds2 = ( 0, 0)
- | otherwise = (-1, 1)
-
- update :: CFG -> BlockId -> CFG
- update cfg node
- | [(s1,e1),(s2,e2)] <- getSuccessorEdges cfg node
- , !w1 <- edgeWeight e1
- , !w2 <- edgeWeight e2
- --Only change the weights if there isn't already a ordering.
- , w1 == w2
- , (mod1,mod2) <- modifiers (predCount s1) (predCount s2)
- = (\cfg' ->
- (adjustEdgeWeight cfg' (+mod2) node s2))
- (adjustEdgeWeight cfg (+mod1) node s1)
- | otherwise
- = cfg
- in foldl' update cfg nodes
- where
- fallthroughTarget :: BlockId -> EdgeInfo -> Bool
- fallthroughTarget to (EdgeInfo source _weight)
- | mapMember to info = False
- | AsmCodeGen <- source = True
- | CmmSource { trans_cmmNode = CmmBranch {} } <- source = True
- | CmmSource { trans_cmmNode = CmmCondBranch {} } <- source = True
- | otherwise = False
-
--- | Determine loop membership of blocks based on SCC analysis
--- This is faster but only gives yes/no answers.
-loopMembers :: HasDebugCallStack => CFG -> LabelMap Bool
-loopMembers cfg =
- foldl' (flip setLevel) mapEmpty sccs
- where
- mkNode :: BlockId -> Node BlockId BlockId
- mkNode bid = DigraphNode bid bid (getSuccessors cfg bid)
- nodes = map mkNode (getCfgNodes cfg)
-
- sccs = stronglyConnCompFromEdgedVerticesOrd nodes
-
- setLevel :: SCC BlockId -> LabelMap Bool -> LabelMap Bool
- setLevel (AcyclicSCC bid) m = mapInsert bid False m
- setLevel (CyclicSCC bids) m = foldl' (\m k -> mapInsert k True m) m bids
-
-loopLevels :: CFG -> BlockId -> LabelMap Int
-loopLevels cfg root = liLevels loopInfos
- where
- loopInfos = loopInfo cfg root
-
-data LoopInfo = LoopInfo
- { liBackEdges :: [(Edge)] -- ^ List of back edges
- , liLevels :: LabelMap Int -- ^ BlockId -> LoopLevel mapping
- , liLoops :: [(Edge, LabelSet)] -- ^ (backEdge, loopBody), body includes header
- }
-
-instance Outputable LoopInfo where
- ppr (LoopInfo _ _lvls loops) =
- text "Loops:(backEdge, bodyNodes)" $$
- (vcat $ map ppr loops)
-
-{- Note [Determining the loop body]
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
- Starting with the knowledge that:
- * head dominates the loop
- * `tail` -> `head` is a backedge
-
- We can determine all nodes by:
- * Deleting the loop head from the graph.
- * Collect all blocks which are reachable from the `tail`.
-
- We do so by performing bfs from the tail node towards the head.
- -}
-
--- | Determine loop membership of blocks based on Dominator analysis.
--- This is slower but gives loop levels instead of just loop membership.
--- However it only detects natural loops. Irreducible control flow is not
--- recognized even if it loops. But that is rare enough that we don't have
--- to care about that special case.
-loopInfo :: HasDebugCallStack => CFG -> BlockId -> LoopInfo
-loopInfo cfg root = LoopInfo { liBackEdges = backEdges
- , liLevels = mapFromList loopCounts
- , liLoops = loopBodies }
- where
- revCfg = reverseEdges cfg
-
- graph = -- pprTrace "CFG - loopInfo" (pprEdgeWeights cfg) $
- fmap (setFromList . mapKeys ) cfg :: LabelMap LabelSet
-
-
- --TODO - This should be a no op: Export constructors? Use unsafeCoerce? ...
- rooted = ( fromBlockId root
- , toIntMap $ fmap toIntSet graph) :: (Int, IntMap IntSet)
- tree = fmap toBlockId $ Dom.domTree rooted :: Tree BlockId
-
- -- Map from Nodes to their dominators
- domMap :: LabelMap LabelSet
- domMap = mkDomMap tree
-
- edges = edgeList cfg :: [(BlockId, BlockId)]
- -- We can't recompute nodes from edges, there might be blocks not connected via edges.
- nodes = getCfgNodes cfg :: [BlockId]
-
- -- identify back edges
- isBackEdge (from,to)
- | Just doms <- mapLookup from domMap
- , setMember to doms
- = True
- | otherwise = False
-
- -- See Note [Determining the loop body]
- -- Get the loop body associated with a back edge.
- findBody edge@(tail, head)
- = ( edge, setInsert head $ go (setSingleton tail) (setSingleton tail) )
- where
- -- See Note [Determining the loop body]
- cfg' = delNode head revCfg
-
- go :: LabelSet -> LabelSet -> LabelSet
- go found current
- | setNull current = found
- | otherwise = go (setUnion newSuccessors found)
- newSuccessors
- where
- -- Really predecessors, since we use the reversed cfg.
- newSuccessors = setFilter (\n -> not $ setMember n found) successors :: LabelSet
- successors = setFromList $ concatMap
- (getSuccessors cfg')
- -- we filter head as it's no longer part of the cfg.
- (filter (/= head) $ setElems current) :: LabelSet
-
- backEdges = filter isBackEdge edges
- loopBodies = map findBody backEdges :: [(Edge, LabelSet)]
-
- -- Block b is part of n loop bodies => loop nest level of n
- loopCounts =
- let bodies = map (first snd) loopBodies -- [(Header, Body)]
- loopCount n = length $ nub . map fst . filter (setMember n . snd) $ bodies
- in map (\n -> (n, loopCount n)) $ nodes :: [(BlockId, Int)]
-
- toIntSet :: LabelSet -> IntSet
- toIntSet s = IS.fromList . map fromBlockId . setElems $ s
- toIntMap :: LabelMap a -> IntMap a
- toIntMap m = IM.fromList $ map (\(x,y) -> (fromBlockId x,y)) $ mapToList m
-
- mkDomMap :: Tree BlockId -> LabelMap LabelSet
- mkDomMap root = mapFromList $ go setEmpty root
- where
- go :: LabelSet -> Tree BlockId -> [(Label,LabelSet)]
- go parents (Node lbl [])
- = [(lbl, parents)]
- go parents (Node _ leaves)
- = let nodes = map rootLabel leaves
- entries = map (\x -> (x,parents)) nodes
- in entries ++ concatMap
- (\n -> go (setInsert (rootLabel n) parents) n)
- leaves
-
- fromBlockId :: BlockId -> Int
- fromBlockId = getKey . getUnique
-
- toBlockId :: Int -> BlockId
- toBlockId = mkBlockId . mkUniqueGrimily
-
--- We make the CFG a Hoopl Graph, so we can reuse revPostOrder.
-newtype BlockNode (e :: Extensibility) (x :: Extensibility) = BN (BlockId,[BlockId])
-
-instance G.NonLocal (BlockNode) where
- entryLabel (BN (lbl,_)) = lbl
- successors (BN (_,succs)) = succs
-
-revPostorderFrom :: HasDebugCallStack => CFG -> BlockId -> [BlockId]
-revPostorderFrom cfg root =
- map fromNode $ G.revPostorderFrom hooplGraph root
- where
- nodes = getCfgNodes cfg
- hooplGraph = foldl' (\m n -> mapInsert n (toNode n) m) mapEmpty nodes
-
- fromNode :: BlockNode C C -> BlockId
- fromNode (BN x) = fst x
-
- toNode :: BlockId -> BlockNode C C
- toNode bid =
- BN (bid,getSuccessors cfg $ bid)
-
-
--- | We take in a CFG which has on its edges weights which are
--- relative only to other edges originating from the same node.
---
--- We return a CFG for which each edge represents a GLOBAL weight.
--- This means edge weights are comparable across the whole graph.
---
--- For irreducible control flow results might be imprecise, otherwise they
--- are reliable.
---
--- The algorithm is based on the Paper
--- "Static Branch Prediction and Program Profile Analysis" by Y Wu, JR Larus
--- The only big change is that we go over the nodes in the body of loops in
--- reverse post order. Which is required for diamond control flow to work probably.
---
--- We also apply a few prediction heuristics (based on the same paper)
-
-{-# NOINLINE mkGlobalWeights #-}
-{-# SCC mkGlobalWeights #-}
-mkGlobalWeights :: HasDebugCallStack => BlockId -> CFG -> (LabelMap Double, LabelMap (LabelMap Double))
-mkGlobalWeights root localCfg
- | null localCfg = panic "Error - Empty CFG"
- | otherwise
- = (blockFreqs', edgeFreqs')
- where
- -- Calculate fixpoints
- (blockFreqs, edgeFreqs) = calcFreqs nodeProbs backEdges' bodies' revOrder'
- blockFreqs' = mapFromList $ map (first fromVertex) (assocs blockFreqs) :: LabelMap Double
- edgeFreqs' = fmap fromVertexMap $ fromVertexMap edgeFreqs
-
- fromVertexMap :: IM.IntMap x -> LabelMap x
- fromVertexMap m = mapFromList . map (first fromVertex) $ IM.toList m
-
- revOrder = revPostorderFrom localCfg root :: [BlockId]
- loopResults@(LoopInfo backedges _levels bodies) = loopInfo localCfg root
-
- revOrder' = map toVertex revOrder
- backEdges' = map (bimap toVertex toVertex) backedges
- bodies' = map calcBody bodies
-
- estimatedCfg = staticBranchPrediction root loopResults localCfg
- -- Normalize the weights to probabilities and apply heuristics
- nodeProbs = cfgEdgeProbabilities estimatedCfg toVertex
-
- -- By mapping vertices to numbers in reverse post order we can bring any subset into reverse post
- -- order simply by sorting.
- -- TODO: The sort is redundant if we can guarantee that setElems returns elements ascending
- calcBody (backedge, blocks) =
- (toVertex $ snd backedge, sort . map toVertex $ (setElems blocks))
-
- vertexMapping = mapFromList $ zip revOrder [0..] :: LabelMap Int
- blockMapping = listArray (0,mapSize vertexMapping - 1) revOrder :: Array Int BlockId
- -- Map from blockId to indices starting at zero
- toVertex :: BlockId -> Int
- toVertex blockId = expectJust "mkGlobalWeights" $ mapLookup blockId vertexMapping
- -- Map from indices starting at zero to blockIds
- fromVertex :: Int -> BlockId
- fromVertex vertex = blockMapping ! vertex
-
-{- Note [Static Branch Prediction]
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The work here has been based on the paper
-"Static Branch Prediction and Program Profile Analysis" by Y Wu, JR Larus.
-
-The primary differences are that if we branch on the result of a heap
-check we do not apply any of the heuristics.
-The reason is simple: They look like loops in the control flow graph
-but are usually never entered, and if at most once.
-
-Currently implemented is a heuristic to predict that we do not exit
-loops (lehPredicts) and one to predict that backedges are more likely
-than any other edge.
-
-The back edge case is special as it superceeds any other heuristic if it
-applies.
-
-Do NOT rely solely on nofib results for benchmarking this. I recommend at least
-comparing megaparsec and container benchmarks. Nofib does not seeem to have
-many instances of "loopy" Cmm where these make a difference.
-
-TODO:
-* The paper containers more benchmarks which should be implemented.
-* If we turn the likelihood on if/else branches into a probability
- instead of true/false we could implement this as a Cmm pass.
- + The complete Cmm code still exists and can be accessed by the heuristics
- + There is no chance of register allocation/codegen inserting branches/blocks
- + making the TransitionSource info wrong.
- + potential to use this information in CmmPasses.
- - Requires refactoring of all the code relying on the binary nature of likelihood.
- - Requires refactoring `loopInfo` to work on both, Cmm Graphs and the backend CFG.
--}
-
--- | Combination of target node id and information about the branch
--- we are looking at.
-type TargetNodeInfo = (BlockId, EdgeInfo)
-
-
--- | Update branch weights based on certain heuristics.
--- See Note [Static Branch Prediction]
--- TODO: This should be combined with optimizeCFG
-{-# SCC staticBranchPrediction #-}
-staticBranchPrediction :: BlockId -> LoopInfo -> CFG -> CFG
-staticBranchPrediction _root (LoopInfo l_backEdges loopLevels l_loops) cfg =
- -- pprTrace "staticEstimatesOn" (ppr (cfg)) $
- foldl' update cfg nodes
- where
- nodes = getCfgNodes cfg
- backedges = S.fromList $ l_backEdges
- -- Loops keyed by their back edge
- loops = M.fromList $ l_loops :: M.Map Edge LabelSet
- loopHeads = S.fromList $ map snd $ M.keys loops
-
- update :: CFG -> BlockId -> CFG
- update cfg node
- -- No successors, nothing to do.
- | null successors = cfg
-
- -- Mix of backedges and others:
- -- Always predict the backedges.
- | not (null m) && length m < length successors
- -- Heap/Stack checks "loop", but only once.
- -- So we simply exclude any case involving them.
- , not $ any (isHeapOrStackCheck . transitionSource . snd) successors
- = let loopChance = repeat $! pred_LBH / (fromIntegral $ length m)
- exitChance = repeat $! (1 - pred_LBH) / fromIntegral (length not_m)
- updates = zip (map fst m) loopChance ++ zip (map fst not_m) exitChance
- in -- pprTrace "mix" (ppr (node,successors)) $
- foldl' (\cfg (to,weight) -> setEdgeWeight cfg weight node to) cfg updates
-
- -- For (regular) non-binary branches we keep the weights from the STG -> Cmm translation.
- | length successors /= 2
- = cfg
-
- -- Only backedges - no need to adjust
- | length m > 0
- = cfg
-
- -- A regular binary branch, we can plug addition predictors in here.
- | [(s1,s1_info),(s2,s2_info)] <- successors
- , not $ any (isHeapOrStackCheck . transitionSource . snd) successors
- = -- Normalize weights to total of 1
- let !w1 = max (edgeWeight s1_info) (0)
- !w2 = max (edgeWeight s2_info) (0)
- -- Of both weights are <= 0 we set both to 0.5
- normalizeWeight w = if w1 + w2 == 0 then 0.5 else w/(w1+w2)
- !cfg' = setEdgeWeight cfg (normalizeWeight w1) node s1
- !cfg'' = setEdgeWeight cfg' (normalizeWeight w2) node s2
-
- -- Figure out which heuristics apply to these successors
- heuristics = map ($ ((s1,s1_info),(s2,s2_info)))
- [lehPredicts, phPredicts, ohPredicts, ghPredicts, lhhPredicts, chPredicts
- , shPredicts, rhPredicts]
- -- Apply result of a heuristic. Argument is the likelihood
- -- predicted for s1.
- applyHeuristic :: CFG -> Maybe Prob -> CFG
- applyHeuristic cfg Nothing = cfg
- applyHeuristic cfg (Just (s1_pred :: Double))
- | s1_old == 0 || s2_old == 0 ||
- isHeapOrStackCheck (transitionSource s1_info) ||
- isHeapOrStackCheck (transitionSource s2_info)
- = cfg
- | otherwise =
- let -- Predictions from heuristic
- s1_prob = EdgeWeight s1_pred :: EdgeWeight
- s2_prob = 1.0 - s1_prob
- -- Update
- d = (s1_old * s1_prob) + (s2_old * s2_prob) :: EdgeWeight
- s1_prob' = s1_old * s1_prob / d
- !s2_prob' = s2_old * s2_prob / d
- !cfg_s1 = setEdgeWeight cfg s1_prob' node s1
- in -- pprTrace "Applying heuristic!" (ppr (node,s1,s2) $$ ppr (s1_prob', s2_prob')) $
- setEdgeWeight cfg_s1 s2_prob' node s2
- where
- -- Old weights
- s1_old = getEdgeWeight cfg node s1
- s2_old = getEdgeWeight cfg node s2
-
- in
- -- pprTraceIt "RegularCfgResult" $
- foldl' applyHeuristic cfg'' heuristics
-
- -- Branch on heap/stack check
- | otherwise = cfg
-
- where
- -- Chance that loops are taken.
- pred_LBH = 0.875
- -- successors
- successors = getSuccessorEdges cfg node
- -- backedges
- (m,not_m) = partition (\succ -> S.member (node, fst succ) backedges) successors
-
- -- Heuristics return nothing if they don't say anything about this branch
- -- or Just (prob_s1) where prob_s1 is the likelihood for s1 to be the
- -- taken branch. s1 is the branch in the true case.
-
- -- Loop exit heuristic.
- -- We are unlikely to leave a loop unless it's to enter another one.
- pred_LEH = 0.75
- -- If and only if no successor is a loopheader,
- -- then we will likely not exit the current loop body.
- lehPredicts :: (TargetNodeInfo,TargetNodeInfo) -> Maybe Prob
- lehPredicts ((s1,_s1_info),(s2,_s2_info))
- | S.member s1 loopHeads || S.member s2 loopHeads
- = Nothing
-
- | otherwise
- = --pprTrace "lehPredict:" (ppr $ compare s1Level s2Level) $
- case compare s1Level s2Level of
- EQ -> Nothing
- LT -> Just (1-pred_LEH) --s1 exits to a shallower loop level (exits loop)
- GT -> Just (pred_LEH) --s1 exits to a deeper loop level
- where
- s1Level = mapLookup s1 loopLevels
- s2Level = mapLookup s2 loopLevels
-
- -- Comparing to a constant is unlikely to be equal.
- ohPredicts (s1,_s2)
- | CmmSource { trans_cmmNode = src1 } <- getTransitionSource node (fst s1) cfg
- , CmmCondBranch cond ltrue _lfalse likely <- src1
- , likely == Nothing
- , CmmMachOp mop args <- cond
- , MO_Eq {} <- mop
- , not (null [x | x@CmmLit{} <- args])
- = if fst s1 == ltrue then Just 0.3 else Just 0.7
-
- | otherwise
- = Nothing
-
- -- TODO: These are all the other heuristics from the paper.
- -- Not all will apply, for now we just stub them out as Nothing.
- phPredicts = const Nothing
- ghPredicts = const Nothing
- lhhPredicts = const Nothing
- chPredicts = const Nothing
- shPredicts = const Nothing
- rhPredicts = const Nothing
-
--- We normalize all edge weights as probabilities between 0 and 1.
--- Ignoring rounding errors all outgoing edges sum up to 1.
-cfgEdgeProbabilities :: CFG -> (BlockId -> Int) -> IM.IntMap (IM.IntMap Prob)
-cfgEdgeProbabilities cfg toVertex
- = mapFoldlWithKey foldEdges IM.empty cfg
- where
- foldEdges = (\m from toMap -> IM.insert (toVertex from) (normalize toMap) m)
-
- normalize :: (LabelMap EdgeInfo) -> (IM.IntMap Prob)
- normalize weightMap
- | edgeCount <= 1 = mapFoldlWithKey (\m k _ -> IM.insert (toVertex k) 1.0 m) IM.empty weightMap
- | otherwise = mapFoldlWithKey (\m k _ -> IM.insert (toVertex k) (normalWeight k) m) IM.empty weightMap
- where
- edgeCount = mapSize weightMap
- -- Negative weights are generally allowed but are mapped to zero.
- -- We then check if there is at least one non-zero edge and if not
- -- assign uniform weights to all branches.
- minWeight = 0 :: Prob
- weightMap' = fmap (\w -> max (weightToDouble . edgeWeight $ w) minWeight) weightMap
- totalWeight = sum weightMap'
-
- normalWeight :: BlockId -> Prob
- normalWeight bid
- | totalWeight == 0
- = 1.0 / fromIntegral edgeCount
- | Just w <- mapLookup bid weightMap'
- = w/totalWeight
- | otherwise = panic "impossible"
-
--- This is the fixpoint algorithm from
--- "Static Branch Prediction and Program Profile Analysis" by Y Wu, JR Larus
--- The adaption to Haskell is my own.
-calcFreqs :: IM.IntMap (IM.IntMap Prob) -> [(Int,Int)] -> [(Int, [Int])] -> [Int]
- -> (Array Int Double, IM.IntMap (IM.IntMap Prob))
-calcFreqs graph backEdges loops revPostOrder = runST $ do
- visitedNodes <- newArray (0,nodeCount-1) False :: ST s (STUArray s Int Bool)
- blockFreqs <- newArray (0,nodeCount-1) 0.0 :: ST s (STUArray s Int Double)
- edgeProbs <- newSTRef graph
- edgeBackProbs <- newSTRef graph
-
- -- let traceArray a = do
- -- vs <- forM [0..nodeCount-1] $ \i -> readArray a i >>= (\v -> return (i,v))
- -- trace ("array: " ++ show vs) $ return ()
-
- let -- See #1600, we need to inline or unboxing makes perf worse.
- -- {-# INLINE getFreq #-}
- {-# INLINE visited #-}
- visited b = unsafeRead visitedNodes b
- getFreq b = unsafeRead blockFreqs b
- -- setFreq :: forall s. Int -> Double -> ST s ()
- setFreq b f = unsafeWrite blockFreqs b f
- -- setVisited :: forall s. Node -> ST s ()
- setVisited b = unsafeWrite visitedNodes b True
- -- Frequency/probability that edge is taken.
- getProb' arr b1 b2 = readSTRef arr >>=
- (\graph ->
- return .
- fromMaybe (error "getFreq 1") .
- IM.lookup b2 .
- fromMaybe (error "getFreq 2") $
- (IM.lookup b1 graph)
- )
- setProb' arr b1 b2 prob = do
- g <- readSTRef arr
- let !m = fromMaybe (error "Foo") $ IM.lookup b1 g
- !m' = IM.insert b2 prob m
- writeSTRef arr $! (IM.insert b1 m' g)
-
- getEdgeFreq b1 b2 = getProb' edgeProbs b1 b2
- setEdgeFreq b1 b2 = setProb' edgeProbs b1 b2
- getProb b1 b2 = fromMaybe (error "getProb") $ do
- m' <- IM.lookup b1 graph
- IM.lookup b2 m'
-
- getBackProb b1 b2 = getProb' edgeBackProbs b1 b2
- setBackProb b1 b2 = setProb' edgeBackProbs b1 b2
-
-
- let -- calcOutFreqs :: Node -> ST s ()
- calcOutFreqs bhead block = do
- !f <- getFreq block
- forM (successors block) $ \bi -> do
- let !prob = getProb block bi
- let !succFreq = f * prob
- setEdgeFreq block bi succFreq
- -- traceM $ "SetOut: " ++ show (block, bi, f, prob, succFreq)
- when (bi == bhead) $ setBackProb block bi succFreq
-
-
- let propFreq block head = do
- -- traceM ("prop:" ++ show (block,head))
- -- traceShowM block
-
- !v <- visited block
- if v then
- return () --Dont look at nodes twice
- else if block == head then
- setFreq block 1.0 -- Loop header frequency is always 1
- else do
- let preds = IS.elems $ predecessors block
- irreducible <- (fmap or) $ forM preds $ \bp -> do
- !bp_visited <- visited bp
- let bp_backedge = isBackEdge bp block
- return (not bp_visited && not bp_backedge)
-
- if irreducible
- then return () -- Rare we don't care
- else do
- setFreq block 0
- !cycleProb <- sum <$> (forM preds $ \pred -> do
- if isBackEdge pred block
- then
- getBackProb pred block
- else do
- !f <- getFreq block
- !prob <- getEdgeFreq pred block
- setFreq block $! f + prob
- return 0)
- -- traceM $ "cycleProb:" ++ show cycleProb
- let limit = 1 - 1/512 -- Paper uses 1 - epsilon, but this works.
- -- determines how large likelyhoods in loops can grow.
- !cycleProb <- return $ min cycleProb limit -- <- return $ if cycleProb > limit then limit else cycleProb
- -- traceM $ "cycleProb:" ++ show cycleProb
-
- !f <- getFreq block
- setFreq block (f / (1.0 - cycleProb))
-
- setVisited block
- calcOutFreqs head block
-
- -- Loops, by nesting, inner to outer
- forM_ loops $ \(head, body) -> do
- forM_ [0 .. nodeCount - 1] (\i -> unsafeWrite visitedNodes i True) -- Mark all nodes as visited.
- forM_ body (\i -> unsafeWrite visitedNodes i False) -- Mark all blocks reachable from head as not visited
- forM_ body $ \block -> propFreq block head
-
- -- After dealing with all loops, deal with non-looping parts of the CFG
- forM_ [0 .. nodeCount - 1] (\i -> unsafeWrite visitedNodes i False) -- Everything in revPostOrder is reachable
- forM_ revPostOrder $ \block -> propFreq block (head revPostOrder)
-
- -- trace ("Final freqs:") $ return ()
- -- let freqString = pprFreqs freqs
- -- trace (unlines freqString) $ return ()
- -- trace (pprFre) $ return ()
- graph' <- readSTRef edgeProbs
- freqs' <- unsafeFreeze blockFreqs
-
- return (freqs', graph')
- where
- -- How can these lookups fail? Consider the CFG [A -> B]
- predecessors :: Int -> IS.IntSet
- predecessors b = fromMaybe IS.empty $ IM.lookup b revGraph
- successors :: Int -> [Int]
- successors b = fromMaybe (lookupError "succ" b graph)$ IM.keys <$> IM.lookup b graph
- lookupError s b g = pprPanic ("Lookup error " ++ s) $
- ( text "node" <+> ppr b $$
- text "graph" <+>
- vcat (map (\(k,m) -> ppr (k,m :: IM.IntMap Double)) $ IM.toList g)
- )
-
- nodeCount = IM.foldl' (\count toMap -> IM.foldlWithKey' countTargets count toMap) (IM.size graph) graph
- where
- countTargets = (\count k _ -> countNode k + count )
- countNode n = if IM.member n graph then 0 else 1
-
- isBackEdge from to = S.member (from,to) backEdgeSet
- backEdgeSet = S.fromList backEdges
-
- revGraph :: IntMap IntSet
- revGraph = IM.foldlWithKey' (\m from toMap -> addEdges m from toMap) IM.empty graph
- where
- addEdges m0 from toMap = IM.foldlWithKey' (\m k _ -> addEdge m from k) m0 toMap
- addEdge m0 from to = IM.insertWith IS.union to (IS.singleton from) m0
diff --git a/compiler/nativeGen/CPrim.hs b/compiler/nativeGen/CPrim.hs
deleted file mode 100644
index 344e62d53c..0000000000
--- a/compiler/nativeGen/CPrim.hs
+++ /dev/null
@@ -1,133 +0,0 @@
--- | Generating C symbol names emitted by the compiler.
-module CPrim
- ( atomicReadLabel
- , atomicWriteLabel
- , atomicRMWLabel
- , cmpxchgLabel
- , popCntLabel
- , pdepLabel
- , pextLabel
- , bSwapLabel
- , bRevLabel
- , clzLabel
- , ctzLabel
- , word2FloatLabel
- ) where
-
-import GhcPrelude
-
-import GHC.Cmm.Type
-import GHC.Cmm.MachOp
-import Outputable
-
-popCntLabel :: Width -> String
-popCntLabel w = "hs_popcnt" ++ pprWidth w
- where
- pprWidth W8 = "8"
- pprWidth W16 = "16"
- pprWidth W32 = "32"
- pprWidth W64 = "64"
- pprWidth w = pprPanic "popCntLabel: Unsupported word width " (ppr w)
-
-pdepLabel :: Width -> String
-pdepLabel w = "hs_pdep" ++ pprWidth w
- where
- pprWidth W8 = "8"
- pprWidth W16 = "16"
- pprWidth W32 = "32"
- pprWidth W64 = "64"
- pprWidth w = pprPanic "pdepLabel: Unsupported word width " (ppr w)
-
-pextLabel :: Width -> String
-pextLabel w = "hs_pext" ++ pprWidth w
- where
- pprWidth W8 = "8"
- pprWidth W16 = "16"
- pprWidth W32 = "32"
- pprWidth W64 = "64"
- pprWidth w = pprPanic "pextLabel: Unsupported word width " (ppr w)
-
-bSwapLabel :: Width -> String
-bSwapLabel w = "hs_bswap" ++ pprWidth w
- where
- pprWidth W16 = "16"
- pprWidth W32 = "32"
- pprWidth W64 = "64"
- pprWidth w = pprPanic "bSwapLabel: Unsupported word width " (ppr w)
-
-bRevLabel :: Width -> String
-bRevLabel w = "hs_bitrev" ++ pprWidth w
- where
- pprWidth W8 = "8"
- pprWidth W16 = "16"
- pprWidth W32 = "32"
- pprWidth W64 = "64"
- pprWidth w = pprPanic "bRevLabel: Unsupported word width " (ppr w)
-
-clzLabel :: Width -> String
-clzLabel w = "hs_clz" ++ pprWidth w
- where
- pprWidth W8 = "8"
- pprWidth W16 = "16"
- pprWidth W32 = "32"
- pprWidth W64 = "64"
- pprWidth w = pprPanic "clzLabel: Unsupported word width " (ppr w)
-
-ctzLabel :: Width -> String
-ctzLabel w = "hs_ctz" ++ pprWidth w
- where
- pprWidth W8 = "8"
- pprWidth W16 = "16"
- pprWidth W32 = "32"
- pprWidth W64 = "64"
- pprWidth w = pprPanic "ctzLabel: Unsupported word width " (ppr w)
-
-word2FloatLabel :: Width -> String
-word2FloatLabel w = "hs_word2float" ++ pprWidth w
- where
- pprWidth W32 = "32"
- pprWidth W64 = "64"
- pprWidth w = pprPanic "word2FloatLabel: Unsupported word width " (ppr w)
-
-atomicRMWLabel :: Width -> AtomicMachOp -> String
-atomicRMWLabel w amop = "hs_atomic_" ++ pprFunName amop ++ pprWidth w
- where
- pprWidth W8 = "8"
- pprWidth W16 = "16"
- pprWidth W32 = "32"
- pprWidth W64 = "64"
- pprWidth w = pprPanic "atomicRMWLabel: Unsupported word width " (ppr w)
-
- pprFunName AMO_Add = "add"
- pprFunName AMO_Sub = "sub"
- pprFunName AMO_And = "and"
- pprFunName AMO_Nand = "nand"
- pprFunName AMO_Or = "or"
- pprFunName AMO_Xor = "xor"
-
-cmpxchgLabel :: Width -> String
-cmpxchgLabel w = "hs_cmpxchg" ++ pprWidth w
- where
- pprWidth W8 = "8"
- pprWidth W16 = "16"
- pprWidth W32 = "32"
- pprWidth W64 = "64"
- pprWidth w = pprPanic "cmpxchgLabel: Unsupported word width " (ppr w)
-
-atomicReadLabel :: Width -> String
-atomicReadLabel w = "hs_atomicread" ++ pprWidth w
- where
- pprWidth W8 = "8"
- pprWidth W16 = "16"
- pprWidth W32 = "32"
- pprWidth W64 = "64"
- pprWidth w = pprPanic "atomicReadLabel: Unsupported word width " (ppr w)
-
-atomicWriteLabel :: Width -> String
-atomicWriteLabel w = "hs_atomicwrite" ++ pprWidth w
- where
- pprWidth W8 = "8"
- pprWidth W16 = "16"
- pprWidth W32 = "32"
- pprWidth W64 = "64"
- pprWidth w = pprPanic "atomicWriteLabel: Unsupported word width " (ppr w)
diff --git a/compiler/nativeGen/Dwarf.hs b/compiler/nativeGen/Dwarf.hs
deleted file mode 100644
index 5bd62a7234..0000000000
--- a/compiler/nativeGen/Dwarf.hs
+++ /dev/null
@@ -1,269 +0,0 @@
-module Dwarf (
- dwarfGen
- ) where
-
-import GhcPrelude
-
-import GHC.Cmm.CLabel
-import GHC.Cmm.Expr ( GlobalReg(..) )
-import Config ( cProjectName, cProjectVersion )
-import CoreSyn ( Tickish(..) )
-import GHC.Cmm.DebugBlock
-import GHC.Driver.Session
-import Module
-import Outputable
-import GHC.Platform
-import Unique
-import UniqSupply
-
-import Dwarf.Constants
-import Dwarf.Types
-
-import Control.Arrow ( first )
-import Control.Monad ( mfilter )
-import Data.Maybe
-import Data.List ( sortBy )
-import Data.Ord ( comparing )
-import qualified Data.Map as Map
-import System.FilePath
-import System.Directory ( getCurrentDirectory )
-
-import qualified GHC.Cmm.Dataflow.Label as H
-import qualified GHC.Cmm.Dataflow.Collections as H
-
--- | Generate DWARF/debug information
-dwarfGen :: DynFlags -> ModLocation -> UniqSupply -> [DebugBlock]
- -> IO (SDoc, UniqSupply)
-dwarfGen _ _ us [] = return (empty, us)
-dwarfGen df modLoc us blocks = do
-
- -- Convert debug data structures to DWARF info records
- -- We strip out block information when running with -g0 or -g1.
- let procs = debugSplitProcs blocks
- stripBlocks dbg
- | debugLevel df < 2 = dbg { dblBlocks = [] }
- | otherwise = dbg
- compPath <- getCurrentDirectory
- let lowLabel = dblCLabel $ head procs
- highLabel = mkAsmTempEndLabel $ dblCLabel $ last procs
- dwarfUnit = DwarfCompileUnit
- { dwChildren = map (procToDwarf df) (map stripBlocks procs)
- , dwName = fromMaybe "" (ml_hs_file modLoc)
- , dwCompDir = addTrailingPathSeparator compPath
- , dwProducer = cProjectName ++ " " ++ cProjectVersion
- , dwLowLabel = lowLabel
- , dwHighLabel = highLabel
- , dwLineLabel = dwarfLineLabel
- }
-
- -- Check whether we have any source code information, so we do not
- -- end up writing a pointer to an empty .debug_line section
- -- (dsymutil on Mac Os gets confused by this).
- let haveSrcIn blk = isJust (dblSourceTick blk) && isJust (dblPosition blk)
- || any haveSrcIn (dblBlocks blk)
- haveSrc = any haveSrcIn procs
-
- -- .debug_abbrev section: Declare the format we're using
- let abbrevSct = pprAbbrevDecls haveSrc
-
- -- .debug_info section: Information records on procedures and blocks
- let -- unique to identify start and end compilation unit .debug_inf
- (unitU, us') = takeUniqFromSupply us
- infoSct = vcat [ ptext dwarfInfoLabel <> colon
- , dwarfInfoSection
- , compileUnitHeader unitU
- , pprDwarfInfo haveSrc dwarfUnit
- , compileUnitFooter unitU
- ]
-
- -- .debug_line section: Generated mainly by the assembler, but we
- -- need to label it
- let lineSct = dwarfLineSection $$
- ptext dwarfLineLabel <> colon
-
- -- .debug_frame section: Information about the layout of the GHC stack
- let (framesU, us'') = takeUniqFromSupply us'
- frameSct = dwarfFrameSection $$
- ptext dwarfFrameLabel <> colon $$
- pprDwarfFrame (debugFrame framesU procs)
-
- -- .aranges section: Information about the bounds of compilation units
- let aranges' | gopt Opt_SplitSections df = map mkDwarfARange procs
- | otherwise = [DwarfARange lowLabel highLabel]
- let aranges = dwarfARangesSection $$ pprDwarfARanges aranges' unitU
-
- return (infoSct $$ abbrevSct $$ lineSct $$ frameSct $$ aranges, us'')
-
--- | Build an address range entry for one proc.
--- With split sections, each proc needs its own entry, since they may get
--- scattered in the final binary. Without split sections, we could make a
--- single arange based on the first/last proc.
-mkDwarfARange :: DebugBlock -> DwarfARange
-mkDwarfARange proc = DwarfARange start end
- where
- start = dblCLabel proc
- end = mkAsmTempEndLabel start
-
--- | Header for a compilation unit, establishing global format
--- parameters
-compileUnitHeader :: Unique -> SDoc
-compileUnitHeader unitU = sdocWithPlatform $ \plat ->
- let cuLabel = mkAsmTempLabel unitU -- sits right before initialLength field
- length = ppr (mkAsmTempEndLabel cuLabel) <> char '-' <> ppr cuLabel
- <> text "-4" -- length of initialLength field
- in vcat [ ppr cuLabel <> colon
- , text "\t.long " <> length -- compilation unit size
- , pprHalf 3 -- DWARF version
- , sectionOffset (ptext dwarfAbbrevLabel) (ptext dwarfAbbrevLabel)
- -- abbrevs offset
- , text "\t.byte " <> ppr (platformWordSizeInBytes plat) -- word size
- ]
-
--- | Compilation unit footer, mainly establishing size of debug sections
-compileUnitFooter :: Unique -> SDoc
-compileUnitFooter unitU =
- let cuEndLabel = mkAsmTempEndLabel $ mkAsmTempLabel unitU
- in ppr cuEndLabel <> colon
-
--- | Splits the blocks by procedures. In the result all nested blocks
--- will come from the same procedure as the top-level block. See
--- Note [Splitting DebugBlocks] for details.
-debugSplitProcs :: [DebugBlock] -> [DebugBlock]
-debugSplitProcs b = concat $ H.mapElems $ mergeMaps $ map (split Nothing) b
- where mergeMaps = foldr (H.mapUnionWithKey (const (++))) H.mapEmpty
- split :: Maybe DebugBlock -> DebugBlock -> H.LabelMap [DebugBlock]
- split parent blk = H.mapInsert prc [blk'] nested
- where prc = dblProcedure blk
- blk' = blk { dblBlocks = own_blks
- , dblParent = parent
- }
- own_blks = fromMaybe [] $ H.mapLookup prc nested
- nested = mergeMaps $ map (split parent') $ dblBlocks blk
- -- Figure out who should be the parent of nested blocks.
- -- If @blk@ is optimized out then it isn't a good choice
- -- and we just use its parent.
- parent'
- | Nothing <- dblPosition blk = parent
- | otherwise = Just blk
-
-{-
-Note [Splitting DebugBlocks]
-
-DWARF requires that we break up the nested DebugBlocks produced from
-the C-- AST. For instance, we begin with tick trees containing nested procs.
-For example,
-
- proc A [tick1, tick2]
- block B [tick3]
- proc C [tick4]
-
-when producing DWARF we need to procs (which are represented in DWARF as
-TAG_subprogram DIEs) to be top-level DIEs. debugSplitProcs is responsible for
-this transform, pulling out the nested procs into top-level procs.
-
-However, in doing this we need to be careful to preserve the parentage of the
-nested procs. This is the reason DebugBlocks carry the dblParent field, allowing
-us to reorganize the above tree as,
-
- proc A [tick1, tick2]
- block B [tick3]
- proc C [tick4] parent=B
-
-Here we have annotated the new proc C with an attribute giving its original
-parent, B.
--}
-
--- | Generate DWARF info for a procedure debug block
-procToDwarf :: DynFlags -> DebugBlock -> DwarfInfo
-procToDwarf df prc
- = DwarfSubprogram { dwChildren = map (blockToDwarf df) (dblBlocks prc)
- , dwName = case dblSourceTick prc of
- Just s@SourceNote{} -> sourceName s
- _otherwise -> showSDocDump df $ ppr $ dblLabel prc
- , dwLabel = dblCLabel prc
- , dwParent = fmap mkAsmTempDieLabel
- $ mfilter goodParent
- $ fmap dblCLabel (dblParent prc)
- }
- where
- goodParent a | a == dblCLabel prc = False
- -- Omit parent if it would be self-referential
- goodParent a | not (externallyVisibleCLabel a)
- , debugLevel df < 2 = False
- -- We strip block information when running -g0 or -g1, don't
- -- refer to blocks in that case. Fixes #14894.
- goodParent _ = True
-
--- | Generate DWARF info for a block
-blockToDwarf :: DynFlags -> DebugBlock -> DwarfInfo
-blockToDwarf df blk
- = DwarfBlock { dwChildren = concatMap (tickToDwarf df) (dblTicks blk)
- ++ map (blockToDwarf df) (dblBlocks blk)
- , dwLabel = dblCLabel blk
- , dwMarker = marker
- }
- where
- marker
- | Just _ <- dblPosition blk = Just $ mkAsmTempLabel $ dblLabel blk
- | otherwise = Nothing -- block was optimized out
-
-tickToDwarf :: DynFlags -> Tickish () -> [DwarfInfo]
-tickToDwarf _ (SourceNote ss _) = [DwarfSrcNote ss]
-tickToDwarf _ _ = []
-
--- | Generates the data for the debug frame section, which encodes the
--- desired stack unwind behaviour for the debugger
-debugFrame :: Unique -> [DebugBlock] -> DwarfFrame
-debugFrame u procs
- = DwarfFrame { dwCieLabel = mkAsmTempLabel u
- , dwCieInit = initUws
- , dwCieProcs = map (procToFrame initUws) procs
- }
- where
- initUws :: UnwindTable
- initUws = Map.fromList [(Sp, Just (UwReg Sp 0))]
-
--- | Generates unwind information for a procedure debug block
-procToFrame :: UnwindTable -> DebugBlock -> DwarfFrameProc
-procToFrame initUws blk
- = DwarfFrameProc { dwFdeProc = dblCLabel blk
- , dwFdeHasInfo = dblHasInfoTbl blk
- , dwFdeBlocks = map (uncurry blockToFrame)
- (setHasInfo blockUws)
- }
- where blockUws :: [(DebugBlock, [UnwindPoint])]
- blockUws = map snd $ sortBy (comparing fst) $ flatten blk
-
- flatten :: DebugBlock
- -> [(Int, (DebugBlock, [UnwindPoint]))]
- flatten b@DebugBlock{ dblPosition=pos, dblUnwind=uws, dblBlocks=blocks }
- | Just p <- pos = (p, (b, uws')):nested
- | otherwise = nested -- block was optimized out
- where uws' = addDefaultUnwindings initUws uws
- nested = concatMap flatten blocks
-
- -- | If the current procedure has an info table, then we also say that
- -- its first block has one to ensure that it gets the necessary -1
- -- offset applied to its start address.
- -- See Note [Info Offset] in Dwarf.Types.
- setHasInfo :: [(DebugBlock, [UnwindPoint])]
- -> [(DebugBlock, [UnwindPoint])]
- setHasInfo [] = []
- setHasInfo (c0:cs) = first setIt c0 : cs
- where
- setIt child =
- child { dblHasInfoTbl = dblHasInfoTbl child
- || dblHasInfoTbl blk }
-
-blockToFrame :: DebugBlock -> [UnwindPoint] -> DwarfFrameBlock
-blockToFrame blk uws
- = DwarfFrameBlock { dwFdeBlkHasInfo = dblHasInfoTbl blk
- , dwFdeUnwind = uws
- }
-
-addDefaultUnwindings :: UnwindTable -> [UnwindPoint] -> [UnwindPoint]
-addDefaultUnwindings tbl pts =
- [ UnwindPoint lbl (tbl' `mappend` tbl)
- -- mappend is left-biased
- | UnwindPoint lbl tbl' <- pts
- ]
diff --git a/compiler/nativeGen/Dwarf/Constants.hs b/compiler/nativeGen/Dwarf/Constants.hs
deleted file mode 100644
index 01b85c47bc..0000000000
--- a/compiler/nativeGen/Dwarf/Constants.hs
+++ /dev/null
@@ -1,229 +0,0 @@
--- | Constants describing the DWARF format. Most of this simply
--- mirrors /usr/include/dwarf.h.
-
-module Dwarf.Constants where
-
-import GhcPrelude
-
-import AsmUtils
-import FastString
-import GHC.Platform
-import Outputable
-
-import Reg
-import X86.Regs
-
-import Data.Word
-
--- | Language ID used for Haskell.
-dW_LANG_Haskell :: Word
-dW_LANG_Haskell = 0x18
- -- Thanks to Nathan Howell for getting us our very own language ID!
-
--- * Dwarf tags
-dW_TAG_compile_unit, dW_TAG_subroutine_type,
- dW_TAG_file_type, dW_TAG_subprogram, dW_TAG_lexical_block,
- dW_TAG_base_type, dW_TAG_structure_type, dW_TAG_pointer_type,
- dW_TAG_array_type, dW_TAG_subrange_type, dW_TAG_typedef,
- dW_TAG_variable, dW_TAG_arg_variable, dW_TAG_auto_variable,
- dW_TAG_ghc_src_note :: Word
-dW_TAG_array_type = 1
-dW_TAG_lexical_block = 11
-dW_TAG_pointer_type = 15
-dW_TAG_compile_unit = 17
-dW_TAG_structure_type = 19
-dW_TAG_typedef = 22
-dW_TAG_subroutine_type = 32
-dW_TAG_subrange_type = 33
-dW_TAG_base_type = 36
-dW_TAG_file_type = 41
-dW_TAG_subprogram = 46
-dW_TAG_variable = 52
-dW_TAG_auto_variable = 256
-dW_TAG_arg_variable = 257
-
-dW_TAG_ghc_src_note = 0x5b00
-
--- * Dwarf attributes
-dW_AT_name, dW_AT_stmt_list, dW_AT_low_pc, dW_AT_high_pc, dW_AT_language,
- dW_AT_comp_dir, dW_AT_producer, dW_AT_external, dW_AT_frame_base,
- dW_AT_use_UTF8, dW_AT_MIPS_linkage_name :: Word
-dW_AT_name = 0x03
-dW_AT_stmt_list = 0x10
-dW_AT_low_pc = 0x11
-dW_AT_high_pc = 0x12
-dW_AT_language = 0x13
-dW_AT_comp_dir = 0x1b
-dW_AT_producer = 0x25
-dW_AT_external = 0x3f
-dW_AT_frame_base = 0x40
-dW_AT_use_UTF8 = 0x53
-dW_AT_MIPS_linkage_name = 0x2007
-
--- * Custom DWARF attributes
--- Chosen a more or less random section of the vendor-extensible region
-
--- ** Describing C-- blocks
--- These appear in DW_TAG_lexical_scope DIEs corresponding to C-- blocks
-dW_AT_ghc_tick_parent :: Word
-dW_AT_ghc_tick_parent = 0x2b20
-
--- ** Describing source notes
--- These appear in DW_TAG_ghc_src_note DIEs
-dW_AT_ghc_span_file, dW_AT_ghc_span_start_line,
- dW_AT_ghc_span_start_col, dW_AT_ghc_span_end_line,
- dW_AT_ghc_span_end_col :: Word
-dW_AT_ghc_span_file = 0x2b00
-dW_AT_ghc_span_start_line = 0x2b01
-dW_AT_ghc_span_start_col = 0x2b02
-dW_AT_ghc_span_end_line = 0x2b03
-dW_AT_ghc_span_end_col = 0x2b04
-
-
--- * Abbrev declarations
-dW_CHILDREN_no, dW_CHILDREN_yes :: Word8
-dW_CHILDREN_no = 0
-dW_CHILDREN_yes = 1
-
-dW_FORM_addr, dW_FORM_data2, dW_FORM_data4, dW_FORM_string, dW_FORM_flag,
- dW_FORM_block1, dW_FORM_ref4, dW_FORM_ref_addr, dW_FORM_flag_present :: Word
-dW_FORM_addr = 0x01
-dW_FORM_data2 = 0x05
-dW_FORM_data4 = 0x06
-dW_FORM_string = 0x08
-dW_FORM_flag = 0x0c
-dW_FORM_block1 = 0x0a
-dW_FORM_ref_addr = 0x10
-dW_FORM_ref4 = 0x13
-dW_FORM_flag_present = 0x19
-
--- * Dwarf native types
-dW_ATE_address, dW_ATE_boolean, dW_ATE_float, dW_ATE_signed,
- dW_ATE_signed_char, dW_ATE_unsigned, dW_ATE_unsigned_char :: Word
-dW_ATE_address = 1
-dW_ATE_boolean = 2
-dW_ATE_float = 4
-dW_ATE_signed = 5
-dW_ATE_signed_char = 6
-dW_ATE_unsigned = 7
-dW_ATE_unsigned_char = 8
-
--- * Call frame information
-dW_CFA_set_loc, dW_CFA_undefined, dW_CFA_same_value,
- dW_CFA_def_cfa, dW_CFA_def_cfa_offset, dW_CFA_def_cfa_expression,
- dW_CFA_expression, dW_CFA_offset_extended_sf, dW_CFA_def_cfa_offset_sf,
- dW_CFA_def_cfa_sf, dW_CFA_val_offset, dW_CFA_val_expression,
- dW_CFA_offset :: Word8
-dW_CFA_set_loc = 0x01
-dW_CFA_undefined = 0x07
-dW_CFA_same_value = 0x08
-dW_CFA_def_cfa = 0x0c
-dW_CFA_def_cfa_offset = 0x0e
-dW_CFA_def_cfa_expression = 0x0f
-dW_CFA_expression = 0x10
-dW_CFA_offset_extended_sf = 0x11
-dW_CFA_def_cfa_sf = 0x12
-dW_CFA_def_cfa_offset_sf = 0x13
-dW_CFA_val_offset = 0x14
-dW_CFA_val_expression = 0x16
-dW_CFA_offset = 0x80
-
--- * Operations
-dW_OP_addr, dW_OP_deref, dW_OP_consts,
- dW_OP_minus, dW_OP_mul, dW_OP_plus,
- dW_OP_lit0, dW_OP_breg0, dW_OP_call_frame_cfa :: Word8
-dW_OP_addr = 0x03
-dW_OP_deref = 0x06
-dW_OP_consts = 0x11
-dW_OP_minus = 0x1c
-dW_OP_mul = 0x1e
-dW_OP_plus = 0x22
-dW_OP_lit0 = 0x30
-dW_OP_breg0 = 0x70
-dW_OP_call_frame_cfa = 0x9c
-
--- * Dwarf section declarations
-dwarfInfoSection, dwarfAbbrevSection, dwarfLineSection,
- dwarfFrameSection, dwarfGhcSection, dwarfARangesSection :: SDoc
-dwarfInfoSection = dwarfSection "info"
-dwarfAbbrevSection = dwarfSection "abbrev"
-dwarfLineSection = dwarfSection "line"
-dwarfFrameSection = dwarfSection "frame"
-dwarfGhcSection = dwarfSection "ghc"
-dwarfARangesSection = dwarfSection "aranges"
-
-dwarfSection :: String -> SDoc
-dwarfSection name = sdocWithPlatform $ \plat ->
- case platformOS plat of
- os | osElfTarget os
- -> text "\t.section .debug_" <> text name <> text ",\"\","
- <> sectionType "progbits"
- | osMachOTarget os
- -> text "\t.section __DWARF,__debug_" <> text name <> text ",regular,debug"
- | otherwise
- -> text "\t.section .debug_" <> text name <> text ",\"dr\""
-
--- * Dwarf section labels
-dwarfInfoLabel, dwarfAbbrevLabel, dwarfLineLabel, dwarfFrameLabel :: PtrString
-dwarfInfoLabel = sLit ".Lsection_info"
-dwarfAbbrevLabel = sLit ".Lsection_abbrev"
-dwarfLineLabel = sLit ".Lsection_line"
-dwarfFrameLabel = sLit ".Lsection_frame"
-
--- | Mapping of registers to DWARF register numbers
-dwarfRegNo :: Platform -> Reg -> Word8
-dwarfRegNo p r = case platformArch p of
- ArchX86
- | r == eax -> 0
- | r == ecx -> 1 -- yes, no typo
- | r == edx -> 2
- | r == ebx -> 3
- | r == esp -> 4
- | r == ebp -> 5
- | r == esi -> 6
- | r == edi -> 7
- ArchX86_64
- | r == rax -> 0
- | r == rdx -> 1 -- this neither. The order GCC allocates registers in?
- | r == rcx -> 2
- | r == rbx -> 3
- | r == rsi -> 4
- | r == rdi -> 5
- | r == rbp -> 6
- | r == rsp -> 7
- | r == r8 -> 8
- | r == r9 -> 9
- | r == r10 -> 10
- | r == r11 -> 11
- | r == r12 -> 12
- | r == r13 -> 13
- | r == r14 -> 14
- | r == r15 -> 15
- | r == xmm0 -> 17
- | r == xmm1 -> 18
- | r == xmm2 -> 19
- | r == xmm3 -> 20
- | r == xmm4 -> 21
- | r == xmm5 -> 22
- | r == xmm6 -> 23
- | r == xmm7 -> 24
- | r == xmm8 -> 25
- | r == xmm9 -> 26
- | r == xmm10 -> 27
- | r == xmm11 -> 28
- | r == xmm12 -> 29
- | r == xmm13 -> 30
- | r == xmm14 -> 31
- | r == xmm15 -> 32
- _other -> error "dwarfRegNo: Unsupported platform or unknown register!"
-
--- | Virtual register number to use for return address.
-dwarfReturnRegNo :: Platform -> Word8
-dwarfReturnRegNo p
- -- We "overwrite" IP with our pseudo register - that makes sense, as
- -- when using this mechanism gdb already knows the IP anyway. Clang
- -- does this too, so it must be safe.
- = case platformArch p of
- ArchX86 -> 8 -- eip
- ArchX86_64 -> 16 -- rip
- _other -> error "dwarfReturnRegNo: Unsupported platform!"
diff --git a/compiler/nativeGen/Dwarf/Types.hs b/compiler/nativeGen/Dwarf/Types.hs
deleted file mode 100644
index c006081872..0000000000
--- a/compiler/nativeGen/Dwarf/Types.hs
+++ /dev/null
@@ -1,612 +0,0 @@
-module Dwarf.Types
- ( -- * Dwarf information
- DwarfInfo(..)
- , pprDwarfInfo
- , pprAbbrevDecls
- -- * Dwarf address range table
- , DwarfARange(..)
- , pprDwarfARanges
- -- * Dwarf frame
- , DwarfFrame(..), DwarfFrameProc(..), DwarfFrameBlock(..)
- , pprDwarfFrame
- -- * Utilities
- , pprByte
- , pprHalf
- , pprData4'
- , pprDwWord
- , pprWord
- , pprLEBWord
- , pprLEBInt
- , wordAlign
- , sectionOffset
- )
- where
-
-import GhcPrelude
-
-import GHC.Cmm.DebugBlock
-import GHC.Cmm.CLabel
-import GHC.Cmm.Expr ( GlobalReg(..) )
-import Encoding
-import FastString
-import Outputable
-import GHC.Platform
-import Unique
-import Reg
-import SrcLoc
-import Util
-
-import Dwarf.Constants
-
-import qualified Data.ByteString as BS
-import qualified Control.Monad.Trans.State.Strict as S
-import Control.Monad (zipWithM, join)
-import Data.Bits
-import qualified Data.Map as Map
-import Data.Word
-import Data.Char
-
-import GHC.Platform.Regs
-
--- | Individual dwarf records. Each one will be encoded as an entry in
--- the @.debug_info@ section.
-data DwarfInfo
- = DwarfCompileUnit { dwChildren :: [DwarfInfo]
- , dwName :: String
- , dwProducer :: String
- , dwCompDir :: String
- , dwLowLabel :: CLabel
- , dwHighLabel :: CLabel
- , dwLineLabel :: PtrString }
- | DwarfSubprogram { dwChildren :: [DwarfInfo]
- , dwName :: String
- , dwLabel :: CLabel
- , dwParent :: Maybe CLabel
- -- ^ label of DIE belonging to the parent tick
- }
- | DwarfBlock { dwChildren :: [DwarfInfo]
- , dwLabel :: CLabel
- , dwMarker :: Maybe CLabel
- }
- | DwarfSrcNote { dwSrcSpan :: RealSrcSpan
- }
-
--- | Abbreviation codes used for encoding above records in the
--- @.debug_info@ section.
-data DwarfAbbrev
- = DwAbbrNull -- ^ Pseudo, used for marking the end of lists
- | DwAbbrCompileUnit
- | DwAbbrSubprogram
- | DwAbbrSubprogramWithParent
- | DwAbbrBlockWithoutCode
- | DwAbbrBlock
- | DwAbbrGhcSrcNote
- deriving (Eq, Enum)
-
--- | Generate assembly for the given abbreviation code
-pprAbbrev :: DwarfAbbrev -> SDoc
-pprAbbrev = pprLEBWord . fromIntegral . fromEnum
-
--- | Abbreviation declaration. This explains the binary encoding we
--- use for representing 'DwarfInfo'. Be aware that this must be updated
--- along with 'pprDwarfInfo'.
-pprAbbrevDecls :: Bool -> SDoc
-pprAbbrevDecls haveDebugLine =
- let mkAbbrev abbr tag chld flds =
- let fld (tag, form) = pprLEBWord tag $$ pprLEBWord form
- in pprAbbrev abbr $$ pprLEBWord tag $$ pprByte chld $$
- vcat (map fld flds) $$ pprByte 0 $$ pprByte 0
- -- These are shared between DwAbbrSubprogram and
- -- DwAbbrSubprogramWithParent
- subprogramAttrs =
- [ (dW_AT_name, dW_FORM_string)
- , (dW_AT_MIPS_linkage_name, dW_FORM_string)
- , (dW_AT_external, dW_FORM_flag)
- , (dW_AT_low_pc, dW_FORM_addr)
- , (dW_AT_high_pc, dW_FORM_addr)
- , (dW_AT_frame_base, dW_FORM_block1)
- ]
- in dwarfAbbrevSection $$
- ptext dwarfAbbrevLabel <> colon $$
- mkAbbrev DwAbbrCompileUnit dW_TAG_compile_unit dW_CHILDREN_yes
- ([(dW_AT_name, dW_FORM_string)
- , (dW_AT_producer, dW_FORM_string)
- , (dW_AT_language, dW_FORM_data4)
- , (dW_AT_comp_dir, dW_FORM_string)
- , (dW_AT_use_UTF8, dW_FORM_flag_present) -- not represented in body
- , (dW_AT_low_pc, dW_FORM_addr)
- , (dW_AT_high_pc, dW_FORM_addr)
- ] ++
- (if haveDebugLine
- then [ (dW_AT_stmt_list, dW_FORM_data4) ]
- else [])) $$
- mkAbbrev DwAbbrSubprogram dW_TAG_subprogram dW_CHILDREN_yes
- subprogramAttrs $$
- mkAbbrev DwAbbrSubprogramWithParent dW_TAG_subprogram dW_CHILDREN_yes
- (subprogramAttrs ++ [(dW_AT_ghc_tick_parent, dW_FORM_ref_addr)]) $$
- mkAbbrev DwAbbrBlockWithoutCode dW_TAG_lexical_block dW_CHILDREN_yes
- [ (dW_AT_name, dW_FORM_string)
- ] $$
- mkAbbrev DwAbbrBlock dW_TAG_lexical_block dW_CHILDREN_yes
- [ (dW_AT_name, dW_FORM_string)
- , (dW_AT_low_pc, dW_FORM_addr)
- , (dW_AT_high_pc, dW_FORM_addr)
- ] $$
- mkAbbrev DwAbbrGhcSrcNote dW_TAG_ghc_src_note dW_CHILDREN_no
- [ (dW_AT_ghc_span_file, dW_FORM_string)
- , (dW_AT_ghc_span_start_line, dW_FORM_data4)
- , (dW_AT_ghc_span_start_col, dW_FORM_data2)
- , (dW_AT_ghc_span_end_line, dW_FORM_data4)
- , (dW_AT_ghc_span_end_col, dW_FORM_data2)
- ] $$
- pprByte 0
-
--- | Generate assembly for DWARF data
-pprDwarfInfo :: Bool -> DwarfInfo -> SDoc
-pprDwarfInfo haveSrc d
- = case d of
- DwarfCompileUnit {} -> hasChildren
- DwarfSubprogram {} -> hasChildren
- DwarfBlock {} -> hasChildren
- DwarfSrcNote {} -> noChildren
- where
- hasChildren =
- pprDwarfInfoOpen haveSrc d $$
- vcat (map (pprDwarfInfo haveSrc) (dwChildren d)) $$
- pprDwarfInfoClose
- noChildren = pprDwarfInfoOpen haveSrc d
-
--- | Prints assembler data corresponding to DWARF info records. Note
--- that the binary format of this is parameterized in @abbrevDecls@ and
--- has to be kept in synch.
-pprDwarfInfoOpen :: Bool -> DwarfInfo -> SDoc
-pprDwarfInfoOpen haveSrc (DwarfCompileUnit _ name producer compDir lowLabel
- highLabel lineLbl) =
- pprAbbrev DwAbbrCompileUnit
- $$ pprString name
- $$ pprString producer
- $$ pprData4 dW_LANG_Haskell
- $$ pprString compDir
- $$ pprWord (ppr lowLabel)
- $$ pprWord (ppr highLabel)
- $$ if haveSrc
- then sectionOffset (ptext lineLbl) (ptext dwarfLineLabel)
- else empty
-pprDwarfInfoOpen _ (DwarfSubprogram _ name label
- parent) = sdocWithDynFlags $ \df ->
- ppr (mkAsmTempDieLabel label) <> colon
- $$ pprAbbrev abbrev
- $$ pprString name
- $$ pprString (renderWithStyle (initSDocContext df (mkCodeStyle CStyle)) (ppr label))
- $$ pprFlag (externallyVisibleCLabel label)
- $$ pprWord (ppr label)
- $$ pprWord (ppr $ mkAsmTempEndLabel label)
- $$ pprByte 1
- $$ pprByte dW_OP_call_frame_cfa
- $$ parentValue
- where
- abbrev = case parent of Nothing -> DwAbbrSubprogram
- Just _ -> DwAbbrSubprogramWithParent
- parentValue = maybe empty pprParentDie parent
- pprParentDie sym = sectionOffset (ppr sym) (ptext dwarfInfoLabel)
-pprDwarfInfoOpen _ (DwarfBlock _ label Nothing) = sdocWithDynFlags $ \df ->
- ppr (mkAsmTempDieLabel label) <> colon
- $$ pprAbbrev DwAbbrBlockWithoutCode
- $$ pprString (renderWithStyle (initSDocContext df (mkCodeStyle CStyle)) (ppr label))
-pprDwarfInfoOpen _ (DwarfBlock _ label (Just marker)) = sdocWithDynFlags $ \df ->
- ppr (mkAsmTempDieLabel label) <> colon
- $$ pprAbbrev DwAbbrBlock
- $$ pprString (renderWithStyle (initSDocContext df (mkCodeStyle CStyle)) (ppr label))
- $$ pprWord (ppr marker)
- $$ pprWord (ppr $ mkAsmTempEndLabel marker)
-pprDwarfInfoOpen _ (DwarfSrcNote ss) =
- pprAbbrev DwAbbrGhcSrcNote
- $$ pprString' (ftext $ srcSpanFile ss)
- $$ pprData4 (fromIntegral $ srcSpanStartLine ss)
- $$ pprHalf (fromIntegral $ srcSpanStartCol ss)
- $$ pprData4 (fromIntegral $ srcSpanEndLine ss)
- $$ pprHalf (fromIntegral $ srcSpanEndCol ss)
-
--- | Close a DWARF info record with children
-pprDwarfInfoClose :: SDoc
-pprDwarfInfoClose = pprAbbrev DwAbbrNull
-
--- | A DWARF address range. This is used by the debugger to quickly locate
--- which compilation unit a given address belongs to. This type assumes
--- a non-segmented address-space.
-data DwarfARange
- = DwarfARange
- { dwArngStartLabel :: CLabel
- , dwArngEndLabel :: CLabel
- }
-
--- | Print assembler directives corresponding to a DWARF @.debug_aranges@
--- address table entry.
-pprDwarfARanges :: [DwarfARange] -> Unique -> SDoc
-pprDwarfARanges arngs unitU = sdocWithPlatform $ \plat ->
- let wordSize = platformWordSizeInBytes plat
- paddingSize = 4 :: Int
- -- header is 12 bytes long.
- -- entry is 8 bytes (32-bit platform) or 16 bytes (64-bit platform).
- -- pad such that first entry begins at multiple of entry size.
- pad n = vcat $ replicate n $ pprByte 0
- -- Fix for #17428
- initialLength = 8 + paddingSize + (1 + length arngs) * 2 * wordSize
- in pprDwWord (ppr initialLength)
- $$ pprHalf 2
- $$ sectionOffset (ppr $ mkAsmTempLabel $ unitU)
- (ptext dwarfInfoLabel)
- $$ pprByte (fromIntegral wordSize)
- $$ pprByte 0
- $$ pad paddingSize
- -- body
- $$ vcat (map pprDwarfARange arngs)
- -- terminus
- $$ pprWord (char '0')
- $$ pprWord (char '0')
-
-pprDwarfARange :: DwarfARange -> SDoc
-pprDwarfARange arng = pprWord (ppr $ dwArngStartLabel arng) $$ pprWord length
- where
- length = ppr (dwArngEndLabel arng)
- <> char '-' <> ppr (dwArngStartLabel arng)
-
--- | Information about unwind instructions for a procedure. This
--- corresponds to a "Common Information Entry" (CIE) in DWARF.
-data DwarfFrame
- = DwarfFrame
- { dwCieLabel :: CLabel
- , dwCieInit :: UnwindTable
- , dwCieProcs :: [DwarfFrameProc]
- }
-
--- | Unwind instructions for an individual procedure. Corresponds to a
--- "Frame Description Entry" (FDE) in DWARF.
-data DwarfFrameProc
- = DwarfFrameProc
- { dwFdeProc :: CLabel
- , dwFdeHasInfo :: Bool
- , dwFdeBlocks :: [DwarfFrameBlock]
- -- ^ List of blocks. Order must match asm!
- }
-
--- | Unwind instructions for a block. Will become part of the
--- containing FDE.
-data DwarfFrameBlock
- = DwarfFrameBlock
- { dwFdeBlkHasInfo :: Bool
- , dwFdeUnwind :: [UnwindPoint]
- -- ^ these unwind points must occur in the same order as they occur
- -- in the block
- }
-
-instance Outputable DwarfFrameBlock where
- ppr (DwarfFrameBlock hasInfo unwinds) = braces $ ppr hasInfo <+> ppr unwinds
-
--- | Header for the @.debug_frame@ section. Here we emit the "Common
--- Information Entry" record that establishes general call frame
--- parameters and the default stack layout.
-pprDwarfFrame :: DwarfFrame -> SDoc
-pprDwarfFrame DwarfFrame{dwCieLabel=cieLabel,dwCieInit=cieInit,dwCieProcs=procs}
- = sdocWithPlatform $ \plat ->
- let cieStartLabel= mkAsmTempDerivedLabel cieLabel (fsLit "_start")
- cieEndLabel = mkAsmTempEndLabel cieLabel
- length = ppr cieEndLabel <> char '-' <> ppr cieStartLabel
- spReg = dwarfGlobalRegNo plat Sp
- retReg = dwarfReturnRegNo plat
- wordSize = platformWordSizeInBytes plat
- pprInit :: (GlobalReg, Maybe UnwindExpr) -> SDoc
- pprInit (g, uw) = pprSetUnwind plat g (Nothing, uw)
-
- -- Preserve C stack pointer: This necessary to override that default
- -- unwinding behavior of setting $sp = CFA.
- preserveSp = case platformArch plat of
- ArchX86 -> pprByte dW_CFA_same_value $$ pprLEBWord 4
- ArchX86_64 -> pprByte dW_CFA_same_value $$ pprLEBWord 7
- _ -> empty
- in vcat [ ppr cieLabel <> colon
- , pprData4' length -- Length of CIE
- , ppr cieStartLabel <> colon
- , pprData4' (text "-1")
- -- Common Information Entry marker (-1 = 0xf..f)
- , pprByte 3 -- CIE version (we require DWARF 3)
- , pprByte 0 -- Augmentation (none)
- , pprByte 1 -- Code offset multiplicator
- , pprByte (128-fromIntegral wordSize)
- -- Data offset multiplicator
- -- (stacks grow down => "-w" in signed LEB128)
- , pprByte retReg -- virtual register holding return address
- ] $$
- -- Initial unwind table
- vcat (map pprInit $ Map.toList cieInit) $$
- vcat [ -- RET = *CFA
- pprByte (dW_CFA_offset+retReg)
- , pprByte 0
-
- -- Preserve C stack pointer
- , preserveSp
-
- -- Sp' = CFA
- -- (we need to set this manually as our (STG) Sp register is
- -- often not the architecture's default stack register)
- , pprByte dW_CFA_val_offset
- , pprLEBWord (fromIntegral spReg)
- , pprLEBWord 0
- ] $$
- wordAlign $$
- ppr cieEndLabel <> colon $$
- -- Procedure unwind tables
- vcat (map (pprFrameProc cieLabel cieInit) procs)
-
--- | Writes a "Frame Description Entry" for a procedure. This consists
--- mainly of referencing the CIE and writing state machine
--- instructions to describe how the frame base (CFA) changes.
-pprFrameProc :: CLabel -> UnwindTable -> DwarfFrameProc -> SDoc
-pprFrameProc frameLbl initUw (DwarfFrameProc procLbl hasInfo blocks)
- = let fdeLabel = mkAsmTempDerivedLabel procLbl (fsLit "_fde")
- fdeEndLabel = mkAsmTempDerivedLabel procLbl (fsLit "_fde_end")
- procEnd = mkAsmTempEndLabel procLbl
- ifInfo str = if hasInfo then text str else empty
- -- see [Note: Info Offset]
- in vcat [ whenPprDebug $ text "# Unwinding for" <+> ppr procLbl <> colon
- , pprData4' (ppr fdeEndLabel <> char '-' <> ppr fdeLabel)
- , ppr fdeLabel <> colon
- , pprData4' (ppr frameLbl <> char '-' <>
- ptext dwarfFrameLabel) -- Reference to CIE
- , pprWord (ppr procLbl <> ifInfo "-1") -- Code pointer
- , pprWord (ppr procEnd <> char '-' <>
- ppr procLbl <> ifInfo "+1") -- Block byte length
- ] $$
- vcat (S.evalState (mapM pprFrameBlock blocks) initUw) $$
- wordAlign $$
- ppr fdeEndLabel <> colon
-
--- | Generates unwind information for a block. We only generate
--- instructions where unwind information actually changes. This small
--- optimisations saves a lot of space, as subsequent blocks often have
--- the same unwind information.
-pprFrameBlock :: DwarfFrameBlock -> S.State UnwindTable SDoc
-pprFrameBlock (DwarfFrameBlock hasInfo uws0) =
- vcat <$> zipWithM pprFrameDecl (True : repeat False) uws0
- where
- pprFrameDecl :: Bool -> UnwindPoint -> S.State UnwindTable SDoc
- pprFrameDecl firstDecl (UnwindPoint lbl uws) = S.state $ \oldUws ->
- let -- Did a register's unwind expression change?
- isChanged :: GlobalReg -> Maybe UnwindExpr
- -> Maybe (Maybe UnwindExpr, Maybe UnwindExpr)
- isChanged g new
- -- the value didn't change
- | Just new == old = Nothing
- -- the value was and still is undefined
- | Nothing <- old
- , Nothing <- new = Nothing
- -- the value changed
- | otherwise = Just (join old, new)
- where
- old = Map.lookup g oldUws
-
- changed = Map.toList $ Map.mapMaybeWithKey isChanged uws
-
- in if oldUws == uws
- then (empty, oldUws)
- else let -- see [Note: Info Offset]
- needsOffset = firstDecl && hasInfo
- lblDoc = ppr lbl <>
- if needsOffset then text "-1" else empty
- doc = sdocWithPlatform $ \plat ->
- pprByte dW_CFA_set_loc $$ pprWord lblDoc $$
- vcat (map (uncurry $ pprSetUnwind plat) changed)
- in (doc, uws)
-
--- Note [Info Offset]
---
--- GDB was pretty much written with C-like programs in mind, and as a
--- result they assume that once you have a return address, it is a
--- good idea to look at (PC-1) to unwind further - as that's where the
--- "call" instruction is supposed to be.
---
--- Now on one hand, code generated by GHC looks nothing like what GDB
--- expects, and in fact going up from a return pointer is guaranteed
--- to land us inside an info table! On the other hand, that actually
--- gives us some wiggle room, as we expect IP to never *actually* end
--- up inside the info table, so we can "cheat" by putting whatever GDB
--- expects to see there. This is probably pretty safe, as GDB cannot
--- assume (PC-1) to be a valid code pointer in the first place - and I
--- have seen no code trying to correct this.
---
--- Note that this will not prevent GDB from failing to look-up the
--- correct function name for the frame, as that uses the symbol table,
--- which we can not manipulate as easily.
---
--- There's a GDB patch to address this at [1]. At the moment of writing
--- it's not merged, so I recommend building GDB with the patch if you
--- care about unwinding. The hack above doesn't cover every case.
---
--- [1] https://sourceware.org/ml/gdb-patches/2018-02/msg00055.html
-
--- | Get DWARF register ID for a given GlobalReg
-dwarfGlobalRegNo :: Platform -> GlobalReg -> Word8
-dwarfGlobalRegNo p UnwindReturnReg = dwarfReturnRegNo p
-dwarfGlobalRegNo p reg = maybe 0 (dwarfRegNo p . RegReal) $ globalRegMaybe p reg
-
--- | Generate code for setting the unwind information for a register,
--- optimized using its known old value in the table. Note that "Sp" is
--- special: We see it as synonym for the CFA.
-pprSetUnwind :: Platform
- -> GlobalReg
- -- ^ the register to produce an unwinding table entry for
- -> (Maybe UnwindExpr, Maybe UnwindExpr)
- -- ^ the old and new values of the register
- -> SDoc
-pprSetUnwind plat g (_, Nothing)
- = pprUndefUnwind plat g
-pprSetUnwind _ Sp (Just (UwReg s _), Just (UwReg s' o')) | s == s'
- = if o' >= 0
- then pprByte dW_CFA_def_cfa_offset $$ pprLEBWord (fromIntegral o')
- else pprByte dW_CFA_def_cfa_offset_sf $$ pprLEBInt o'
-pprSetUnwind plat Sp (_, Just (UwReg s' o'))
- = if o' >= 0
- then pprByte dW_CFA_def_cfa $$
- pprLEBRegNo plat s' $$
- pprLEBWord (fromIntegral o')
- else pprByte dW_CFA_def_cfa_sf $$
- pprLEBRegNo plat s' $$
- pprLEBInt o'
-pprSetUnwind _ Sp (_, Just uw)
- = pprByte dW_CFA_def_cfa_expression $$ pprUnwindExpr False uw
-pprSetUnwind plat g (_, Just (UwDeref (UwReg Sp o)))
- | o < 0 && ((-o) `mod` platformWordSizeInBytes plat) == 0 -- expected case
- = pprByte (dW_CFA_offset + dwarfGlobalRegNo plat g) $$
- pprLEBWord (fromIntegral ((-o) `div` platformWordSizeInBytes plat))
- | otherwise
- = pprByte dW_CFA_offset_extended_sf $$
- pprLEBRegNo plat g $$
- pprLEBInt o
-pprSetUnwind plat g (_, Just (UwDeref uw))
- = pprByte dW_CFA_expression $$
- pprLEBRegNo plat g $$
- pprUnwindExpr True uw
-pprSetUnwind plat g (_, Just (UwReg g' 0))
- | g == g'
- = pprByte dW_CFA_same_value $$
- pprLEBRegNo plat g
-pprSetUnwind plat g (_, Just uw)
- = pprByte dW_CFA_val_expression $$
- pprLEBRegNo plat g $$
- pprUnwindExpr True uw
-
--- | Print the register number of the given 'GlobalReg' as an unsigned LEB128
--- encoded number.
-pprLEBRegNo :: Platform -> GlobalReg -> SDoc
-pprLEBRegNo plat = pprLEBWord . fromIntegral . dwarfGlobalRegNo plat
-
--- | Generates a DWARF expression for the given unwind expression. If
--- @spIsCFA@ is true, we see @Sp@ as the frame base CFA where it gets
--- mentioned.
-pprUnwindExpr :: Bool -> UnwindExpr -> SDoc
-pprUnwindExpr spIsCFA expr
- = sdocWithPlatform $ \plat ->
- let pprE (UwConst i)
- | i >= 0 && i < 32 = pprByte (dW_OP_lit0 + fromIntegral i)
- | otherwise = pprByte dW_OP_consts $$ pprLEBInt i -- lazy...
- pprE (UwReg Sp i) | spIsCFA
- = if i == 0
- then pprByte dW_OP_call_frame_cfa
- else pprE (UwPlus (UwReg Sp 0) (UwConst i))
- pprE (UwReg g i) = pprByte (dW_OP_breg0+dwarfGlobalRegNo plat g) $$
- pprLEBInt i
- pprE (UwDeref u) = pprE u $$ pprByte dW_OP_deref
- pprE (UwLabel l) = pprByte dW_OP_addr $$ pprWord (ppr l)
- pprE (UwPlus u1 u2) = pprE u1 $$ pprE u2 $$ pprByte dW_OP_plus
- pprE (UwMinus u1 u2) = pprE u1 $$ pprE u2 $$ pprByte dW_OP_minus
- pprE (UwTimes u1 u2) = pprE u1 $$ pprE u2 $$ pprByte dW_OP_mul
- in text "\t.uleb128 2f-1f" $$ -- DW_FORM_block length
- -- computed as the difference of the following local labels 2: and 1:
- text "1:" $$
- pprE expr $$
- text "2:"
-
--- | Generate code for re-setting the unwind information for a
--- register to @undefined@
-pprUndefUnwind :: Platform -> GlobalReg -> SDoc
-pprUndefUnwind plat g = pprByte dW_CFA_undefined $$
- pprLEBRegNo plat g
-
-
--- | Align assembly at (machine) word boundary
-wordAlign :: SDoc
-wordAlign = sdocWithPlatform $ \plat ->
- text "\t.align " <> case platformOS plat of
- OSDarwin -> case platformWordSize plat of
- PW8 -> char '3'
- PW4 -> char '2'
- _other -> ppr (platformWordSizeInBytes plat)
-
--- | Assembly for a single byte of constant DWARF data
-pprByte :: Word8 -> SDoc
-pprByte x = text "\t.byte " <> ppr (fromIntegral x :: Word)
-
--- | Assembly for a two-byte constant integer
-pprHalf :: Word16 -> SDoc
-pprHalf x = text "\t.short" <+> ppr (fromIntegral x :: Word)
-
--- | Assembly for a constant DWARF flag
-pprFlag :: Bool -> SDoc
-pprFlag f = pprByte (if f then 0xff else 0x00)
-
--- | Assembly for 4 bytes of dynamic DWARF data
-pprData4' :: SDoc -> SDoc
-pprData4' x = text "\t.long " <> x
-
--- | Assembly for 4 bytes of constant DWARF data
-pprData4 :: Word -> SDoc
-pprData4 = pprData4' . ppr
-
--- | Assembly for a DWARF word of dynamic data. This means 32 bit, as
--- we are generating 32 bit DWARF.
-pprDwWord :: SDoc -> SDoc
-pprDwWord = pprData4'
-
--- | Assembly for a machine word of dynamic data. Depends on the
--- architecture we are currently generating code for.
-pprWord :: SDoc -> SDoc
-pprWord s = (<> s) . sdocWithPlatform $ \plat ->
- case platformWordSize plat of
- PW4 -> text "\t.long "
- PW8 -> text "\t.quad "
-
--- | Prints a number in "little endian base 128" format. The idea is
--- to optimize for small numbers by stopping once all further bytes
--- would be 0. The highest bit in every byte signals whether there
--- are further bytes to read.
-pprLEBWord :: Word -> SDoc
-pprLEBWord x | x < 128 = pprByte (fromIntegral x)
- | otherwise = pprByte (fromIntegral $ 128 .|. (x .&. 127)) $$
- pprLEBWord (x `shiftR` 7)
-
--- | Same as @pprLEBWord@, but for a signed number
-pprLEBInt :: Int -> SDoc
-pprLEBInt x | x >= -64 && x < 64
- = pprByte (fromIntegral (x .&. 127))
- | otherwise = pprByte (fromIntegral $ 128 .|. (x .&. 127)) $$
- pprLEBInt (x `shiftR` 7)
-
--- | Generates a dynamic null-terminated string. If required the
--- caller needs to make sure that the string is escaped properly.
-pprString' :: SDoc -> SDoc
-pprString' str = text "\t.asciz \"" <> str <> char '"'
-
--- | Generate a string constant. We take care to escape the string.
-pprString :: String -> SDoc
-pprString str
- = pprString' $ hcat $ map escapeChar $
- if str `lengthIs` utf8EncodedLength str
- then str
- else map (chr . fromIntegral) $ BS.unpack $ bytesFS $ mkFastString str
-
--- | Escape a single non-unicode character
-escapeChar :: Char -> SDoc
-escapeChar '\\' = text "\\\\"
-escapeChar '\"' = text "\\\""
-escapeChar '\n' = text "\\n"
-escapeChar c
- | isAscii c && isPrint c && c /= '?' -- prevents trigraph warnings
- = char c
- | otherwise
- = char '\\' <> char (intToDigit (ch `div` 64)) <>
- char (intToDigit ((ch `div` 8) `mod` 8)) <>
- char (intToDigit (ch `mod` 8))
- where ch = ord c
-
--- | Generate an offset into another section. This is tricky because
--- this is handled differently depending on platform: Mac Os expects
--- us to calculate the offset using assembler arithmetic. Linux expects
--- us to just reference the target directly, and will figure out on
--- their own that we actually need an offset. Finally, Windows has
--- a special directive to refer to relative offsets. Fun.
-sectionOffset :: SDoc -> SDoc -> SDoc
-sectionOffset target section = sdocWithPlatform $ \plat ->
- case platformOS plat of
- OSDarwin -> pprDwWord (target <> char '-' <> section)
- OSMinGW32 -> text "\t.secrel32 " <> target
- _other -> pprDwWord target
diff --git a/compiler/nativeGen/Format.hs b/compiler/nativeGen/Format.hs
deleted file mode 100644
index d7b6f6b868..0000000000
--- a/compiler/nativeGen/Format.hs
+++ /dev/null
@@ -1,105 +0,0 @@
--- | Formats on this architecture
--- A Format is a combination of width and class
---
--- TODO: Signed vs unsigned?
---
--- TODO: This module is currently shared by all architectures because
--- NCGMonad need to know about it to make a VReg. It would be better
--- to have architecture specific formats, and do the overloading
--- properly. eg SPARC doesn't care about FF80.
---
-module Format (
- Format(..),
- intFormat,
- floatFormat,
- isFloatFormat,
- cmmTypeFormat,
- formatToWidth,
- formatInBytes
-)
-
-where
-
-import GhcPrelude
-
-import GHC.Cmm
-import Outputable
-
--- It looks very like the old MachRep, but it's now of purely local
--- significance, here in the native code generator. You can change it
--- without global consequences.
---
--- A major use is as an opcode qualifier; thus the opcode
--- mov.l a b
--- might be encoded
--- MOV II32 a b
--- where the Format field encodes the ".l" part.
-
--- ToDo: it's not clear to me that we need separate signed-vs-unsigned formats
--- here. I've removed them from the x86 version, we'll see what happens --SDM
-
--- ToDo: quite a few occurrences of Format could usefully be replaced by Width
-
-data Format
- = II8
- | II16
- | II32
- | II64
- | FF32
- | FF64
- deriving (Show, Eq)
-
-
--- | Get the integer format of this width.
-intFormat :: Width -> Format
-intFormat width
- = case width of
- W8 -> II8
- W16 -> II16
- W32 -> II32
- W64 -> II64
- other -> sorry $ "The native code generator cannot " ++
- "produce code for Format.intFormat " ++ show other
- ++ "\n\tConsider using the llvm backend with -fllvm"
-
-
--- | Get the float format of this width.
-floatFormat :: Width -> Format
-floatFormat width
- = case width of
- W32 -> FF32
- W64 -> FF64
-
- other -> pprPanic "Format.floatFormat" (ppr other)
-
-
--- | Check if a format represents a floating point value.
-isFloatFormat :: Format -> Bool
-isFloatFormat format
- = case format of
- FF32 -> True
- FF64 -> True
- _ -> False
-
-
--- | Convert a Cmm type to a Format.
-cmmTypeFormat :: CmmType -> Format
-cmmTypeFormat ty
- | isFloatType ty = floatFormat (typeWidth ty)
- | otherwise = intFormat (typeWidth ty)
-
-
--- | Get the Width of a Format.
-formatToWidth :: Format -> Width
-formatToWidth format
- = case format of
- II8 -> W8
- II16 -> W16
- II32 -> W32
- II64 -> W64
- FF32 -> W32
- FF64 -> W64
-
-
-formatInBytes :: Format -> Int
-formatInBytes = widthInBytes . formatToWidth
diff --git a/compiler/nativeGen/Instruction.hs b/compiler/nativeGen/Instruction.hs
deleted file mode 100644
index 23c5ced1d8..0000000000
--- a/compiler/nativeGen/Instruction.hs
+++ /dev/null
@@ -1,202 +0,0 @@
-
-module Instruction (
- RegUsage(..),
- noUsage,
- GenBasicBlock(..), blockId,
- ListGraph(..),
- NatCmm,
- NatCmmDecl,
- NatBasicBlock,
- topInfoTable,
- entryBlocks,
- Instruction(..)
-)
-
-where
-
-import GhcPrelude
-
-import Reg
-
-import GHC.Cmm.BlockId
-import GHC.Cmm.Dataflow.Collections
-import GHC.Cmm.Dataflow.Label
-import GHC.Driver.Session
-import GHC.Cmm hiding (topInfoTable)
-import GHC.Platform
-
--- | Holds a list of source and destination registers used by a
--- particular instruction.
---
--- Machine registers that are pre-allocated to stgRegs are filtered
--- out, because they are uninteresting from a register allocation
--- standpoint. (We wouldn't want them to end up on the free list!)
---
--- As far as we are concerned, the fixed registers simply don't exist
--- (for allocation purposes, anyway).
---
-data RegUsage
- = RU [Reg] [Reg]
-
--- | No regs read or written to.
-noUsage :: RegUsage
-noUsage = RU [] []
-
--- Our flavours of the Cmm types
--- Type synonyms for Cmm populated with native code
-type NatCmm instr
- = GenCmmGroup
- RawCmmStatics
- (LabelMap RawCmmStatics)
- (ListGraph instr)
-
-type NatCmmDecl statics instr
- = GenCmmDecl
- statics
- (LabelMap RawCmmStatics)
- (ListGraph instr)
-
-
-type NatBasicBlock instr
- = GenBasicBlock instr
-
-
--- | Returns the info table associated with the CmmDecl's entry point,
--- if any.
-topInfoTable :: GenCmmDecl a (LabelMap i) (ListGraph b) -> Maybe i
-topInfoTable (CmmProc infos _ _ (ListGraph (b:_)))
- = mapLookup (blockId b) infos
-topInfoTable _
- = Nothing
-
--- | Return the list of BlockIds in a CmmDecl that are entry points
--- for this proc (i.e. they may be jumped to from outside this proc).
-entryBlocks :: GenCmmDecl a (LabelMap i) (ListGraph b) -> [BlockId]
-entryBlocks (CmmProc info _ _ (ListGraph code)) = entries
- where
- infos = mapKeys info
- entries = case code of
- [] -> infos
- BasicBlock entry _ : _ -- first block is the entry point
- | entry `elem` infos -> infos
- | otherwise -> entry : infos
-entryBlocks _ = []
-
--- | Common things that we can do with instructions, on all architectures.
--- These are used by the shared parts of the native code generator,
--- specifically the register allocators.
---
-class Instruction instr where
-
- -- | Get the registers that are being used by this instruction.
- -- regUsage doesn't need to do any trickery for jumps and such.
- -- Just state precisely the regs read and written by that insn.
- -- The consequences of control flow transfers, as far as register
- -- allocation goes, are taken care of by the register allocator.
- --
- regUsageOfInstr
- :: Platform
- -> instr
- -> RegUsage
-
-
- -- | Apply a given mapping to all the register references in this
- -- instruction.
- patchRegsOfInstr
- :: instr
- -> (Reg -> Reg)
- -> instr
-
-
- -- | Checks whether this instruction is a jump/branch instruction.
- -- One that can change the flow of control in a way that the
- -- register allocator needs to worry about.
- isJumpishInstr
- :: instr -> Bool
-
-
- -- | Give the possible destinations of this jump instruction.
- -- Must be defined for all jumpish instructions.
- jumpDestsOfInstr
- :: instr -> [BlockId]
-
-
- -- | Change the destination of this jump instruction.
- -- Used in the linear allocator when adding fixup blocks for join
- -- points.
- patchJumpInstr
- :: instr
- -> (BlockId -> BlockId)
- -> instr
-
-
- -- | An instruction to spill a register into a spill slot.
- mkSpillInstr
- :: DynFlags
- -> Reg -- ^ the reg to spill
- -> Int -- ^ the current stack delta
- -> Int -- ^ spill slot to use
- -> instr
-
-
- -- | An instruction to reload a register from a spill slot.
- mkLoadInstr
- :: DynFlags
- -> Reg -- ^ the reg to reload.
- -> Int -- ^ the current stack delta
- -> Int -- ^ the spill slot to use
- -> instr
-
- -- | See if this instruction is telling us the current C stack delta
- takeDeltaInstr
- :: instr
- -> Maybe Int
-
- -- | Check whether this instruction is some meta thing inserted into
- -- the instruction stream for other purposes.
- --
- -- Not something that has to be treated as a real machine instruction
- -- and have its registers allocated.
- --
- -- eg, comments, delta, ldata, etc.
- isMetaInstr
- :: instr
- -> Bool
-
-
-
- -- | Copy the value in a register to another one.
- -- Must work for all register classes.
- mkRegRegMoveInstr
- :: Platform
- -> Reg -- ^ source register
- -> Reg -- ^ destination register
- -> instr
-
- -- | Take the source and destination from this reg -> reg move instruction
- -- or Nothing if it's not one
- takeRegRegMoveInstr
- :: instr
- -> Maybe (Reg, Reg)
-
- -- | Make an unconditional jump instruction.
- -- For architectures with branch delay slots, its ok to put
- -- a NOP after the jump. Don't fill the delay slot with an
- -- instruction that references regs or you'll confuse the
- -- linear allocator.
- mkJumpInstr
- :: BlockId
- -> [instr]
-
-
- -- Subtract an amount from the C stack pointer
- mkStackAllocInstr
- :: Platform
- -> Int
- -> [instr]
-
- -- Add an amount to the C stack pointer
- mkStackDeallocInstr
- :: Platform
- -> Int
- -> [instr]
diff --git a/compiler/nativeGen/NCGMonad.hs b/compiler/nativeGen/NCGMonad.hs
deleted file mode 100644
index 5f2af49d4c..0000000000
--- a/compiler/nativeGen/NCGMonad.hs
+++ /dev/null
@@ -1,294 +0,0 @@
-{-# LANGUAGE CPP #-}
-{-# LANGUAGE DeriveFunctor #-}
-{-# LANGUAGE BangPatterns #-}
-
--- -----------------------------------------------------------------------------
---
--- (c) The University of Glasgow 1993-2004
---
--- The native code generator's monad.
---
--- -----------------------------------------------------------------------------
-
-module NCGMonad (
- NcgImpl(..),
- NatM_State(..), mkNatM_State,
-
- NatM, -- instance Monad
- initNat,
- addImportNat,
- addNodeBetweenNat,
- addImmediateSuccessorNat,
- updateCfgNat,
- getUniqueNat,
- mapAccumLNat,
- setDeltaNat,
- getDeltaNat,
- getThisModuleNat,
- getBlockIdNat,
- getNewLabelNat,
- getNewRegNat,
- getNewRegPairNat,
- getPicBaseMaybeNat,
- getPicBaseNat,
- getDynFlags,
- getModLoc,
- getFileId,
- getDebugBlock,
-
- DwarfFiles
-)
-
-where
-
-#include "HsVersions.h"
-
-import GhcPrelude
-
-import Reg
-import Format
-import TargetReg
-
-import GHC.Cmm.BlockId
-import GHC.Cmm.Dataflow.Collections
-import GHC.Cmm.Dataflow.Label
-import GHC.Cmm.CLabel ( CLabel )
-import GHC.Cmm.DebugBlock
-import FastString ( FastString )
-import UniqFM
-import UniqSupply
-import Unique ( Unique )
-import GHC.Driver.Session
-import Module
-
-import Control.Monad ( ap )
-
-import Instruction
-import Outputable (SDoc, pprPanic, ppr)
-import GHC.Cmm (RawCmmDecl, RawCmmStatics)
-import CFG
-
-data NcgImpl statics instr jumpDest = NcgImpl {
- cmmTopCodeGen :: RawCmmDecl -> NatM [NatCmmDecl statics instr],
- generateJumpTableForInstr :: instr -> Maybe (NatCmmDecl statics instr),
- getJumpDestBlockId :: jumpDest -> Maybe BlockId,
- canShortcut :: instr -> Maybe jumpDest,
- shortcutStatics :: (BlockId -> Maybe jumpDest) -> statics -> statics,
- shortcutJump :: (BlockId -> Maybe jumpDest) -> instr -> instr,
- pprNatCmmDecl :: NatCmmDecl statics instr -> SDoc,
- maxSpillSlots :: Int,
- allocatableRegs :: [RealReg],
- ncgExpandTop :: [NatCmmDecl statics instr] -> [NatCmmDecl statics instr],
- ncgAllocMoreStack :: Int -> NatCmmDecl statics instr
- -> UniqSM (NatCmmDecl statics instr, [(BlockId,BlockId)]),
- -- ^ The list of block ids records the redirected jumps to allow us to update
- -- the CFG.
- ncgMakeFarBranches :: LabelMap RawCmmStatics -> [NatBasicBlock instr] -> [NatBasicBlock instr],
- extractUnwindPoints :: [instr] -> [UnwindPoint],
- -- ^ given the instruction sequence of a block, produce a list of
- -- the block's 'UnwindPoint's
- -- See Note [What is this unwinding business?] in Debug
- -- and Note [Unwinding information in the NCG] in this module.
- invertCondBranches :: Maybe CFG -> LabelMap RawCmmStatics -> [NatBasicBlock instr]
- -> [NatBasicBlock instr]
- -- ^ Turn the sequence of `jcc l1; jmp l2` into `jncc l2; <block_l1>`
- -- when possible.
- }
-
-data NatM_State
- = NatM_State {
- natm_us :: UniqSupply,
- natm_delta :: Int,
- natm_imports :: [(CLabel)],
- natm_pic :: Maybe Reg,
- natm_dflags :: DynFlags,
- natm_this_module :: Module,
- natm_modloc :: ModLocation,
- natm_fileid :: DwarfFiles,
- natm_debug_map :: LabelMap DebugBlock,
- natm_cfg :: CFG
- -- ^ Having a CFG with additional information is essential for some
- -- operations. However we can't reconstruct all information once we
- -- generated instructions. So instead we update the CFG as we go.
- }
-
-type DwarfFiles = UniqFM (FastString, Int)
-
-newtype NatM result = NatM (NatM_State -> (result, NatM_State))
- deriving (Functor)
-
-unNat :: NatM a -> NatM_State -> (a, NatM_State)
-unNat (NatM a) = a
-
-mkNatM_State :: UniqSupply -> Int -> DynFlags -> Module -> ModLocation ->
- DwarfFiles -> LabelMap DebugBlock -> CFG -> NatM_State
-mkNatM_State us delta dflags this_mod
- = \loc dwf dbg cfg ->
- NatM_State
- { natm_us = us
- , natm_delta = delta
- , natm_imports = []
- , natm_pic = Nothing
- , natm_dflags = dflags
- , natm_this_module = this_mod
- , natm_modloc = loc
- , natm_fileid = dwf
- , natm_debug_map = dbg
- , natm_cfg = cfg
- }
-
-initNat :: NatM_State -> NatM a -> (a, NatM_State)
-initNat init_st m
- = case unNat m init_st of { (r,st) -> (r,st) }
-
-instance Applicative NatM where
- pure = returnNat
- (<*>) = ap
-
-instance Monad NatM where
- (>>=) = thenNat
-
-instance MonadUnique NatM where
- getUniqueSupplyM = NatM $ \st ->
- case splitUniqSupply (natm_us st) of
- (us1, us2) -> (us1, st {natm_us = us2})
-
- getUniqueM = NatM $ \st ->
- case takeUniqFromSupply (natm_us st) of
- (uniq, us') -> (uniq, st {natm_us = us'})
-
-thenNat :: NatM a -> (a -> NatM b) -> NatM b
-thenNat expr cont
- = NatM $ \st -> case unNat expr st of
- (result, st') -> unNat (cont result) st'
-
-returnNat :: a -> NatM a
-returnNat result
- = NatM $ \st -> (result, st)
-
-mapAccumLNat :: (acc -> x -> NatM (acc, y))
- -> acc
- -> [x]
- -> NatM (acc, [y])
-
-mapAccumLNat _ b []
- = return (b, [])
-mapAccumLNat f b (x:xs)
- = do (b__2, x__2) <- f b x
- (b__3, xs__2) <- mapAccumLNat f b__2 xs
- return (b__3, x__2:xs__2)
-
-getUniqueNat :: NatM Unique
-getUniqueNat = NatM $ \ st ->
- case takeUniqFromSupply $ natm_us st of
- (uniq, us') -> (uniq, st {natm_us = us'})
-
-instance HasDynFlags NatM where
- getDynFlags = NatM $ \ st -> (natm_dflags st, st)
-
-
-getDeltaNat :: NatM Int
-getDeltaNat = NatM $ \ st -> (natm_delta st, st)
-
-
-setDeltaNat :: Int -> NatM ()
-setDeltaNat delta = NatM $ \ st -> ((), st {natm_delta = delta})
-
-
-getThisModuleNat :: NatM Module
-getThisModuleNat = NatM $ \ st -> (natm_this_module st, st)
-
-
-addImportNat :: CLabel -> NatM ()
-addImportNat imp
- = NatM $ \ st -> ((), st {natm_imports = imp : natm_imports st})
-
-updateCfgNat :: (CFG -> CFG) -> NatM ()
-updateCfgNat f
- = NatM $ \ st -> let !cfg' = f (natm_cfg st)
- in ((), st { natm_cfg = cfg'})
-
--- | Record that we added a block between `from` and `old`.
-addNodeBetweenNat :: BlockId -> BlockId -> BlockId -> NatM ()
-addNodeBetweenNat from between to
- = do df <- getDynFlags
- let jmpWeight = fromIntegral . uncondWeight .
- cfgWeightInfo $ df
- updateCfgNat (updateCfg jmpWeight from between to)
- where
- -- When transforming A -> B to A -> A' -> B
- -- A -> A' keeps the old edge info while
- -- A' -> B gets the info for an unconditional
- -- jump.
- updateCfg weight from between old m
- | Just info <- getEdgeInfo from old m
- = addEdge from between info .
- addWeightEdge between old weight .
- delEdge from old $ m
- | otherwise
- = pprPanic "Failed to update cfg: Untracked edge" (ppr (from,to))
-
-
--- | Place `succ` after `block` and change any edges
--- block -> X to `succ` -> X
-addImmediateSuccessorNat :: BlockId -> BlockId -> NatM ()
-addImmediateSuccessorNat block succ
- = updateCfgNat (addImmediateSuccessor block succ)
-
-getBlockIdNat :: NatM BlockId
-getBlockIdNat
- = do u <- getUniqueNat
- return (mkBlockId u)
-
-
-getNewLabelNat :: NatM CLabel
-getNewLabelNat
- = blockLbl <$> getBlockIdNat
-
-
-getNewRegNat :: Format -> NatM Reg
-getNewRegNat rep
- = do u <- getUniqueNat
- dflags <- getDynFlags
- return (RegVirtual $ targetMkVirtualReg (targetPlatform dflags) u rep)
-
-
-getNewRegPairNat :: Format -> NatM (Reg,Reg)
-getNewRegPairNat rep
- = do u <- getUniqueNat
- dflags <- getDynFlags
- let vLo = targetMkVirtualReg (targetPlatform dflags) u rep
- let lo = RegVirtual $ targetMkVirtualReg (targetPlatform dflags) u rep
- let hi = RegVirtual $ getHiVirtualRegFromLo vLo
- return (lo, hi)
-
-
-getPicBaseMaybeNat :: NatM (Maybe Reg)
-getPicBaseMaybeNat
- = NatM (\state -> (natm_pic state, state))
-
-
-getPicBaseNat :: Format -> NatM Reg
-getPicBaseNat rep
- = do mbPicBase <- getPicBaseMaybeNat
- case mbPicBase of
- Just picBase -> return picBase
- Nothing
- -> do
- reg <- getNewRegNat rep
- NatM (\state -> (reg, state { natm_pic = Just reg }))
-
-getModLoc :: NatM ModLocation
-getModLoc
- = NatM $ \ st -> (natm_modloc st, st)
-
-getFileId :: FastString -> NatM Int
-getFileId f = NatM $ \st ->
- case lookupUFM (natm_fileid st) f of
- Just (_,n) -> (n, st)
- Nothing -> let n = 1 + sizeUFM (natm_fileid st)
- fids = addToUFM (natm_fileid st) f (f,n)
- in n `seq` fids `seq` (n, st { natm_fileid = fids })
-
-getDebugBlock :: Label -> NatM (Maybe DebugBlock)
-getDebugBlock l = NatM $ \st -> (mapLookup l (natm_debug_map st), st)
diff --git a/compiler/nativeGen/NOTES b/compiler/nativeGen/NOTES
deleted file mode 100644
index 9068a7fc2c..0000000000
--- a/compiler/nativeGen/NOTES
+++ /dev/null
@@ -1,41 +0,0 @@
-TODO in new NCG
-~~~~~~~~~~~~~~~
-
-- Are we being careful enough about narrowing those out-of-range CmmInts?
-
-- Register allocator:
- - fixup code
- - keep track of free stack slots
-
- Optimisations:
-
- - picking the assignment on entry to a block: better to defer this
- until we know all the assignments. In a loop, we should pick
- the assignment from the looping jump (fixpointing?), so that any
- fixup code ends up *outside* the loop. Otherwise, we should
- pick the assignment that results in the least fixup code.
-
-- splitting?
-
--- -----------------------------------------------------------------------------
--- x86 ToDos
-
-- x86 genCCall needs to tack on the @size for stdcalls (might not be in the
- foreignlabel).
-
-- x86: should really clean up that IMUL64 stuff, and tell the code gen about
- Intel imul instructions.
-
-- x86: we're not careful enough about making sure that we only use
- byte-addressable registers in byte instructions. Should we do it this
- way, or stick to using 32-bit registers everywhere?
-
-- Use SSE for floating point, optionally.
-
-------------------------------------------------------------------------------
--- Further optimisations:
-
-- We might be able to extend the scope of the inlining phase so it can
- skip over more statements that don't affect the value of the inlined
- expr.
-
diff --git a/compiler/nativeGen/PIC.hs b/compiler/nativeGen/PIC.hs
deleted file mode 100644
index 5c217f2fe6..0000000000
--- a/compiler/nativeGen/PIC.hs
+++ /dev/null
@@ -1,838 +0,0 @@
-{-
- This module handles generation of position independent code and
- dynamic-linking related issues for the native code generator.
-
- This depends both the architecture and OS, so we define it here
- instead of in one of the architecture specific modules.
-
- Things outside this module which are related to this:
-
- + module CLabel
- - PIC base label (pretty printed as local label 1)
- - DynamicLinkerLabels - several kinds:
- CodeStub, SymbolPtr, GotSymbolPtr, GotSymbolOffset
- - labelDynamic predicate
- + module Cmm
- - The GlobalReg datatype has a PicBaseReg constructor
- - The CmmLit datatype has a CmmLabelDiffOff constructor
- + codeGen & RTS
- - When tablesNextToCode, no absolute addresses are stored in info tables
- any more. Instead, offsets from the info label are used.
- - For Win32 only, SRTs might contain addresses of __imp_ symbol pointers
- because Win32 doesn't support external references in data sections.
- TODO: make sure this still works, it might be bitrotted
- + NCG
- - The cmmToCmm pass in AsmCodeGen calls cmmMakeDynamicReference for all
- labels.
- - nativeCodeGen calls pprImportedSymbol and pprGotDeclaration to output
- all the necessary stuff for imported symbols.
- - The NCG monad keeps track of a list of imported symbols.
- - MachCodeGen invokes initializePicBase to generate code to initialize
- the PIC base register when needed.
- - MachCodeGen calls cmmMakeDynamicReference whenever it uses a CLabel
- that wasn't in the original Cmm code (e.g. floating point literals).
--}
-
-module PIC (
- cmmMakeDynamicReference,
- CmmMakeDynamicReferenceM(..),
- ReferenceKind(..),
- needImportedSymbols,
- pprImportedSymbol,
- pprGotDeclaration,
-
- initializePicBase_ppc,
- initializePicBase_x86
-)
-
-where
-
-import GhcPrelude
-
-import qualified PPC.Instr as PPC
-import qualified PPC.Regs as PPC
-
-import qualified X86.Instr as X86
-
-import GHC.Platform
-import Instruction
-import Reg
-import NCGMonad
-
-
-import GHC.Cmm.Dataflow.Collections
-import GHC.Cmm
-import GHC.Cmm.CLabel ( CLabel, ForeignLabelSource(..), pprCLabel,
- mkDynamicLinkerLabel, DynamicLinkerLabelInfo(..),
- dynamicLinkerLabelInfo, mkPicBaseLabel,
- labelDynamic, externallyVisibleCLabel )
-
-import GHC.Cmm.CLabel ( mkForeignLabel )
-
-
-import BasicTypes
-import Module
-
-import Outputable
-
-import GHC.Driver.Session
-import FastString
-
-
-
---------------------------------------------------------------------------------
--- It gets called by the cmmToCmm pass for every CmmLabel in the Cmm
--- code. It does The Right Thing(tm) to convert the CmmLabel into a
--- position-independent, dynamic-linking-aware reference to the thing
--- in question.
--- Note that this also has to be called from MachCodeGen in order to
--- access static data like floating point literals (labels that were
--- created after the cmmToCmm pass).
--- The function must run in a monad that can keep track of imported symbols
--- A function for recording an imported symbol must be passed in:
--- - addImportCmmOpt for the CmmOptM monad
--- - addImportNat for the NatM monad.
-
-data ReferenceKind
- = DataReference
- | CallReference
- | JumpReference
- deriving(Eq)
-
-class Monad m => CmmMakeDynamicReferenceM m where
- addImport :: CLabel -> m ()
- getThisModule :: m Module
-
-instance CmmMakeDynamicReferenceM NatM where
- addImport = addImportNat
- getThisModule = getThisModuleNat
-
-cmmMakeDynamicReference
- :: CmmMakeDynamicReferenceM m
- => DynFlags
- -> ReferenceKind -- whether this is the target of a jump
- -> CLabel -- the label
- -> m CmmExpr
-
-cmmMakeDynamicReference dflags referenceKind lbl
- | Just _ <- dynamicLinkerLabelInfo lbl
- = return $ CmmLit $ CmmLabel lbl -- already processed it, pass through
-
- | otherwise
- = do this_mod <- getThisModule
- case howToAccessLabel
- dflags
- (platformArch $ targetPlatform dflags)
- (platformOS $ targetPlatform dflags)
- this_mod
- referenceKind lbl of
-
- AccessViaStub -> do
- let stub = mkDynamicLinkerLabel CodeStub lbl
- addImport stub
- return $ CmmLit $ CmmLabel stub
-
- AccessViaSymbolPtr -> do
- let symbolPtr = mkDynamicLinkerLabel SymbolPtr lbl
- addImport symbolPtr
- return $ CmmLoad (cmmMakePicReference dflags symbolPtr) (bWord dflags)
-
- AccessDirectly -> case referenceKind of
- -- for data, we might have to make some calculations:
- DataReference -> return $ cmmMakePicReference dflags lbl
- -- all currently supported processors support
- -- PC-relative branch and call instructions,
- -- so just jump there if it's a call or a jump
- _ -> return $ CmmLit $ CmmLabel lbl
-
-
--- -----------------------------------------------------------------------------
--- Create a position independent reference to a label.
--- (but do not bother with dynamic linking).
--- We calculate the label's address by adding some (platform-dependent)
--- offset to our base register; this offset is calculated by
--- the function picRelative in the platform-dependent part below.
-
-cmmMakePicReference :: DynFlags -> CLabel -> CmmExpr
-cmmMakePicReference dflags lbl
-
- -- Windows doesn't need PIC,
- -- everything gets relocated at runtime
- | OSMinGW32 <- platformOS $ targetPlatform dflags
- = CmmLit $ CmmLabel lbl
-
- | OSAIX <- platformOS $ targetPlatform dflags
- = CmmMachOp (MO_Add W32)
- [ CmmReg (CmmGlobal PicBaseReg)
- , CmmLit $ picRelative dflags
- (platformArch $ targetPlatform dflags)
- (platformOS $ targetPlatform dflags)
- lbl ]
-
- -- both ABI versions default to medium code model
- | ArchPPC_64 _ <- platformArch $ targetPlatform dflags
- = CmmMachOp (MO_Add W32) -- code model medium
- [ CmmReg (CmmGlobal PicBaseReg)
- , CmmLit $ picRelative dflags
- (platformArch $ targetPlatform dflags)
- (platformOS $ targetPlatform dflags)
- lbl ]
-
- | (positionIndependent dflags || gopt Opt_ExternalDynamicRefs dflags)
- && absoluteLabel lbl
- = CmmMachOp (MO_Add (wordWidth dflags))
- [ CmmReg (CmmGlobal PicBaseReg)
- , CmmLit $ picRelative dflags
- (platformArch $ targetPlatform dflags)
- (platformOS $ targetPlatform dflags)
- lbl ]
-
- | otherwise
- = CmmLit $ CmmLabel lbl
-
-
-absoluteLabel :: CLabel -> Bool
-absoluteLabel lbl
- = case dynamicLinkerLabelInfo lbl of
- Just (GotSymbolPtr, _) -> False
- Just (GotSymbolOffset, _) -> False
- _ -> True
-
-
---------------------------------------------------------------------------------
--- Knowledge about how special dynamic linker labels like symbol
--- pointers, code stubs and GOT offsets look like is located in the
--- module CLabel.
-
--- We have to decide which labels need to be accessed
--- indirectly or via a piece of stub code.
-data LabelAccessStyle
- = AccessViaStub
- | AccessViaSymbolPtr
- | AccessDirectly
-
-howToAccessLabel
- :: DynFlags -> Arch -> OS -> Module -> ReferenceKind -> CLabel -> LabelAccessStyle
-
-
--- Windows
--- In Windows speak, a "module" is a set of objects linked into the
--- same Portable Executable (PE) file. (both .exe and .dll files are PEs).
---
--- If we're compiling a multi-module program then symbols from other modules
--- are accessed by a symbol pointer named __imp_SYMBOL. At runtime we have the
--- following.
---
--- (in the local module)
--- __imp_SYMBOL: addr of SYMBOL
---
--- (in the other module)
--- SYMBOL: the real function / data.
---
--- To access the function at SYMBOL from our local module, we just need to
--- dereference the local __imp_SYMBOL.
---
--- If not compiling with -dynamic we assume that all our code will be linked
--- into the same .exe file. In this case we always access symbols directly,
--- and never use __imp_SYMBOL.
---
-howToAccessLabel dflags _ OSMinGW32 this_mod _ lbl
-
- -- Assume all symbols will be in the same PE, so just access them directly.
- | not (gopt Opt_ExternalDynamicRefs dflags)
- = AccessDirectly
-
- -- If the target symbol is in another PE we need to access it via the
- -- appropriate __imp_SYMBOL pointer.
- | labelDynamic dflags this_mod lbl
- = AccessViaSymbolPtr
-
- -- Target symbol is in the same PE as the caller, so just access it directly.
- | otherwise
- = AccessDirectly
-
-
--- Mach-O (Darwin, Mac OS X)
---
--- Indirect access is required in the following cases:
--- * things imported from a dynamic library
--- * (not on x86_64) data from a different module, if we're generating PIC code
--- It is always possible to access something indirectly,
--- even when it's not necessary.
---
-howToAccessLabel dflags arch OSDarwin this_mod DataReference lbl
- -- data access to a dynamic library goes via a symbol pointer
- | labelDynamic dflags this_mod lbl
- = AccessViaSymbolPtr
-
- -- when generating PIC code, all cross-module data references must
- -- must go via a symbol pointer, too, because the assembler
- -- cannot generate code for a label difference where one
- -- label is undefined. Doesn't apply t x86_64.
- -- Unfortunately, we don't know whether it's cross-module,
- -- so we do it for all externally visible labels.
- -- This is a slight waste of time and space, but otherwise
- -- we'd need to pass the current Module all the way in to
- -- this function.
- | arch /= ArchX86_64
- , positionIndependent dflags && externallyVisibleCLabel lbl
- = AccessViaSymbolPtr
-
- | otherwise
- = AccessDirectly
-
-howToAccessLabel dflags arch OSDarwin this_mod JumpReference lbl
- -- dyld code stubs don't work for tailcalls because the
- -- stack alignment is only right for regular calls.
- -- Therefore, we have to go via a symbol pointer:
- | arch == ArchX86 || arch == ArchX86_64
- , labelDynamic dflags this_mod lbl
- = AccessViaSymbolPtr
-
-
-howToAccessLabel dflags arch OSDarwin this_mod _ lbl
- -- Code stubs are the usual method of choice for imported code;
- -- not needed on x86_64 because Apple's new linker, ld64, generates
- -- them automatically.
- | arch /= ArchX86_64
- , labelDynamic dflags this_mod lbl
- = AccessViaStub
-
- | otherwise
- = AccessDirectly
-
-
-----------------------------------------------------------------------------
--- AIX
-
--- quite simple (for now)
-howToAccessLabel _dflags _arch OSAIX _this_mod kind _lbl
- = case kind of
- DataReference -> AccessViaSymbolPtr
- CallReference -> AccessDirectly
- JumpReference -> AccessDirectly
-
--- ELF (Linux)
---
--- ELF tries to pretend to the main application code that dynamic linking does
--- not exist. While this may sound convenient, it tends to mess things up in
--- very bad ways, so we have to be careful when we generate code for a non-PIE
--- main program (-dynamic but no -fPIC).
---
--- Indirect access is required for references to imported symbols
--- from position independent code. It is also required from the main program
--- when dynamic libraries containing Haskell code are used.
-
-howToAccessLabel _ (ArchPPC_64 _) os _ kind _
- | osElfTarget os
- = case kind of
- -- ELF PPC64 (powerpc64-linux), AIX, MacOS 9, BeOS/PPC
- DataReference -> AccessViaSymbolPtr
- -- RTLD does not generate stubs for function descriptors
- -- in tail calls. Create a symbol pointer and generate
- -- the code to load the function descriptor at the call site.
- JumpReference -> AccessViaSymbolPtr
- -- regular calls are handled by the runtime linker
- _ -> AccessDirectly
-
-howToAccessLabel dflags _ os _ _ _
- -- no PIC -> the dynamic linker does everything for us;
- -- if we don't dynamically link to Haskell code,
- -- it actually manages to do so without messing things up.
- | osElfTarget os
- , not (positionIndependent dflags) &&
- not (gopt Opt_ExternalDynamicRefs dflags)
- = AccessDirectly
-
-howToAccessLabel dflags arch os this_mod DataReference lbl
- | osElfTarget os
- = case () of
- -- A dynamic label needs to be accessed via a symbol pointer.
- _ | labelDynamic dflags this_mod lbl
- -> AccessViaSymbolPtr
-
- -- For PowerPC32 -fPIC, we have to access even static data
- -- via a symbol pointer (see below for an explanation why
- -- PowerPC32 Linux is especially broken).
- | arch == ArchPPC
- , positionIndependent dflags
- -> AccessViaSymbolPtr
-
- | otherwise
- -> AccessDirectly
-
-
- -- In most cases, we have to avoid symbol stubs on ELF, for the following reasons:
- -- on i386, the position-independent symbol stubs in the Procedure Linkage Table
- -- require the address of the GOT to be loaded into register %ebx on entry.
- -- The linker will take any reference to the symbol stub as a hint that
- -- the label in question is a code label. When linking executables, this
- -- will cause the linker to replace even data references to the label with
- -- references to the symbol stub.
-
- -- This leaves calling a (foreign) function from non-PIC code
- -- (AccessDirectly, because we get an implicit symbol stub)
- -- and calling functions from PIC code on non-i386 platforms (via a symbol stub)
-
-howToAccessLabel dflags arch os this_mod CallReference lbl
- | osElfTarget os
- , labelDynamic dflags this_mod lbl && not (positionIndependent dflags)
- = AccessDirectly
-
- | osElfTarget os
- , arch /= ArchX86
- , labelDynamic dflags this_mod lbl
- , positionIndependent dflags
- = AccessViaStub
-
-howToAccessLabel dflags _ os this_mod _ lbl
- | osElfTarget os
- = if labelDynamic dflags this_mod lbl
- then AccessViaSymbolPtr
- else AccessDirectly
-
--- all other platforms
-howToAccessLabel dflags _ _ _ _ _
- | not (positionIndependent dflags)
- = AccessDirectly
-
- | otherwise
- = panic "howToAccessLabel: PIC not defined for this platform"
-
-
-
--- -------------------------------------------------------------------
--- | Says what we have to add to our 'PIC base register' in order to
--- get the address of a label.
-
-picRelative :: DynFlags -> Arch -> OS -> CLabel -> CmmLit
-
--- Darwin, but not x86_64:
--- The PIC base register points to the PIC base label at the beginning
--- of the current CmmDecl. We just have to use a label difference to
--- get the offset.
--- We have already made sure that all labels that are not from the current
--- module are accessed indirectly ('as' can't calculate differences between
--- undefined labels).
-picRelative dflags arch OSDarwin lbl
- | arch /= ArchX86_64
- = CmmLabelDiffOff lbl mkPicBaseLabel 0 (wordWidth dflags)
-
--- On AIX we use an indirect local TOC anchored by 'gotLabel'.
--- This way we use up only one global TOC entry per compilation-unit
--- (this is quite similar to GCC's @-mminimal-toc@ compilation mode)
-picRelative dflags _ OSAIX lbl
- = CmmLabelDiffOff lbl gotLabel 0 (wordWidth dflags)
-
--- PowerPC Linux:
--- The PIC base register points to our fake GOT. Use a label difference
--- to get the offset.
--- We have made sure that *everything* is accessed indirectly, so this
--- is only used for offsets from the GOT to symbol pointers inside the
--- GOT.
-picRelative dflags ArchPPC os lbl
- | osElfTarget os
- = CmmLabelDiffOff lbl gotLabel 0 (wordWidth dflags)
-
-
--- Most Linux versions:
--- The PIC base register points to the GOT. Use foo@got for symbol
--- pointers, and foo@gotoff for everything else.
--- Linux and Darwin on x86_64:
--- The PIC base register is %rip, we use foo@gotpcrel for symbol pointers,
--- and a GotSymbolOffset label for other things.
--- For reasons of tradition, the symbol offset label is written as a plain label.
-picRelative _ arch os lbl
- | osElfTarget os || (os == OSDarwin && arch == ArchX86_64)
- = let result
- | Just (SymbolPtr, lbl') <- dynamicLinkerLabelInfo lbl
- = CmmLabel $ mkDynamicLinkerLabel GotSymbolPtr lbl'
-
- | otherwise
- = CmmLabel $ mkDynamicLinkerLabel GotSymbolOffset lbl
-
- in result
-
-picRelative _ _ _ _
- = panic "PositionIndependentCode.picRelative undefined for this platform"
-
-
-
---------------------------------------------------------------------------------
-
-needImportedSymbols :: DynFlags -> Arch -> OS -> Bool
-needImportedSymbols dflags arch os
- | os == OSDarwin
- , arch /= ArchX86_64
- = True
-
- | os == OSAIX
- = True
-
- -- PowerPC Linux: -fPIC or -dynamic
- | osElfTarget os
- , arch == ArchPPC
- = positionIndependent dflags || gopt Opt_ExternalDynamicRefs dflags
-
- -- PowerPC 64 Linux: always
- | osElfTarget os
- , arch == ArchPPC_64 ELF_V1 || arch == ArchPPC_64 ELF_V2
- = True
-
- -- i386 (and others?): -dynamic but not -fPIC
- | osElfTarget os
- , arch /= ArchPPC_64 ELF_V1 && arch /= ArchPPC_64 ELF_V2
- = gopt Opt_ExternalDynamicRefs dflags &&
- not (positionIndependent dflags)
-
- | otherwise
- = False
-
--- gotLabel
--- The label used to refer to our "fake GOT" from
--- position-independent code.
-gotLabel :: CLabel
-gotLabel
- -- HACK: this label isn't really foreign
- = mkForeignLabel
- (fsLit ".LCTOC1")
- Nothing ForeignLabelInThisPackage IsData
-
-
-
---------------------------------------------------------------------------------
--- We don't need to declare any offset tables.
--- However, for PIC on x86, we need a small helper function.
-pprGotDeclaration :: DynFlags -> Arch -> OS -> SDoc
-pprGotDeclaration dflags ArchX86 OSDarwin
- | positionIndependent dflags
- = vcat [
- text ".section __TEXT,__textcoal_nt,coalesced,no_toc",
- text ".weak_definition ___i686.get_pc_thunk.ax",
- text ".private_extern ___i686.get_pc_thunk.ax",
- text "___i686.get_pc_thunk.ax:",
- text "\tmovl (%esp), %eax",
- text "\tret" ]
-
-pprGotDeclaration _ _ OSDarwin
- = empty
-
--- Emit XCOFF TOC section
-pprGotDeclaration _ _ OSAIX
- = vcat $ [ text ".toc"
- , text ".tc ghc_toc_table[TC],.LCTOC1"
- , text ".csect ghc_toc_table[RW]"
- -- See Note [.LCTOC1 in PPC PIC code]
- , text ".set .LCTOC1,$+0x8000"
- ]
-
-
--- PPC 64 ELF v1 needs a Table Of Contents (TOC)
-pprGotDeclaration _ (ArchPPC_64 ELF_V1) _
- = text ".section \".toc\",\"aw\""
--- In ELF v2 we also need to tell the assembler that we want ABI
--- version 2. This would normally be done at the top of the file
--- right after a file directive, but I could not figure out how
--- to do that.
-pprGotDeclaration _ (ArchPPC_64 ELF_V2) _
- = vcat [ text ".abiversion 2",
- text ".section \".toc\",\"aw\""
- ]
-
--- Emit GOT declaration
--- Output whatever needs to be output once per .s file.
-pprGotDeclaration dflags arch os
- | osElfTarget os
- , arch /= ArchPPC_64 ELF_V1 && arch /= ArchPPC_64 ELF_V2
- , not (positionIndependent dflags)
- = empty
-
- | osElfTarget os
- , arch /= ArchPPC_64 ELF_V1 && arch /= ArchPPC_64 ELF_V2
- = vcat [
- -- See Note [.LCTOC1 in PPC PIC code]
- text ".section \".got2\",\"aw\"",
- text ".LCTOC1 = .+32768" ]
-
-pprGotDeclaration _ _ _
- = panic "pprGotDeclaration: no match"
-
-
---------------------------------------------------------------------------------
--- On Darwin, we have to generate our own stub code for lazy binding..
--- For each processor architecture, there are two versions, one for PIC
--- and one for non-PIC.
---
-
-pprImportedSymbol :: DynFlags -> Platform -> CLabel -> SDoc
-pprImportedSymbol dflags (Platform { platformMini = PlatformMini { platformMini_arch = ArchX86, platformMini_os = OSDarwin } }) importedLbl
- | Just (CodeStub, lbl) <- dynamicLinkerLabelInfo importedLbl
- = case positionIndependent dflags of
- False ->
- vcat [
- text ".symbol_stub",
- text "L" <> pprCLabel dflags lbl <> ptext (sLit "$stub:"),
- text "\t.indirect_symbol" <+> pprCLabel dflags lbl,
- text "\tjmp *L" <> pprCLabel dflags lbl
- <> text "$lazy_ptr",
- text "L" <> pprCLabel dflags lbl
- <> text "$stub_binder:",
- text "\tpushl $L" <> pprCLabel dflags lbl
- <> text "$lazy_ptr",
- text "\tjmp dyld_stub_binding_helper"
- ]
- True ->
- vcat [
- text ".section __TEXT,__picsymbolstub2,"
- <> text "symbol_stubs,pure_instructions,25",
- text "L" <> pprCLabel dflags lbl <> ptext (sLit "$stub:"),
- text "\t.indirect_symbol" <+> pprCLabel dflags lbl,
- text "\tcall ___i686.get_pc_thunk.ax",
- text "1:",
- text "\tmovl L" <> pprCLabel dflags lbl
- <> text "$lazy_ptr-1b(%eax),%edx",
- text "\tjmp *%edx",
- text "L" <> pprCLabel dflags lbl
- <> text "$stub_binder:",
- text "\tlea L" <> pprCLabel dflags lbl
- <> text "$lazy_ptr-1b(%eax),%eax",
- text "\tpushl %eax",
- text "\tjmp dyld_stub_binding_helper"
- ]
- $+$ vcat [ text ".section __DATA, __la_sym_ptr"
- <> (if positionIndependent dflags then int 2 else int 3)
- <> text ",lazy_symbol_pointers",
- text "L" <> pprCLabel dflags lbl <> ptext (sLit "$lazy_ptr:"),
- text "\t.indirect_symbol" <+> pprCLabel dflags lbl,
- text "\t.long L" <> pprCLabel dflags lbl
- <> text "$stub_binder"]
-
- | Just (SymbolPtr, lbl) <- dynamicLinkerLabelInfo importedLbl
- = vcat [
- text ".non_lazy_symbol_pointer",
- char 'L' <> pprCLabel dflags lbl <> text "$non_lazy_ptr:",
- text "\t.indirect_symbol" <+> pprCLabel dflags lbl,
- text "\t.long\t0"]
-
- | otherwise
- = empty
-
-
-pprImportedSymbol _ (Platform { platformMini = PlatformMini { platformMini_os = OSDarwin } }) _
- = empty
-
--- XCOFF / AIX
---
--- Similar to PPC64 ELF v1, there's dedicated TOC register (r2). To
--- workaround the limitation of a global TOC we use an indirect TOC
--- with the label `ghc_toc_table`.
---
--- See also GCC's `-mminimal-toc` compilation mode or
--- http://www.ibm.com/developerworks/rational/library/overview-toc-aix/
---
--- NB: No DSO-support yet
-
-pprImportedSymbol dflags (Platform { platformMini = PlatformMini { platformMini_os = OSAIX } }) importedLbl
- = case dynamicLinkerLabelInfo importedLbl of
- Just (SymbolPtr, lbl)
- -> vcat [
- text "LC.." <> pprCLabel dflags lbl <> char ':',
- text "\t.long" <+> pprCLabel dflags lbl ]
- _ -> empty
-
--- ELF / Linux
---
--- In theory, we don't need to generate any stubs or symbol pointers
--- by hand for Linux.
---
--- Reality differs from this in two areas.
---
--- 1) If we just use a dynamically imported symbol directly in a read-only
--- section of the main executable (as GCC does), ld generates R_*_COPY
--- relocations, which are fundamentally incompatible with reversed info
--- tables. Therefore, we need a table of imported addresses in a writable
--- section.
--- The "official" GOT mechanism (label@got) isn't intended to be used
--- in position dependent code, so we have to create our own "fake GOT"
--- when not Opt_PIC && WayDyn `elem` ways dflags.
---
--- 2) PowerPC Linux is just plain broken.
--- While it's theoretically possible to use GOT offsets larger
--- than 16 bit, the standard crt*.o files don't, which leads to
--- linker errors as soon as the GOT size exceeds 16 bit.
--- Also, the assembler doesn't support @gotoff labels.
--- In order to be able to use a larger GOT, we have to circumvent the
--- entire GOT mechanism and do it ourselves (this is also what GCC does).
-
-
--- When needImportedSymbols is defined,
--- the NCG will keep track of all DynamicLinkerLabels it uses
--- and output each of them using pprImportedSymbol.
-
-pprImportedSymbol dflags platform@(Platform { platformMini = PlatformMini { platformMini_arch = ArchPPC_64 _ } })
- importedLbl
- | osElfTarget (platformOS platform)
- = case dynamicLinkerLabelInfo importedLbl of
- Just (SymbolPtr, lbl)
- -> vcat [
- text ".section \".toc\", \"aw\"",
- text ".LC_" <> pprCLabel dflags lbl <> char ':',
- text "\t.quad" <+> pprCLabel dflags lbl ]
- _ -> empty
-
-pprImportedSymbol dflags platform importedLbl
- | osElfTarget (platformOS platform)
- = case dynamicLinkerLabelInfo importedLbl of
- Just (SymbolPtr, lbl)
- -> let symbolSize = case wordWidth dflags of
- W32 -> sLit "\t.long"
- W64 -> sLit "\t.quad"
- _ -> panic "Unknown wordRep in pprImportedSymbol"
-
- in vcat [
- text ".section \".got2\", \"aw\"",
- text ".LC_" <> pprCLabel dflags lbl <> char ':',
- ptext symbolSize <+> pprCLabel dflags lbl ]
-
- -- PLT code stubs are generated automatically by the dynamic linker.
- _ -> empty
-
-pprImportedSymbol _ _ _
- = panic "PIC.pprImportedSymbol: no match"
-
---------------------------------------------------------------------------------
--- Generate code to calculate the address that should be put in the
--- PIC base register.
--- This is called by MachCodeGen for every CmmProc that accessed the
--- PIC base register. It adds the appropriate instructions to the
--- top of the CmmProc.
-
--- It is assumed that the first NatCmmDecl in the input list is a Proc
--- and the rest are CmmDatas.
-
--- Darwin is simple: just fetch the address of a local label.
--- The FETCHPC pseudo-instruction is expanded to multiple instructions
--- during pretty-printing so that we don't have to deal with the
--- local label:
-
--- PowerPC version:
--- bcl 20,31,1f.
--- 1: mflr picReg
-
--- i386 version:
--- call 1f
--- 1: popl %picReg
-
-
-
--- Get a pointer to our own fake GOT, which is defined on a per-module basis.
--- This is exactly how GCC does it in linux.
-
-initializePicBase_ppc
- :: Arch -> OS -> Reg
- -> [NatCmmDecl RawCmmStatics PPC.Instr]
- -> NatM [NatCmmDecl RawCmmStatics PPC.Instr]
-
-initializePicBase_ppc ArchPPC os picReg
- (CmmProc info lab live (ListGraph blocks) : statics)
- | osElfTarget os
- = do
- let
- gotOffset = PPC.ImmConstantDiff
- (PPC.ImmCLbl gotLabel)
- (PPC.ImmCLbl mkPicBaseLabel)
-
- blocks' = case blocks of
- [] -> []
- (b:bs) -> fetchPC b : map maybeFetchPC bs
-
- maybeFetchPC b@(BasicBlock bID _)
- | bID `mapMember` info = fetchPC b
- | otherwise = b
-
- -- GCC does PIC prologs thusly:
- -- bcl 20,31,.L1
- -- .L1:
- -- mflr 30
- -- addis 30,30,.LCTOC1-.L1@ha
- -- addi 30,30,.LCTOC1-.L1@l
- -- TODO: below we use it over temporary register,
- -- it can and should be optimised by picking
- -- correct PIC reg.
- fetchPC (BasicBlock bID insns) =
- BasicBlock bID (PPC.FETCHPC picReg
- : PPC.ADDIS picReg picReg (PPC.HA gotOffset)
- : PPC.ADD picReg picReg
- (PPC.RIImm (PPC.LO gotOffset))
- : PPC.MR PPC.r30 picReg
- : insns)
-
- return (CmmProc info lab live (ListGraph blocks') : statics)
-
--------------------------------------------------------------------------
--- Load TOC into register 2
--- PowerPC 64-bit ELF ABI 2.0 requires the address of the callee
--- in register 12.
--- We pass the label to FETCHTOC and create a .localentry too.
--- TODO: Explain this better and refer to ABI spec!
-{-
-We would like to do approximately this, but spill slot allocation
-might be added before the first BasicBlock. That violates the ABI.
-
-For now we will emit the prologue code in the pretty printer,
-which is also what we do for ELF v1.
-initializePicBase_ppc (ArchPPC_64 ELF_V2) OSLinux picReg
- (CmmProc info lab live (ListGraph (entry:blocks)) : statics)
- = do
- bID <-getUniqueM
- return (CmmProc info lab live (ListGraph (b':entry:blocks))
- : statics)
- where BasicBlock entryID _ = entry
- b' = BasicBlock bID [PPC.FETCHTOC picReg lab,
- PPC.BCC PPC.ALWAYS entryID]
--}
-
-initializePicBase_ppc _ _ _ _
- = panic "initializePicBase_ppc: not needed"
-
-
--- We cheat a bit here by defining a pseudo-instruction named FETCHGOT
--- which pretty-prints as:
--- call 1f
--- 1: popl %picReg
--- addl __GLOBAL_OFFSET_TABLE__+.-1b, %picReg
--- (See PprMach.hs)
-
-initializePicBase_x86
- :: Arch -> OS -> Reg
- -> [NatCmmDecl (Alignment, RawCmmStatics) X86.Instr]
- -> NatM [NatCmmDecl (Alignment, RawCmmStatics) X86.Instr]
-
-initializePicBase_x86 ArchX86 os picReg
- (CmmProc info lab live (ListGraph blocks) : statics)
- | osElfTarget os
- = return (CmmProc info lab live (ListGraph blocks') : statics)
- where blocks' = case blocks of
- [] -> []
- (b:bs) -> fetchGOT b : map maybeFetchGOT bs
-
- -- we want to add a FETCHGOT instruction to the beginning of
- -- every block that is an entry point, which corresponds to
- -- the blocks that have entries in the info-table mapping.
- maybeFetchGOT b@(BasicBlock bID _)
- | bID `mapMember` info = fetchGOT b
- | otherwise = b
-
- fetchGOT (BasicBlock bID insns) =
- BasicBlock bID (X86.FETCHGOT picReg : insns)
-
-initializePicBase_x86 ArchX86 OSDarwin picReg
- (CmmProc info lab live (ListGraph (entry:blocks)) : statics)
- = return (CmmProc info lab live (ListGraph (block':blocks)) : statics)
-
- where BasicBlock bID insns = entry
- block' = BasicBlock bID (X86.FETCHPC picReg : insns)
-
-initializePicBase_x86 _ _ _ _
- = panic "initializePicBase_x86: not needed"
-
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
deleted file mode 100644
index ad47501981..0000000000
--- a/compiler/nativeGen/PPC/CodeGen.hs
+++ /dev/null
@@ -1,2453 +0,0 @@
-{-# LANGUAGE CPP, GADTs #-}
-
------------------------------------------------------------------------------
---
--- Generating machine code (instruction selection)
---
--- (c) The University of Glasgow 1996-2004
---
------------------------------------------------------------------------------
-
--- This is a big module, but, if you pay attention to
--- (a) the sectioning, and (b) the type signatures,
--- the structure should not be too overwhelming.
-
-module PPC.CodeGen (
- cmmTopCodeGen,
- generateJumpTableForInstr,
- InstrBlock
-)
-
-where
-
-#include "HsVersions.h"
-
--- NCG stuff:
-import GhcPrelude
-
-import GHC.Platform.Regs
-import PPC.Instr
-import PPC.Cond
-import PPC.Regs
-import CPrim
-import NCGMonad ( NatM, getNewRegNat, getNewLabelNat
- , getBlockIdNat, getPicBaseNat, getNewRegPairNat
- , getPicBaseMaybeNat )
-import Instruction
-import PIC
-import Format
-import RegClass
-import Reg
-import TargetReg
-import GHC.Platform
-
--- Our intermediate code:
-import GHC.Cmm.BlockId
-import GHC.Cmm.Ppr ( pprExpr )
-import GHC.Cmm
-import GHC.Cmm.Utils
-import GHC.Cmm.Switch
-import GHC.Cmm.CLabel
-import GHC.Cmm.Dataflow.Block
-import GHC.Cmm.Dataflow.Graph
-
--- The rest:
-import OrdList
-import Outputable
-import GHC.Driver.Session
-
-import Control.Monad ( mapAndUnzipM, when )
-import Data.Bits
-import Data.Word
-
-import BasicTypes
-import FastString
-import Util
-
--- -----------------------------------------------------------------------------
--- Top-level of the instruction selector
-
--- | 'InstrBlock's are the insn sequences generated by the insn selectors.
--- They are really trees of insns to facilitate fast appending, where a
--- left-to-right traversal (pre-order?) yields the insns in the correct
--- order.
-
-cmmTopCodeGen
- :: RawCmmDecl
- -> NatM [NatCmmDecl RawCmmStatics Instr]
-
-cmmTopCodeGen (CmmProc info lab live graph) = do
- let blocks = toBlockListEntryFirst graph
- (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
- dflags <- getDynFlags
- let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
- tops = proc : concat statics
- os = platformOS $ targetPlatform dflags
- arch = platformArch $ targetPlatform dflags
- case arch of
- ArchPPC | os == OSAIX -> return tops
- | otherwise -> do
- picBaseMb <- getPicBaseMaybeNat
- case picBaseMb of
- Just picBase -> initializePicBase_ppc arch os picBase tops
- Nothing -> return tops
- ArchPPC_64 ELF_V1 -> fixup_entry tops
- -- generating function descriptor is handled in
- -- pretty printer
- ArchPPC_64 ELF_V2 -> fixup_entry tops
- -- generating function prologue is handled in
- -- pretty printer
- _ -> panic "PPC.cmmTopCodeGen: unknown arch"
- where
- fixup_entry (CmmProc info lab live (ListGraph (entry:blocks)) : statics)
- = do
- let BasicBlock bID insns = entry
- bID' <- if lab == (blockLbl bID)
- then newBlockId
- else return bID
- let b' = BasicBlock bID' insns
- return (CmmProc info lab live (ListGraph (b':blocks)) : statics)
- fixup_entry _ = panic "cmmTopCodegen: Broken CmmProc"
-
-cmmTopCodeGen (CmmData sec dat) = do
- return [CmmData sec dat] -- no translation, we just use CmmStatic
-
-basicBlockCodeGen
- :: Block CmmNode C C
- -> NatM ( [NatBasicBlock Instr]
- , [NatCmmDecl RawCmmStatics Instr])
-
-basicBlockCodeGen block = do
- let (_, nodes, tail) = blockSplit block
- id = entryLabel block
- stmts = blockToList nodes
- mid_instrs <- stmtsToInstrs stmts
- tail_instrs <- stmtToInstrs tail
- let instrs = mid_instrs `appOL` tail_instrs
- -- code generation may introduce new basic block boundaries, which
- -- are indicated by the NEWBLOCK instruction. We must split up the
- -- instruction stream into basic blocks again. Also, we extract
- -- LDATAs here too.
- let
- (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
-
- mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
- = ([], BasicBlock id instrs : blocks, statics)
- mkBlocks (LDATA sec dat) (instrs,blocks,statics)
- = (instrs, blocks, CmmData sec dat:statics)
- mkBlocks instr (instrs,blocks,statics)
- = (instr:instrs, blocks, statics)
- return (BasicBlock id top : other_blocks, statics)
-
-stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
-stmtsToInstrs stmts
- = do instrss <- mapM stmtToInstrs stmts
- return (concatOL instrss)
-
-stmtToInstrs :: CmmNode e x -> NatM InstrBlock
-stmtToInstrs stmt = do
- dflags <- getDynFlags
- case stmt of
- CmmComment s -> return (unitOL (COMMENT s))
- CmmTick {} -> return nilOL
- CmmUnwind {} -> return nilOL
-
- CmmAssign reg src
- | isFloatType ty -> assignReg_FltCode format reg src
- | target32Bit (targetPlatform dflags) &&
- isWord64 ty -> assignReg_I64Code reg src
- | otherwise -> assignReg_IntCode format reg src
- where ty = cmmRegType dflags reg
- format = cmmTypeFormat ty
-
- CmmStore addr src
- | isFloatType ty -> assignMem_FltCode format addr src
- | target32Bit (targetPlatform dflags) &&
- isWord64 ty -> assignMem_I64Code addr src
- | otherwise -> assignMem_IntCode format addr src
- where ty = cmmExprType dflags src
- format = cmmTypeFormat ty
-
- CmmUnsafeForeignCall target result_regs args
- -> genCCall target result_regs args
-
- CmmBranch id -> genBranch id
- CmmCondBranch arg true false prediction -> do
- b1 <- genCondJump true arg prediction
- b2 <- genBranch false
- return (b1 `appOL` b2)
- CmmSwitch arg ids -> do dflags <- getDynFlags
- genSwitch dflags arg ids
- CmmCall { cml_target = arg
- , cml_args_regs = gregs } -> do
- dflags <- getDynFlags
- genJump arg (jumpRegs dflags gregs)
- _ ->
- panic "stmtToInstrs: statement should have been cps'd away"
-
-jumpRegs :: DynFlags -> [GlobalReg] -> [Reg]
-jumpRegs dflags gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ]
- where platform = targetPlatform dflags
-
---------------------------------------------------------------------------------
--- | 'InstrBlock's are the insn sequences generated by the insn selectors.
--- They are really trees of insns to facilitate fast appending, where a
--- left-to-right traversal yields the insns in the correct order.
---
-type InstrBlock
- = OrdList Instr
-
-
--- | Register's passed up the tree. If the stix code forces the register
--- to live in a pre-decided machine register, it comes out as @Fixed@;
--- otherwise, it comes out as @Any@, and the parent can decide which
--- register to put it in.
---
-data Register
- = Fixed Format Reg InstrBlock
- | Any Format (Reg -> InstrBlock)
-
-
-swizzleRegisterRep :: Register -> Format -> Register
-swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code
-swizzleRegisterRep (Any _ codefn) format = Any format codefn
-
-
--- | Grab the Reg for a CmmReg
-getRegisterReg :: Platform -> CmmReg -> Reg
-
-getRegisterReg _ (CmmLocal (LocalReg u pk))
- = RegVirtual $ mkVirtualReg u (cmmTypeFormat pk)
-
-getRegisterReg platform (CmmGlobal mid)
- = case globalRegMaybe platform mid of
- Just reg -> RegReal reg
- Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
- -- By this stage, the only MagicIds remaining should be the
- -- ones which map to a real machine register on this
- -- platform. Hence ...
-
--- | Convert a BlockId to some CmmStatic data
-jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
-jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
-jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
- where blockLabel = blockLbl blockid
-
-
-
--- -----------------------------------------------------------------------------
--- General things for putting together code sequences
-
--- Expand CmmRegOff. ToDo: should we do it this way around, or convert
--- CmmExprs into CmmRegOff?
-mangleIndexTree :: DynFlags -> CmmExpr -> CmmExpr
-mangleIndexTree dflags (CmmRegOff reg off)
- = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
- where width = typeWidth (cmmRegType dflags reg)
-
-mangleIndexTree _ _
- = panic "PPC.CodeGen.mangleIndexTree: no match"
-
--- -----------------------------------------------------------------------------
--- Code gen for 64-bit arithmetic on 32-bit platforms
-
-{-
-Simple support for generating 64-bit code (ie, 64 bit values and 64
-bit assignments) on 32-bit platforms. Unlike the main code generator
-we merely shoot for generating working code as simply as possible, and
-pay little attention to code quality. Specifically, there is no
-attempt to deal cleverly with the fixed-vs-floating register
-distinction; all values are generated into (pairs of) floating
-registers, even if this would mean some redundant reg-reg moves as a
-result. Only one of the VRegUniques is returned, since it will be
-of the VRegUniqueLo form, and the upper-half VReg can be determined
-by applying getHiVRegFromLo to it.
--}
-
-data ChildCode64 -- a.k.a "Register64"
- = ChildCode64
- InstrBlock -- code
- Reg -- the lower 32-bit temporary which contains the
- -- result; use getHiVRegFromLo to find the other
- -- VRegUnique. Rules of this simplified insn
- -- selection game are therefore that the returned
- -- Reg may be modified
-
-
--- | Compute an expression into a register, but
--- we don't mind which one it is.
-getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-getSomeReg expr = do
- r <- getRegister expr
- case r of
- Any rep code -> do
- tmp <- getNewRegNat rep
- return (tmp, code tmp)
- Fixed _ reg code ->
- return (reg, code)
-
-getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock)
-getI64Amodes addrTree = do
- Amode hi_addr addr_code <- getAmode D addrTree
- case addrOffset hi_addr 4 of
- Just lo_addr -> return (hi_addr, lo_addr, addr_code)
- Nothing -> do (hi_ptr, code) <- getSomeReg addrTree
- return (AddrRegImm hi_ptr (ImmInt 0),
- AddrRegImm hi_ptr (ImmInt 4),
- code)
-
-
-assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
-assignMem_I64Code addrTree valueTree = do
- (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
- ChildCode64 vcode rlo <- iselExpr64 valueTree
- let
- rhi = getHiVRegFromLo rlo
-
- -- Big-endian store
- mov_hi = ST II32 rhi hi_addr
- mov_lo = ST II32 rlo lo_addr
- return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-
-
-assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
-assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
- ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
- let
- r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
- r_dst_hi = getHiVRegFromLo r_dst_lo
- r_src_hi = getHiVRegFromLo r_src_lo
- mov_lo = MR r_dst_lo r_src_lo
- mov_hi = MR r_dst_hi r_src_hi
- return (
- vcode `snocOL` mov_lo `snocOL` mov_hi
- )
-
-assignReg_I64Code _ _
- = panic "assignReg_I64Code(powerpc): invalid lvalue"
-
-
-iselExpr64 :: CmmExpr -> NatM ChildCode64
-iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
- (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree
- (rlo, rhi) <- getNewRegPairNat II32
- let mov_hi = LD II32 rhi hi_addr
- mov_lo = LD II32 rlo lo_addr
- return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
- rlo
-
-iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
- = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))
-
-iselExpr64 (CmmLit (CmmInt i _)) = do
- (rlo,rhi) <- getNewRegPairNat II32
- let
- half0 = fromIntegral (fromIntegral i :: Word16)
- half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16)
- half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16)
- half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16)
-
- code = toOL [
- LIS rlo (ImmInt half1),
- OR rlo rlo (RIImm $ ImmInt half0),
- LIS rhi (ImmInt half3),
- OR rhi rhi (RIImm $ ImmInt half2)
- ]
- return (ChildCode64 code rlo)
-
-iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
- ChildCode64 code1 r1lo <- iselExpr64 e1
- ChildCode64 code2 r2lo <- iselExpr64 e2
- (rlo,rhi) <- getNewRegPairNat II32
- let
- r1hi = getHiVRegFromLo r1lo
- r2hi = getHiVRegFromLo r2lo
- code = code1 `appOL`
- code2 `appOL`
- toOL [ ADDC rlo r1lo r2lo,
- ADDE rhi r1hi r2hi ]
- return (ChildCode64 code rlo)
-
-iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
- ChildCode64 code1 r1lo <- iselExpr64 e1
- ChildCode64 code2 r2lo <- iselExpr64 e2
- (rlo,rhi) <- getNewRegPairNat II32
- let
- r1hi = getHiVRegFromLo r1lo
- r2hi = getHiVRegFromLo r2lo
- code = code1 `appOL`
- code2 `appOL`
- toOL [ SUBFC rlo r2lo (RIReg r1lo),
- SUBFE rhi r2hi r1hi ]
- return (ChildCode64 code rlo)
-
-iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
- (expr_reg,expr_code) <- getSomeReg expr
- (rlo, rhi) <- getNewRegPairNat II32
- let mov_hi = LI rhi (ImmInt 0)
- mov_lo = MR rlo expr_reg
- return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
- rlo
-
-iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do
- (expr_reg,expr_code) <- getSomeReg expr
- (rlo, rhi) <- getNewRegPairNat II32
- let mov_hi = SRA II32 rhi expr_reg (RIImm (ImmInt 31))
- mov_lo = MR rlo expr_reg
- return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
- rlo
-iselExpr64 expr
- = pprPanic "iselExpr64(powerpc)" (pprExpr expr)
-
-
-
-getRegister :: CmmExpr -> NatM Register
-getRegister e = do dflags <- getDynFlags
- getRegister' dflags e
-
-getRegister' :: DynFlags -> CmmExpr -> NatM Register
-
-getRegister' dflags (CmmReg (CmmGlobal PicBaseReg))
- | OSAIX <- platformOS (targetPlatform dflags) = do
- let code dst = toOL [ LD II32 dst tocAddr ]
- tocAddr = AddrRegImm toc (ImmLit (text "ghc_toc_table[TC]"))
- return (Any II32 code)
- | target32Bit (targetPlatform dflags) = do
- reg <- getPicBaseNat $ archWordFormat (target32Bit (targetPlatform dflags))
- return (Fixed (archWordFormat (target32Bit (targetPlatform dflags)))
- reg nilOL)
- | otherwise = return (Fixed II64 toc nilOL)
-
-getRegister' dflags (CmmReg reg)
- = return (Fixed (cmmTypeFormat (cmmRegType dflags reg))
- (getRegisterReg (targetPlatform dflags) reg) nilOL)
-
-getRegister' dflags tree@(CmmRegOff _ _)
- = getRegister' dflags (mangleIndexTree dflags tree)
-
- -- for 32-bit architectures, support some 64 -> 32 bit conversions:
- -- TO_W_(x), TO_W_(x >> 32)
-
-getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32)
- [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
- | target32Bit (targetPlatform dflags) = do
- ChildCode64 code rlo <- iselExpr64 x
- return $ Fixed II32 (getHiVRegFromLo rlo) code
-
-getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32)
- [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
- | target32Bit (targetPlatform dflags) = do
- ChildCode64 code rlo <- iselExpr64 x
- return $ Fixed II32 (getHiVRegFromLo rlo) code
-
-getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32) [x])
- | target32Bit (targetPlatform dflags) = do
- ChildCode64 code rlo <- iselExpr64 x
- return $ Fixed II32 rlo code
-
-getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32) [x])
- | target32Bit (targetPlatform dflags) = do
- ChildCode64 code rlo <- iselExpr64 x
- return $ Fixed II32 rlo code
-
-getRegister' dflags (CmmLoad mem pk)
- | not (isWord64 pk) = do
- let platform = targetPlatform dflags
- Amode addr addr_code <- getAmode D mem
- let code dst = ASSERT((targetClassOfReg platform dst == RcDouble) == isFloatType pk)
- addr_code `snocOL` LD format dst addr
- return (Any format code)
- | not (target32Bit (targetPlatform dflags)) = do
- Amode addr addr_code <- getAmode DS mem
- let code dst = addr_code `snocOL` LD II64 dst addr
- return (Any II64 code)
-
- where format = cmmTypeFormat pk
-
--- catch simple cases of zero- or sign-extended load
-getRegister' _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do
- Amode addr addr_code <- getAmode D mem
- return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
-
-getRegister' _ (CmmMachOp (MO_XX_Conv W8 W32) [CmmLoad mem _]) = do
- Amode addr addr_code <- getAmode D mem
- return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr))
-
-getRegister' _ (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad mem _]) = do
- Amode addr addr_code <- getAmode D mem
- return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr))
-
-getRegister' _ (CmmMachOp (MO_XX_Conv W8 W64) [CmmLoad mem _]) = do
- Amode addr addr_code <- getAmode D mem
- return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr))
-
--- Note: there is no Load Byte Arithmetic instruction, so no signed case here
-
-getRegister' _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do
- Amode addr addr_code <- getAmode D mem
- return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr))
-
-getRegister' _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do
- Amode addr addr_code <- getAmode D mem
- return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr))
-
-getRegister' _ (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad mem _]) = do
- Amode addr addr_code <- getAmode D mem
- return (Any II64 (\dst -> addr_code `snocOL` LD II16 dst addr))
-
-getRegister' _ (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad mem _]) = do
- Amode addr addr_code <- getAmode D mem
- return (Any II64 (\dst -> addr_code `snocOL` LA II16 dst addr))
-
-getRegister' _ (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad mem _]) = do
- Amode addr addr_code <- getAmode D mem
- return (Any II64 (\dst -> addr_code `snocOL` LD II32 dst addr))
-
-getRegister' _ (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad mem _]) = do
- -- lwa is DS-form. See Note [Power instruction format]
- Amode addr addr_code <- getAmode DS mem
- return (Any II64 (\dst -> addr_code `snocOL` LA II32 dst addr))
-
-getRegister' dflags (CmmMachOp mop [x]) -- unary MachOps
- = case mop of
- MO_Not rep -> triv_ucode_int rep NOT
-
- MO_F_Neg w -> triv_ucode_float w FNEG
- MO_S_Neg w -> triv_ucode_int w NEG
-
- MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x
- MO_FF_Conv W32 W64 -> conversionNop FF64 x
-
- MO_FS_Conv from to -> coerceFP2Int from to x
- MO_SF_Conv from to -> coerceInt2FP from to x
-
- MO_SS_Conv from to
- | from >= to -> conversionNop (intFormat to) x
- | otherwise -> triv_ucode_int to (EXTS (intFormat from))
-
- MO_UU_Conv from to
- | from >= to -> conversionNop (intFormat to) x
- | otherwise -> clearLeft from to
-
- MO_XX_Conv _ to -> conversionNop (intFormat to) x
-
- _ -> panic "PPC.CodeGen.getRegister: no match"
-
- where
- triv_ucode_int width instr = trivialUCode (intFormat width) instr x
- triv_ucode_float width instr = trivialUCode (floatFormat width) instr x
-
- conversionNop new_format expr
- = do e_code <- getRegister' dflags expr
- return (swizzleRegisterRep e_code new_format)
-
- clearLeft from to
- = do (src1, code1) <- getSomeReg x
- let arch_fmt = intFormat (wordWidth dflags)
- arch_bits = widthInBits (wordWidth dflags)
- size = widthInBits from
- code dst = code1 `snocOL`
- CLRLI arch_fmt dst src1 (arch_bits - size)
- return (Any (intFormat to) code)
-
-getRegister' _ (CmmMachOp mop [x, y]) -- dyadic PrimOps
- = case mop of
- MO_F_Eq _ -> condFltReg EQQ x y
- MO_F_Ne _ -> condFltReg NE x y
- MO_F_Gt _ -> condFltReg GTT x y
- MO_F_Ge _ -> condFltReg GE x y
- MO_F_Lt _ -> condFltReg LTT x y
- MO_F_Le _ -> condFltReg LE x y
-
- MO_Eq rep -> condIntReg EQQ rep x y
- MO_Ne rep -> condIntReg NE rep x y
-
- MO_S_Gt rep -> condIntReg GTT rep x y
- MO_S_Ge rep -> condIntReg GE rep x y
- MO_S_Lt rep -> condIntReg LTT rep x y
- MO_S_Le rep -> condIntReg LE rep x y
-
- MO_U_Gt rep -> condIntReg GU rep x y
- MO_U_Ge rep -> condIntReg GEU rep x y
- MO_U_Lt rep -> condIntReg LU rep x y
- MO_U_Le rep -> condIntReg LEU rep x y
-
- MO_F_Add w -> triv_float w FADD
- MO_F_Sub w -> triv_float w FSUB
- MO_F_Mul w -> triv_float w FMUL
- MO_F_Quot w -> triv_float w FDIV
-
- -- optimize addition with 32-bit immediate
- -- (needed for PIC)
- MO_Add W32 ->
- case y of
- CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True imm
- -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep)
- CmmLit lit
- -> do
- (src, srcCode) <- getSomeReg x
- let imm = litToImm lit
- code dst = srcCode `appOL` toOL [
- ADDIS dst src (HA imm),
- ADD dst dst (RIImm (LO imm))
- ]
- return (Any II32 code)
- _ -> trivialCode W32 True ADD x y
-
- MO_Add rep -> trivialCode rep True ADD x y
- MO_Sub rep ->
- case y of
- CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm)
- -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep)
- _ -> case x of
- CmmLit (CmmInt imm _)
- | Just _ <- makeImmediate rep True imm
- -- subfi ('subtract from' with immediate) doesn't exist
- -> trivialCode rep True SUBFC y x
- _ -> trivialCodeNoImm' (intFormat rep) SUBF y x
-
- MO_Mul rep -> shiftMulCode rep True MULL x y
- MO_S_MulMayOflo rep -> do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- let
- format = intFormat rep
- code dst = code1 `appOL` code2
- `appOL` toOL [ MULLO format dst src1 src2
- , MFOV format dst
- ]
- return (Any format code)
-
- MO_S_Quot rep -> divCode rep True x y
- MO_U_Quot rep -> divCode rep False x y
-
- MO_S_Rem rep -> remainder rep True x y
- MO_U_Rem rep -> remainder rep False x y
-
- MO_And rep -> case y of
- (CmmLit (CmmInt imm _)) | imm == -8 || imm == -4
- -> do
- (src, srcCode) <- getSomeReg x
- let clear_mask = if imm == -4 then 2 else 3
- fmt = intFormat rep
- code dst = srcCode
- `appOL` unitOL (CLRRI fmt dst src clear_mask)
- return (Any fmt code)
- _ -> trivialCode rep False AND x y
- MO_Or rep -> trivialCode rep False OR x y
- MO_Xor rep -> trivialCode rep False XOR x y
-
- MO_Shl rep -> shiftMulCode rep False SL x y
- MO_S_Shr rep -> srCode rep True SRA x y
- MO_U_Shr rep -> srCode rep False SR x y
- _ -> panic "PPC.CodeGen.getRegister: no match"
-
- where
- triv_float :: Width -> (Format -> Reg -> Reg -> Reg -> Instr) -> NatM Register
- triv_float width instr = trivialCodeNoImm (floatFormat width) instr x y
-
- remainder :: Width -> Bool -> CmmExpr -> CmmExpr -> NatM Register
- remainder rep sgn x y = do
- let fmt = intFormat rep
- tmp <- getNewRegNat fmt
- code <- remainderCode rep sgn tmp x y
- return (Any fmt code)
-
-
-getRegister' _ (CmmLit (CmmInt i rep))
- | Just imm <- makeImmediate rep True i
- = let
- code dst = unitOL (LI dst imm)
- in
- return (Any (intFormat rep) code)
-
-getRegister' _ (CmmLit (CmmFloat f frep)) = do
- lbl <- getNewLabelNat
- dflags <- getDynFlags
- dynRef <- cmmMakeDynamicReference dflags DataReference lbl
- Amode addr addr_code <- getAmode D dynRef
- let format = floatFormat frep
- code dst =
- LDATA (Section ReadOnlyData lbl)
- (RawCmmStatics lbl [CmmStaticLit (CmmFloat f frep)])
- `consOL` (addr_code `snocOL` LD format dst addr)
- return (Any format code)
-
-getRegister' dflags (CmmLit lit)
- | target32Bit (targetPlatform dflags)
- = let rep = cmmLitType dflags lit
- imm = litToImm lit
- code dst = toOL [
- LIS dst (HA imm),
- ADD dst dst (RIImm (LO imm))
- ]
- in return (Any (cmmTypeFormat rep) code)
- | otherwise
- = do lbl <- getNewLabelNat
- dflags <- getDynFlags
- dynRef <- cmmMakeDynamicReference dflags DataReference lbl
- Amode addr addr_code <- getAmode D dynRef
- let rep = cmmLitType dflags lit
- format = cmmTypeFormat rep
- code dst =
- LDATA (Section ReadOnlyData lbl) (RawCmmStatics lbl [CmmStaticLit lit])
- `consOL` (addr_code `snocOL` LD format dst addr)
- return (Any format code)
-
-getRegister' _ other = pprPanic "getRegister(ppc)" (pprExpr other)
-
- -- extend?Rep: wrap integer expression of type `from`
- -- in a conversion to `to`
-extendSExpr :: Width -> Width -> CmmExpr -> CmmExpr
-extendSExpr from to x = CmmMachOp (MO_SS_Conv from to) [x]
-
-extendUExpr :: Width -> Width -> CmmExpr -> CmmExpr
-extendUExpr from to x = CmmMachOp (MO_UU_Conv from to) [x]
-
--- -----------------------------------------------------------------------------
--- The 'Amode' type: Memory addressing modes passed up the tree.
-
-data Amode
- = Amode AddrMode InstrBlock
-
-{-
-Now, given a tree (the argument to a CmmLoad) that references memory,
-produce a suitable addressing mode.
-
-A Rule of the Game (tm) for Amodes: use of the addr bit must
-immediately follow use of the code part, since the code part puts
-values in registers which the addr then refers to. So you can't put
-anything in between, lest it overwrite some of those registers. If
-you need to do some other computation between the code part and use of
-the addr bit, first store the effective address from the amode in a
-temporary, then do the other computation, and then use the temporary:
-
- code
- LEA amode, tmp
- ... other computation ...
- ... (tmp) ...
--}
-
-{- Note [Power instruction format]
-In some instructions the 16 bit offset must be a multiple of 4, i.e.
-the two least significant bits must be zero. The "Power ISA" specification
-calls these instruction formats "DS-FORM" and the instructions with
-arbitrary 16 bit offsets are "D-FORM".
-
-The Power ISA specification document can be obtained from www.power.org.
--}
-data InstrForm = D | DS
-
-getAmode :: InstrForm -> CmmExpr -> NatM Amode
-getAmode inf tree@(CmmRegOff _ _)
- = do dflags <- getDynFlags
- getAmode inf (mangleIndexTree dflags tree)
-
-getAmode _ (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)])
- | Just off <- makeImmediate W32 True (-i)
- = do
- (reg, code) <- getSomeReg x
- return (Amode (AddrRegImm reg off) code)
-
-
-getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)])
- | Just off <- makeImmediate W32 True i
- = do
- (reg, code) <- getSomeReg x
- return (Amode (AddrRegImm reg off) code)
-
-getAmode D (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
- | Just off <- makeImmediate W64 True (-i)
- = do
- (reg, code) <- getSomeReg x
- return (Amode (AddrRegImm reg off) code)
-
-
-getAmode D (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
- | Just off <- makeImmediate W64 True i
- = do
- (reg, code) <- getSomeReg x
- return (Amode (AddrRegImm reg off) code)
-
-getAmode DS (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)])
- | Just off <- makeImmediate W64 True (-i)
- = do
- (reg, code) <- getSomeReg x
- (reg', off', code') <-
- if i `mod` 4 == 0
- then do return (reg, off, code)
- else do
- tmp <- getNewRegNat II64
- return (tmp, ImmInt 0,
- code `snocOL` ADD tmp reg (RIImm off))
- return (Amode (AddrRegImm reg' off') code')
-
-getAmode DS (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)])
- | Just off <- makeImmediate W64 True i
- = do
- (reg, code) <- getSomeReg x
- (reg', off', code') <-
- if i `mod` 4 == 0
- then do return (reg, off, code)
- else do
- tmp <- getNewRegNat II64
- return (tmp, ImmInt 0,
- code `snocOL` ADD tmp reg (RIImm off))
- return (Amode (AddrRegImm reg' off') code')
-
- -- optimize addition with 32-bit immediate
- -- (needed for PIC)
-getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit lit])
- = do
- dflags <- getDynFlags
- (src, srcCode) <- getSomeReg x
- let imm = litToImm lit
- case () of
- _ | OSAIX <- platformOS (targetPlatform dflags)
- , isCmmLabelType lit ->
- -- HA16/LO16 relocations on labels not supported on AIX
- return (Amode (AddrRegImm src imm) srcCode)
- | otherwise -> do
- tmp <- getNewRegNat II32
- let code = srcCode `snocOL` ADDIS tmp src (HA imm)
- return (Amode (AddrRegImm tmp (LO imm)) code)
- where
- isCmmLabelType (CmmLabel {}) = True
- isCmmLabelType (CmmLabelOff {}) = True
- isCmmLabelType (CmmLabelDiffOff {}) = True
- isCmmLabelType _ = False
-
-getAmode _ (CmmLit lit)
- = do
- dflags <- getDynFlags
- case platformArch $ targetPlatform dflags of
- ArchPPC -> do
- tmp <- getNewRegNat II32
- let imm = litToImm lit
- code = unitOL (LIS tmp (HA imm))
- return (Amode (AddrRegImm tmp (LO imm)) code)
- _ -> do -- TODO: Load from TOC,
- -- see getRegister' _ (CmmLit lit)
- tmp <- getNewRegNat II64
- let imm = litToImm lit
- code = toOL [
- LIS tmp (HIGHESTA imm),
- OR tmp tmp (RIImm (HIGHERA imm)),
- SL II64 tmp tmp (RIImm (ImmInt 32)),
- ORIS tmp tmp (HA imm)
- ]
- return (Amode (AddrRegImm tmp (LO imm)) code)
-
-getAmode _ (CmmMachOp (MO_Add W32) [x, y])
- = do
- (regX, codeX) <- getSomeReg x
- (regY, codeY) <- getSomeReg y
- return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
-
-getAmode _ (CmmMachOp (MO_Add W64) [x, y])
- = do
- (regX, codeX) <- getSomeReg x
- (regY, codeY) <- getSomeReg y
- return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
-
-getAmode _ other
- = do
- (reg, code) <- getSomeReg other
- let
- off = ImmInt 0
- return (Amode (AddrRegImm reg off) code)
-
-
--- The 'CondCode' type: Condition codes passed up the tree.
-data CondCode
- = CondCode Bool Cond InstrBlock
-
--- Set up a condition code for a conditional branch.
-
-getCondCode :: CmmExpr -> NatM CondCode
-
--- almost the same as everywhere else - but we need to
--- extend small integers to 32 bit or 64 bit first
-
-getCondCode (CmmMachOp mop [x, y])
- = do
- case mop of
- MO_F_Eq W32 -> condFltCode EQQ x y
- MO_F_Ne W32 -> condFltCode NE x y
- MO_F_Gt W32 -> condFltCode GTT x y
- MO_F_Ge W32 -> condFltCode GE x y
- MO_F_Lt W32 -> condFltCode LTT x y
- MO_F_Le W32 -> condFltCode LE x y
-
- MO_F_Eq W64 -> condFltCode EQQ x y
- MO_F_Ne W64 -> condFltCode NE x y
- MO_F_Gt W64 -> condFltCode GTT x y
- MO_F_Ge W64 -> condFltCode GE x y
- MO_F_Lt W64 -> condFltCode LTT x y
- MO_F_Le W64 -> condFltCode LE x y
-
- MO_Eq rep -> condIntCode EQQ rep x y
- MO_Ne rep -> condIntCode NE rep x y
-
- MO_S_Gt rep -> condIntCode GTT rep x y
- MO_S_Ge rep -> condIntCode GE rep x y
- MO_S_Lt rep -> condIntCode LTT rep x y
- MO_S_Le rep -> condIntCode LE rep x y
-
- MO_U_Gt rep -> condIntCode GU rep x y
- MO_U_Ge rep -> condIntCode GEU rep x y
- MO_U_Lt rep -> condIntCode LU rep x y
- MO_U_Le rep -> condIntCode LEU rep x y
-
- _ -> pprPanic "getCondCode(powerpc)" (pprMachOp mop)
-
-getCondCode _ = panic "getCondCode(2)(powerpc)"
-
-
--- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
--- passed back up the tree.
-
-condIntCode :: Cond -> Width -> CmmExpr -> CmmExpr -> NatM CondCode
-condIntCode cond width x y = do
- dflags <- getDynFlags
- condIntCode' (target32Bit (targetPlatform dflags)) cond width x y
-
-condIntCode' :: Bool -> Cond -> Width -> CmmExpr -> CmmExpr -> NatM CondCode
-
--- simple code for 64-bit on 32-bit platforms
-condIntCode' True cond W64 x y
- | condUnsigned cond
- = do
- ChildCode64 code_x x_lo <- iselExpr64 x
- ChildCode64 code_y y_lo <- iselExpr64 y
- let x_hi = getHiVRegFromLo x_lo
- y_hi = getHiVRegFromLo y_lo
- end_lbl <- getBlockIdNat
- let code = code_x `appOL` code_y `appOL` toOL
- [ CMPL II32 x_hi (RIReg y_hi)
- , BCC NE end_lbl Nothing
- , CMPL II32 x_lo (RIReg y_lo)
- , BCC ALWAYS end_lbl Nothing
-
- , NEWBLOCK end_lbl
- ]
- return (CondCode False cond code)
- | otherwise
- = do
- ChildCode64 code_x x_lo <- iselExpr64 x
- ChildCode64 code_y y_lo <- iselExpr64 y
- let x_hi = getHiVRegFromLo x_lo
- y_hi = getHiVRegFromLo y_lo
- end_lbl <- getBlockIdNat
- cmp_lo <- getBlockIdNat
- let code = code_x `appOL` code_y `appOL` toOL
- [ CMP II32 x_hi (RIReg y_hi)
- , BCC NE end_lbl Nothing
- , CMP II32 x_hi (RIImm (ImmInt 0))
- , BCC LE cmp_lo Nothing
- , CMPL II32 x_lo (RIReg y_lo)
- , BCC ALWAYS end_lbl Nothing
- , NEWBLOCK cmp_lo
- , CMPL II32 y_lo (RIReg x_lo)
- , BCC ALWAYS end_lbl Nothing
-
- , NEWBLOCK end_lbl
- ]
- return (CondCode False cond code)
-
--- optimize pointer tag checks. Operation andi. sets condition register
--- so cmpi ..., 0 is redundant.
-condIntCode' _ cond _ (CmmMachOp (MO_And _) [x, CmmLit (CmmInt imm rep)])
- (CmmLit (CmmInt 0 _))
- | not $ condUnsigned cond,
- Just src2 <- makeImmediate rep False imm
- = do
- (src1, code) <- getSomeReg x
- let code' = code `snocOL` AND r0 src1 (RIImm src2)
- return (CondCode False cond code')
-
-condIntCode' _ cond width x (CmmLit (CmmInt y rep))
- | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y
- = do
- let op_len = max W32 width
- let extend = extendSExpr width op_len
- (src1, code) <- getSomeReg (extend x)
- let format = intFormat op_len
- code' = code `snocOL`
- (if condUnsigned cond then CMPL else CMP) format src1 (RIImm src2)
- return (CondCode False cond code')
-
-condIntCode' _ cond width x y = do
- let op_len = max W32 width
- let extend = if condUnsigned cond then extendUExpr width op_len
- else extendSExpr width op_len
- (src1, code1) <- getSomeReg (extend x)
- (src2, code2) <- getSomeReg (extend y)
- let format = intFormat op_len
- code' = code1 `appOL` code2 `snocOL`
- (if condUnsigned cond then CMPL else CMP) format src1 (RIReg src2)
- return (CondCode False cond code')
-
-condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
-condFltCode cond x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- let
- code' = code1 `appOL` code2 `snocOL` FCMP src1 src2
- code'' = case cond of -- twiddle CR to handle unordered case
- GE -> code' `snocOL` CRNOR ltbit eqbit gtbit
- LE -> code' `snocOL` CRNOR gtbit eqbit ltbit
- _ -> code'
- where
- ltbit = 0 ; eqbit = 2 ; gtbit = 1
- return (CondCode True cond code'')
-
-
-
--- -----------------------------------------------------------------------------
--- Generating assignments
-
--- Assignments are really at the heart of the whole code generation
--- business. Almost all top-level nodes of any real importance are
--- assignments, which correspond to loads, stores, or register
--- transfers. If we're really lucky, some of the register transfers
--- will go away, because we can use the destination register to
--- complete the code generation for the right hand side. This only
--- fails when the right hand side is forced into a fixed register
--- (e.g. the result of a call).
-
-assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
-assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
-
-assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
-assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
-
-assignMem_IntCode pk addr src = do
- (srcReg, code) <- getSomeReg src
- Amode dstAddr addr_code <- case pk of
- II64 -> getAmode DS addr
- _ -> getAmode D addr
- return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
-
--- dst is a reg, but src could be anything
-assignReg_IntCode _ reg src
- = do
- dflags <- getDynFlags
- let dst = getRegisterReg (targetPlatform dflags) reg
- r <- getRegister src
- return $ case r of
- Any _ code -> code dst
- Fixed _ freg fcode -> fcode `snocOL` MR dst freg
-
-
-
--- Easy, isn't it?
-assignMem_FltCode = assignMem_IntCode
-assignReg_FltCode = assignReg_IntCode
-
-
-
-genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock
-
-genJump (CmmLit (CmmLabel lbl)) regs
- = return (unitOL $ JMP lbl regs)
-
-genJump tree gregs
- = do
- dflags <- getDynFlags
- genJump' tree (platformToGCP (targetPlatform dflags)) gregs
-
-genJump' :: CmmExpr -> GenCCallPlatform -> [Reg] -> NatM InstrBlock
-
-genJump' tree (GCP64ELF 1) regs
- = do
- (target,code) <- getSomeReg tree
- return (code
- `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 0))
- `snocOL` LD II64 toc (AddrRegImm target (ImmInt 8))
- `snocOL` MTCTR r11
- `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 16))
- `snocOL` BCTR [] Nothing regs)
-
-genJump' tree (GCP64ELF 2) regs
- = do
- (target,code) <- getSomeReg tree
- return (code
- `snocOL` MR r12 target
- `snocOL` MTCTR r12
- `snocOL` BCTR [] Nothing regs)
-
-genJump' tree _ regs
- = do
- (target,code) <- getSomeReg tree
- return (code `snocOL` MTCTR target `snocOL` BCTR [] Nothing regs)
-
--- -----------------------------------------------------------------------------
--- Unconditional branches
-genBranch :: BlockId -> NatM InstrBlock
-genBranch = return . toOL . mkJumpInstr
-
-
--- -----------------------------------------------------------------------------
--- Conditional jumps
-
-{-
-Conditional jumps are always to local labels, so we can use branch
-instructions. We peek at the arguments to decide what kind of
-comparison to do.
--}
-
-
-genCondJump
- :: BlockId -- the branch target
- -> CmmExpr -- the condition on which to branch
- -> Maybe Bool
- -> NatM InstrBlock
-
-genCondJump id bool prediction = do
- CondCode _ cond code <- getCondCode bool
- return (code `snocOL` BCC cond id prediction)
-
-
-
--- -----------------------------------------------------------------------------
--- Generating C calls
-
--- Now the biggest nightmare---calls. Most of the nastiness is buried in
--- @get_arg@, which moves the arguments to the correct registers/stack
--- locations. Apart from that, the code is easy.
-
-genCCall :: ForeignTarget -- function to call
- -> [CmmFormal] -- where to put the result
- -> [CmmActual] -- arguments (of mixed type)
- -> NatM InstrBlock
-genCCall (PrimTarget MO_ReadBarrier) _ _
- = return $ unitOL LWSYNC
-genCCall (PrimTarget MO_WriteBarrier) _ _
- = return $ unitOL LWSYNC
-
-genCCall (PrimTarget MO_Touch) _ _
- = return $ nilOL
-
-genCCall (PrimTarget (MO_Prefetch_Data _)) _ _
- = return $ nilOL
-
-genCCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n]
- = do dflags <- getDynFlags
- let platform = targetPlatform dflags
- fmt = intFormat width
- reg_dst = getRegisterReg platform (CmmLocal dst)
- (instr, n_code) <- case amop of
- AMO_Add -> getSomeRegOrImm ADD True reg_dst
- AMO_Sub -> case n of
- CmmLit (CmmInt i _)
- | Just imm <- makeImmediate width True (-i)
- -> return (ADD reg_dst reg_dst (RIImm imm), nilOL)
- _
- -> do
- (n_reg, n_code) <- getSomeReg n
- return (SUBF reg_dst n_reg reg_dst, n_code)
- AMO_And -> getSomeRegOrImm AND False reg_dst
- AMO_Nand -> do (n_reg, n_code) <- getSomeReg n
- return (NAND reg_dst reg_dst n_reg, n_code)
- AMO_Or -> getSomeRegOrImm OR False reg_dst
- AMO_Xor -> getSomeRegOrImm XOR False reg_dst
- Amode addr_reg addr_code <- getAmodeIndex addr
- lbl_retry <- getBlockIdNat
- return $ n_code `appOL` addr_code
- `appOL` toOL [ HWSYNC
- , BCC ALWAYS lbl_retry Nothing
-
- , NEWBLOCK lbl_retry
- , LDR fmt reg_dst addr_reg
- , instr
- , STC fmt reg_dst addr_reg
- , BCC NE lbl_retry (Just False)
- , ISYNC
- ]
- where
- getAmodeIndex (CmmMachOp (MO_Add _) [x, y])
- = do
- (regX, codeX) <- getSomeReg x
- (regY, codeY) <- getSomeReg y
- return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
- getAmodeIndex other
- = do
- (reg, code) <- getSomeReg other
- return (Amode (AddrRegReg r0 reg) code) -- NB: r0 is 0 here!
- getSomeRegOrImm op sign dst
- = case n of
- CmmLit (CmmInt i _) | Just imm <- makeImmediate width sign i
- -> return (op dst dst (RIImm imm), nilOL)
- _
- -> do
- (n_reg, n_code) <- getSomeReg n
- return (op dst dst (RIReg n_reg), n_code)
-
-genCCall (PrimTarget (MO_AtomicRead width)) [dst] [addr]
- = do dflags <- getDynFlags
- let platform = targetPlatform dflags
- fmt = intFormat width
- reg_dst = getRegisterReg platform (CmmLocal dst)
- form = if widthInBits width == 64 then DS else D
- Amode addr_reg addr_code <- getAmode form addr
- lbl_end <- getBlockIdNat
- return $ addr_code `appOL` toOL [ HWSYNC
- , LD fmt reg_dst addr_reg
- , CMP fmt reg_dst (RIReg reg_dst)
- , BCC NE lbl_end (Just False)
- , BCC ALWAYS lbl_end Nothing
- -- See Note [Seemingly useless cmp and bne]
- , NEWBLOCK lbl_end
- , ISYNC
- ]
-
--- Note [Seemingly useless cmp and bne]
--- In Power ISA, Book II, Section 4.4.1, Instruction Synchronize Instruction
--- the second paragraph says that isync may complete before storage accesses
--- "associated" with a preceding instruction have been performed. The cmp
--- operation and the following bne introduce a data and control dependency
--- on the load instruction (See also Power ISA, Book II, Appendix B.2.3, Safe
--- Fetch).
--- This is also what gcc does.
-
-
-genCCall (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do
- code <- assignMem_IntCode (intFormat width) addr val
- return $ unitOL(HWSYNC) `appOL` code
-
-genCCall (PrimTarget (MO_Clz width)) [dst] [src]
- = do dflags <- getDynFlags
- let platform = targetPlatform dflags
- reg_dst = getRegisterReg platform (CmmLocal dst)
- if target32Bit platform && width == W64
- then do
- ChildCode64 code vr_lo <- iselExpr64 src
- lbl1 <- getBlockIdNat
- lbl2 <- getBlockIdNat
- lbl3 <- getBlockIdNat
- let vr_hi = getHiVRegFromLo vr_lo
- cntlz = toOL [ CMPL II32 vr_hi (RIImm (ImmInt 0))
- , BCC NE lbl2 Nothing
- , BCC ALWAYS lbl1 Nothing
-
- , NEWBLOCK lbl1
- , CNTLZ II32 reg_dst vr_lo
- , ADD reg_dst reg_dst (RIImm (ImmInt 32))
- , BCC ALWAYS lbl3 Nothing
-
- , NEWBLOCK lbl2
- , CNTLZ II32 reg_dst vr_hi
- , BCC ALWAYS lbl3 Nothing
-
- , NEWBLOCK lbl3
- ]
- return $ code `appOL` cntlz
- else do
- let format = if width == W64 then II64 else II32
- (s_reg, s_code) <- getSomeReg src
- (pre, reg , post) <-
- case width of
- W64 -> return (nilOL, s_reg, nilOL)
- W32 -> return (nilOL, s_reg, nilOL)
- W16 -> do
- reg_tmp <- getNewRegNat format
- return
- ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 65535))
- , reg_tmp
- , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-16)))
- )
- W8 -> do
- reg_tmp <- getNewRegNat format
- return
- ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 255))
- , reg_tmp
- , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-24)))
- )
- _ -> panic "genCall: Clz wrong format"
- let cntlz = unitOL (CNTLZ format reg_dst reg)
- return $ s_code `appOL` pre `appOL` cntlz `appOL` post
-
-genCCall (PrimTarget (MO_Ctz width)) [dst] [src]
- = do dflags <- getDynFlags
- let platform = targetPlatform dflags
- reg_dst = getRegisterReg platform (CmmLocal dst)
- if target32Bit platform && width == W64
- then do
- let format = II32
- ChildCode64 code vr_lo <- iselExpr64 src
- lbl1 <- getBlockIdNat
- lbl2 <- getBlockIdNat
- lbl3 <- getBlockIdNat
- x' <- getNewRegNat format
- x'' <- getNewRegNat format
- r' <- getNewRegNat format
- cnttzlo <- cnttz format reg_dst vr_lo
- let vr_hi = getHiVRegFromLo vr_lo
- cnttz64 = toOL [ CMPL format vr_lo (RIImm (ImmInt 0))
- , BCC NE lbl2 Nothing
- , BCC ALWAYS lbl1 Nothing
-
- , NEWBLOCK lbl1
- , ADD x' vr_hi (RIImm (ImmInt (-1)))
- , ANDC x'' x' vr_hi
- , CNTLZ format r' x''
- -- 32 + (32 - clz(x''))
- , SUBFC reg_dst r' (RIImm (ImmInt 64))
- , BCC ALWAYS lbl3 Nothing
-
- , NEWBLOCK lbl2
- ]
- `appOL` cnttzlo `appOL`
- toOL [ BCC ALWAYS lbl3 Nothing
-
- , NEWBLOCK lbl3
- ]
- return $ code `appOL` cnttz64
- else do
- let format = if width == W64 then II64 else II32
- (s_reg, s_code) <- getSomeReg src
- (reg_ctz, pre_code) <-
- case width of
- W64 -> return (s_reg, nilOL)
- W32 -> return (s_reg, nilOL)
- W16 -> do
- reg_tmp <- getNewRegNat format
- return (reg_tmp, unitOL $ ORIS reg_tmp s_reg (ImmInt 1))
- W8 -> do
- reg_tmp <- getNewRegNat format
- return (reg_tmp, unitOL $ OR reg_tmp s_reg (RIImm (ImmInt 256)))
- _ -> panic "genCall: Ctz wrong format"
- ctz_code <- cnttz format reg_dst reg_ctz
- return $ s_code `appOL` pre_code `appOL` ctz_code
- where
- -- cnttz(x) = sizeof(x) - cntlz(~x & (x - 1))
- -- see Henry S. Warren, Hacker's Delight, p 107
- cnttz format dst src = do
- let format_bits = 8 * formatInBytes format
- x' <- getNewRegNat format
- x'' <- getNewRegNat format
- r' <- getNewRegNat format
- return $ toOL [ ADD x' src (RIImm (ImmInt (-1)))
- , ANDC x'' x' src
- , CNTLZ format r' x''
- , SUBFC dst r' (RIImm (ImmInt (format_bits)))
- ]
-
-genCCall target dest_regs argsAndHints
- = do dflags <- getDynFlags
- let platform = targetPlatform dflags
- case target of
- PrimTarget (MO_S_QuotRem width) -> divOp1 platform True width
- dest_regs argsAndHints
- PrimTarget (MO_U_QuotRem width) -> divOp1 platform False width
- dest_regs argsAndHints
- PrimTarget (MO_U_QuotRem2 width) -> divOp2 platform width dest_regs
- argsAndHints
- PrimTarget (MO_U_Mul2 width) -> multOp2 platform width dest_regs
- argsAndHints
- PrimTarget (MO_Add2 _) -> add2Op platform dest_regs argsAndHints
- PrimTarget (MO_AddWordC _) -> addcOp platform dest_regs argsAndHints
- PrimTarget (MO_SubWordC _) -> subcOp platform dest_regs argsAndHints
- PrimTarget (MO_AddIntC width) -> addSubCOp ADDO platform width
- dest_regs argsAndHints
- PrimTarget (MO_SubIntC width) -> addSubCOp SUBFO platform width
- dest_regs argsAndHints
- PrimTarget MO_F64_Fabs -> fabs platform dest_regs argsAndHints
- PrimTarget MO_F32_Fabs -> fabs platform dest_regs argsAndHints
- _ -> genCCall' dflags (platformToGCP platform)
- target dest_regs argsAndHints
- where divOp1 platform signed width [res_q, res_r] [arg_x, arg_y]
- = do let reg_q = getRegisterReg platform (CmmLocal res_q)
- reg_r = getRegisterReg platform (CmmLocal res_r)
- remainderCode width signed reg_q arg_x arg_y
- <*> pure reg_r
-
- divOp1 _ _ _ _ _
- = panic "genCCall: Wrong number of arguments for divOp1"
- divOp2 platform width [res_q, res_r]
- [arg_x_high, arg_x_low, arg_y]
- = do let reg_q = getRegisterReg platform (CmmLocal res_q)
- reg_r = getRegisterReg platform (CmmLocal res_r)
- fmt = intFormat width
- half = 4 * (formatInBytes fmt)
- (xh_reg, xh_code) <- getSomeReg arg_x_high
- (xl_reg, xl_code) <- getSomeReg arg_x_low
- (y_reg, y_code) <- getSomeReg arg_y
- s <- getNewRegNat fmt
- b <- getNewRegNat fmt
- v <- getNewRegNat fmt
- vn1 <- getNewRegNat fmt
- vn0 <- getNewRegNat fmt
- un32 <- getNewRegNat fmt
- tmp <- getNewRegNat fmt
- un10 <- getNewRegNat fmt
- un1 <- getNewRegNat fmt
- un0 <- getNewRegNat fmt
- q1 <- getNewRegNat fmt
- rhat <- getNewRegNat fmt
- tmp1 <- getNewRegNat fmt
- q0 <- getNewRegNat fmt
- un21 <- getNewRegNat fmt
- again1 <- getBlockIdNat
- no1 <- getBlockIdNat
- then1 <- getBlockIdNat
- endif1 <- getBlockIdNat
- again2 <- getBlockIdNat
- no2 <- getBlockIdNat
- then2 <- getBlockIdNat
- endif2 <- getBlockIdNat
- return $ y_code `appOL` xl_code `appOL` xh_code `appOL`
- -- see Hacker's Delight p 196 Figure 9-3
- toOL [ -- b = 2 ^ (bits_in_word / 2)
- LI b (ImmInt 1)
- , SL fmt b b (RIImm (ImmInt half))
- -- s = clz(y)
- , CNTLZ fmt s y_reg
- -- v = y << s
- , SL fmt v y_reg (RIReg s)
- -- vn1 = upper half of v
- , SR fmt vn1 v (RIImm (ImmInt half))
- -- vn0 = lower half of v
- , CLRLI fmt vn0 v half
- -- un32 = (u1 << s)
- -- | (u0 >> (bits_in_word - s))
- , SL fmt un32 xh_reg (RIReg s)
- , SUBFC tmp s
- (RIImm (ImmInt (8 * formatInBytes fmt)))
- , SR fmt tmp xl_reg (RIReg tmp)
- , OR un32 un32 (RIReg tmp)
- -- un10 = u0 << s
- , SL fmt un10 xl_reg (RIReg s)
- -- un1 = upper half of un10
- , SR fmt un1 un10 (RIImm (ImmInt half))
- -- un0 = lower half of un10
- , CLRLI fmt un0 un10 half
- -- q1 = un32/vn1
- , DIV fmt False q1 un32 vn1
- -- rhat = un32 - q1*vn1
- , MULL fmt tmp q1 (RIReg vn1)
- , SUBF rhat tmp un32
- , BCC ALWAYS again1 Nothing
-
- , NEWBLOCK again1
- -- if (q1 >= b || q1*vn0 > b*rhat + un1)
- , CMPL fmt q1 (RIReg b)
- , BCC GEU then1 Nothing
- , BCC ALWAYS no1 Nothing
-
- , NEWBLOCK no1
- , MULL fmt tmp q1 (RIReg vn0)
- , SL fmt tmp1 rhat (RIImm (ImmInt half))
- , ADD tmp1 tmp1 (RIReg un1)
- , CMPL fmt tmp (RIReg tmp1)
- , BCC LEU endif1 Nothing
- , BCC ALWAYS then1 Nothing
-
- , NEWBLOCK then1
- -- q1 = q1 - 1
- , ADD q1 q1 (RIImm (ImmInt (-1)))
- -- rhat = rhat + vn1
- , ADD rhat rhat (RIReg vn1)
- -- if (rhat < b) goto again1
- , CMPL fmt rhat (RIReg b)
- , BCC LTT again1 Nothing
- , BCC ALWAYS endif1 Nothing
-
- , NEWBLOCK endif1
- -- un21 = un32*b + un1 - q1*v
- , SL fmt un21 un32 (RIImm (ImmInt half))
- , ADD un21 un21 (RIReg un1)
- , MULL fmt tmp q1 (RIReg v)
- , SUBF un21 tmp un21
- -- compute second quotient digit
- -- q0 = un21/vn1
- , DIV fmt False q0 un21 vn1
- -- rhat = un21- q0*vn1
- , MULL fmt tmp q0 (RIReg vn1)
- , SUBF rhat tmp un21
- , BCC ALWAYS again2 Nothing
-
- , NEWBLOCK again2
- -- if (q0>b || q0*vn0 > b*rhat + un0)
- , CMPL fmt q0 (RIReg b)
- , BCC GEU then2 Nothing
- , BCC ALWAYS no2 Nothing
-
- , NEWBLOCK no2
- , MULL fmt tmp q0 (RIReg vn0)
- , SL fmt tmp1 rhat (RIImm (ImmInt half))
- , ADD tmp1 tmp1 (RIReg un0)
- , CMPL fmt tmp (RIReg tmp1)
- , BCC LEU endif2 Nothing
- , BCC ALWAYS then2 Nothing
-
- , NEWBLOCK then2
- -- q0 = q0 - 1
- , ADD q0 q0 (RIImm (ImmInt (-1)))
- -- rhat = rhat + vn1
- , ADD rhat rhat (RIReg vn1)
- -- if (rhat<b) goto again2
- , CMPL fmt rhat (RIReg b)
- , BCC LTT again2 Nothing
- , BCC ALWAYS endif2 Nothing
-
- , NEWBLOCK endif2
- -- compute remainder
- -- r = (un21*b + un0 - q0*v) >> s
- , SL fmt reg_r un21 (RIImm (ImmInt half))
- , ADD reg_r reg_r (RIReg un0)
- , MULL fmt tmp q0 (RIReg v)
- , SUBF reg_r tmp reg_r
- , SR fmt reg_r reg_r (RIReg s)
- -- compute quotient
- -- q = q1*b + q0
- , SL fmt reg_q q1 (RIImm (ImmInt half))
- , ADD reg_q reg_q (RIReg q0)
- ]
- divOp2 _ _ _ _
- = panic "genCCall: Wrong number of arguments for divOp2"
- multOp2 platform width [res_h, res_l] [arg_x, arg_y]
- = do let reg_h = getRegisterReg platform (CmmLocal res_h)
- reg_l = getRegisterReg platform (CmmLocal res_l)
- fmt = intFormat width
- (x_reg, x_code) <- getSomeReg arg_x
- (y_reg, y_code) <- getSomeReg arg_y
- return $ y_code `appOL` x_code
- `appOL` toOL [ MULL fmt reg_l x_reg (RIReg y_reg)
- , MULHU fmt reg_h x_reg y_reg
- ]
- multOp2 _ _ _ _
- = panic "genCall: Wrong number of arguments for multOp2"
- add2Op platform [res_h, res_l] [arg_x, arg_y]
- = do let reg_h = getRegisterReg platform (CmmLocal res_h)
- reg_l = getRegisterReg platform (CmmLocal res_l)
- (x_reg, x_code) <- getSomeReg arg_x
- (y_reg, y_code) <- getSomeReg arg_y
- return $ y_code `appOL` x_code
- `appOL` toOL [ LI reg_h (ImmInt 0)
- , ADDC reg_l x_reg y_reg
- , ADDZE reg_h reg_h
- ]
- add2Op _ _ _
- = panic "genCCall: Wrong number of arguments/results for add2"
-
- addcOp platform [res_r, res_c] [arg_x, arg_y]
- = add2Op platform [res_c {-hi-}, res_r {-lo-}] [arg_x, arg_y]
- addcOp _ _ _
- = panic "genCCall: Wrong number of arguments/results for addc"
-
- -- PowerPC subfc sets the carry for rT = ~(rA) + rB + 1,
- -- which is 0 for borrow and 1 otherwise. We need 1 and 0
- -- so xor with 1.
- subcOp platform [res_r, res_c] [arg_x, arg_y]
- = do let reg_r = getRegisterReg platform (CmmLocal res_r)
- reg_c = getRegisterReg platform (CmmLocal res_c)
- (x_reg, x_code) <- getSomeReg arg_x
- (y_reg, y_code) <- getSomeReg arg_y
- return $ y_code `appOL` x_code
- `appOL` toOL [ LI reg_c (ImmInt 0)
- , SUBFC reg_r y_reg (RIReg x_reg)
- , ADDZE reg_c reg_c
- , XOR reg_c reg_c (RIImm (ImmInt 1))
- ]
- subcOp _ _ _
- = panic "genCCall: Wrong number of arguments/results for subc"
- addSubCOp instr platform width [res_r, res_c] [arg_x, arg_y]
- = do let reg_r = getRegisterReg platform (CmmLocal res_r)
- reg_c = getRegisterReg platform (CmmLocal res_c)
- (x_reg, x_code) <- getSomeReg arg_x
- (y_reg, y_code) <- getSomeReg arg_y
- return $ y_code `appOL` x_code
- `appOL` toOL [ instr reg_r y_reg x_reg,
- -- SUBFO argument order reversed!
- MFOV (intFormat width) reg_c
- ]
- addSubCOp _ _ _ _ _
- = panic "genCall: Wrong number of arguments/results for addC"
- fabs platform [res] [arg]
- = do let res_r = getRegisterReg platform (CmmLocal res)
- (arg_reg, arg_code) <- getSomeReg arg
- return $ arg_code `snocOL` FABS res_r arg_reg
- fabs _ _ _
- = panic "genCall: Wrong number of arguments/results for fabs"
-
--- TODO: replace 'Int' by an enum such as 'PPC_64ABI'
-data GenCCallPlatform = GCP32ELF | GCP64ELF !Int | GCPAIX
-
-platformToGCP :: Platform -> GenCCallPlatform
-platformToGCP platform
- = case platformOS platform of
- OSAIX -> GCPAIX
- _ -> case platformArch platform of
- ArchPPC -> GCP32ELF
- ArchPPC_64 ELF_V1 -> GCP64ELF 1
- ArchPPC_64 ELF_V2 -> GCP64ELF 2
- _ -> panic "platformToGCP: Not PowerPC"
-
-
-genCCall'
- :: DynFlags
- -> GenCCallPlatform
- -> ForeignTarget -- function to call
- -> [CmmFormal] -- where to put the result
- -> [CmmActual] -- arguments (of mixed type)
- -> NatM InstrBlock
-
-{-
- PowerPC Linux uses the System V Release 4 Calling Convention
- for PowerPC. It is described in the
- "System V Application Binary Interface PowerPC Processor Supplement".
-
- PowerPC 64 Linux uses the System V Release 4 Calling Convention for
- 64-bit PowerPC. It is specified in
- "64-bit PowerPC ELF Application Binary Interface Supplement 1.9"
- (PPC64 ELF v1.9).
-
- PowerPC 64 Linux in little endian mode uses the "Power Architecture 64-Bit
- ELF V2 ABI Specification -- OpenPOWER ABI for Linux Supplement"
- (PPC64 ELF v2).
-
- AIX follows the "PowerOpen ABI: Application Binary Interface Big-Endian
- 32-Bit Hardware Implementation"
-
- All four conventions are similar:
- Parameters may be passed in general-purpose registers starting at r3, in
- floating point registers starting at f1, or on the stack.
-
- But there are substantial differences:
- * The number of registers used for parameter passing and the exact set of
- nonvolatile registers differs (see MachRegs.hs).
- * On AIX and 64-bit ELF, stack space is always reserved for parameters,
- even if they are passed in registers. The called routine may choose to
- save parameters from registers to the corresponding space on the stack.
- * On AIX and 64-bit ELF, a corresponding amount of GPRs is skipped when
- a floating point parameter is passed in an FPR.
- * SysV insists on either passing I64 arguments on the stack, or in two GPRs,
- starting with an odd-numbered GPR. It may skip a GPR to achieve this.
- AIX just treats an I64 likt two separate I32s (high word first).
- * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only
- 4-byte aligned like everything else on AIX.
- * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on
- PowerPC Linux does not agree, so neither do we.
-
- According to all conventions, the parameter area should be part of the
- caller's stack frame, allocated in the caller's prologue code (large enough
- to hold the parameter lists for all called routines). The NCG already
- uses the stack for register spilling, leaving 64 bytes free at the top.
- If we need a larger parameter area than that, we increase the size
- of the stack frame just before ccalling.
--}
-
-
-genCCall' dflags gcp target dest_regs args
- = do
- (finalStack,passArgumentsCode,usedRegs) <- passArguments
- (zip3 args argReps argHints)
- allArgRegs
- (allFPArgRegs platform)
- initialStackOffset
- nilOL []
-
- (labelOrExpr, reduceToFF32) <- case target of
- ForeignTarget (CmmLit (CmmLabel lbl)) _ -> do
- uses_pic_base_implicitly
- return (Left lbl, False)
- ForeignTarget expr _ -> do
- uses_pic_base_implicitly
- return (Right expr, False)
- PrimTarget mop -> outOfLineMachOp mop
-
- let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode
- codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32
-
- case labelOrExpr of
- Left lbl -> do -- the linker does all the work for us
- return ( codeBefore
- `snocOL` BL lbl usedRegs
- `appOL` maybeNOP -- some ABI require a NOP after BL
- `appOL` codeAfter)
- Right dyn -> do -- implement call through function pointer
- (dynReg, dynCode) <- getSomeReg dyn
- case gcp of
- GCP64ELF 1 -> return ( dynCode
- `appOL` codeBefore
- `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 40))
- `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 0))
- `snocOL` LD II64 toc (AddrRegImm dynReg (ImmInt 8))
- `snocOL` MTCTR r11
- `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 16))
- `snocOL` BCTRL usedRegs
- `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 40))
- `appOL` codeAfter)
- GCP64ELF 2 -> return ( dynCode
- `appOL` codeBefore
- `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 24))
- `snocOL` MR r12 dynReg
- `snocOL` MTCTR r12
- `snocOL` BCTRL usedRegs
- `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 24))
- `appOL` codeAfter)
- GCPAIX -> return ( dynCode
- -- AIX/XCOFF follows the PowerOPEN ABI
- -- which is quite similar to LinuxPPC64/ELFv1
- `appOL` codeBefore
- `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 20))
- `snocOL` LD II32 r11 (AddrRegImm dynReg (ImmInt 0))
- `snocOL` LD II32 toc (AddrRegImm dynReg (ImmInt 4))
- `snocOL` MTCTR r11
- `snocOL` LD II32 r11 (AddrRegImm dynReg (ImmInt 8))
- `snocOL` BCTRL usedRegs
- `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 20))
- `appOL` codeAfter)
- _ -> return ( dynCode
- `snocOL` MTCTR dynReg
- `appOL` codeBefore
- `snocOL` BCTRL usedRegs
- `appOL` codeAfter)
- where
- platform = targetPlatform dflags
-
- uses_pic_base_implicitly = do
- -- See Note [implicit register in PPC PIC code]
- -- on why we claim to use PIC register here
- when (positionIndependent dflags && target32Bit platform) $ do
- _ <- getPicBaseNat $ archWordFormat True
- return ()
-
- initialStackOffset = case gcp of
- GCPAIX -> 24
- GCP32ELF -> 8
- GCP64ELF 1 -> 48
- GCP64ELF 2 -> 32
- _ -> panic "genCall': unknown calling convention"
- -- size of linkage area + size of arguments, in bytes
- stackDelta finalStack = case gcp of
- GCPAIX ->
- roundTo 16 $ (24 +) $ max 32 $ sum $
- map (widthInBytes . typeWidth) argReps
- GCP32ELF -> roundTo 16 finalStack
- GCP64ELF 1 ->
- roundTo 16 $ (48 +) $ max 64 $ sum $
- map (roundTo 8 . widthInBytes . typeWidth)
- argReps
- GCP64ELF 2 ->
- roundTo 16 $ (32 +) $ max 64 $ sum $
- map (roundTo 8 . widthInBytes . typeWidth)
- argReps
- _ -> panic "genCall': unknown calling conv."
-
- argReps = map (cmmExprType dflags) args
- (argHints, _) = foreignTargetHints target
-
- roundTo a x | x `mod` a == 0 = x
- | otherwise = x + a - (x `mod` a)
-
- spFormat = if target32Bit platform then II32 else II64
-
- -- TODO: Do not create a new stack frame if delta is too large.
- move_sp_down finalStack
- | delta > stackFrameHeaderSize dflags =
- toOL [STU spFormat sp (AddrRegImm sp (ImmInt (-delta))),
- DELTA (-delta)]
- | otherwise = nilOL
- where delta = stackDelta finalStack
- move_sp_up finalStack
- | delta > stackFrameHeaderSize dflags =
- toOL [ADD sp sp (RIImm (ImmInt delta)),
- DELTA 0]
- | otherwise = nilOL
- where delta = stackDelta finalStack
-
- -- A NOP instruction is required after a call (bl instruction)
- -- on AIX and 64-Bit Linux.
- -- If the call is to a function with a different TOC (r2) the
- -- link editor replaces the NOP instruction with a load of the TOC
- -- from the stack to restore the TOC.
- maybeNOP = case gcp of
- GCP32ELF -> nilOL
- -- See Section 3.9.4 of OpenPower ABI
- GCPAIX -> unitOL NOP
- -- See Section 3.5.11 of PPC64 ELF v1.9
- GCP64ELF 1 -> unitOL NOP
- -- See Section 2.3.6 of PPC64 ELF v2
- GCP64ELF 2 -> unitOL NOP
- _ -> panic "maybeNOP: Unknown PowerPC 64-bit ABI"
-
- passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed)
- passArguments ((arg,arg_ty,_):args) gprs fprs stackOffset
- accumCode accumUsed | isWord64 arg_ty
- && target32Bit (targetPlatform dflags) =
- do
- ChildCode64 code vr_lo <- iselExpr64 arg
- let vr_hi = getHiVRegFromLo vr_lo
-
- case gcp of
- GCPAIX ->
- do let storeWord vr (gpr:_) _ = MR gpr vr
- storeWord vr [] offset
- = ST II32 vr (AddrRegImm sp (ImmInt offset))
- passArguments args
- (drop 2 gprs)
- fprs
- (stackOffset+8)
- (accumCode `appOL` code
- `snocOL` storeWord vr_hi gprs stackOffset
- `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4))
- ((take 2 gprs) ++ accumUsed)
- GCP32ELF ->
- do let stackOffset' = roundTo 8 stackOffset
- stackCode = accumCode `appOL` code
- `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset'))
- `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4)))
- regCode hireg loreg =
- accumCode `appOL` code
- `snocOL` MR hireg vr_hi
- `snocOL` MR loreg vr_lo
-
- case gprs of
- hireg : loreg : regs | even (length gprs) ->
- passArguments args regs fprs stackOffset
- (regCode hireg loreg) (hireg : loreg : accumUsed)
- _skipped : hireg : loreg : regs ->
- passArguments args regs fprs stackOffset
- (regCode hireg loreg) (hireg : loreg : accumUsed)
- _ -> -- only one or no regs left
- passArguments args [] fprs (stackOffset'+8)
- stackCode accumUsed
- GCP64ELF _ -> panic "passArguments: 32 bit code"
-
- passArguments ((arg,rep,hint):args) gprs fprs stackOffset accumCode accumUsed
- | reg : _ <- regs = do
- register <- getRegister arg_pro
- let code = case register of
- Fixed _ freg fcode -> fcode `snocOL` MR reg freg
- Any _ acode -> acode reg
- stackOffsetRes = case gcp of
- -- The PowerOpen ABI requires that we
- -- reserve stack slots for register
- -- parameters
- GCPAIX -> stackOffset + stackBytes
- -- ... the SysV ABI 32-bit doesn't.
- GCP32ELF -> stackOffset
- -- ... but SysV ABI 64-bit does.
- GCP64ELF _ -> stackOffset + stackBytes
- passArguments args
- (drop nGprs gprs)
- (drop nFprs fprs)
- stackOffsetRes
- (accumCode `appOL` code)
- (reg : accumUsed)
- | otherwise = do
- (vr, code) <- getSomeReg arg_pro
- passArguments args
- (drop nGprs gprs)
- (drop nFprs fprs)
- (stackOffset' + stackBytes)
- (accumCode `appOL` code
- `snocOL` ST format_pro vr stackSlot)
- accumUsed
- where
- arg_pro
- | isBitsType rep = CmmMachOp (conv_op (typeWidth rep) (wordWidth dflags)) [arg]
- | otherwise = arg
- format_pro
- | isBitsType rep = intFormat (wordWidth dflags)
- | otherwise = cmmTypeFormat rep
- conv_op = case hint of
- SignedHint -> MO_SS_Conv
- _ -> MO_UU_Conv
-
- stackOffset' = case gcp of
- GCPAIX ->
- -- The 32bit PowerOPEN ABI is happy with
- -- 32bit-alignment ...
- stackOffset
- GCP32ELF
- -- ... the SysV ABI requires 8-byte
- -- alignment for doubles.
- | isFloatType rep && typeWidth rep == W64 ->
- roundTo 8 stackOffset
- | otherwise ->
- stackOffset
- GCP64ELF _ ->
- -- Everything on the stack is mapped to
- -- 8-byte aligned doublewords
- stackOffset
- stackOffset''
- | isFloatType rep && typeWidth rep == W32 =
- case gcp of
- -- The ELF v1 ABI Section 3.2.3 requires:
- -- "Single precision floating point values
- -- are mapped to the second word in a single
- -- doubleword"
- GCP64ELF 1 -> stackOffset' + 4
- _ -> stackOffset'
- | otherwise = stackOffset'
-
- stackSlot = AddrRegImm sp (ImmInt stackOffset'')
- (nGprs, nFprs, stackBytes, regs)
- = case gcp of
- GCPAIX ->
- case cmmTypeFormat rep of
- II8 -> (1, 0, 4, gprs)
- II16 -> (1, 0, 4, gprs)
- II32 -> (1, 0, 4, gprs)
- -- The PowerOpen ABI requires that we skip a
- -- corresponding number of GPRs when we use
- -- the FPRs.
- --
- -- E.g. for a `double` two GPRs are skipped,
- -- whereas for a `float` one GPR is skipped
- -- when parameters are assigned to
- -- registers.
- --
- -- The PowerOpen ABI specification can be found at
- -- ftp://www.sourceware.org/pub/binutils/ppc-docs/ppc-poweropen/
- FF32 -> (1, 1, 4, fprs)
- FF64 -> (2, 1, 8, fprs)
- II64 -> panic "genCCall' passArguments II64"
-
- GCP32ELF ->
- case cmmTypeFormat rep of
- II8 -> (1, 0, 4, gprs)
- II16 -> (1, 0, 4, gprs)
- II32 -> (1, 0, 4, gprs)
- -- ... the SysV ABI doesn't.
- FF32 -> (0, 1, 4, fprs)
- FF64 -> (0, 1, 8, fprs)
- II64 -> panic "genCCall' passArguments II64"
- GCP64ELF _ ->
- case cmmTypeFormat rep of
- II8 -> (1, 0, 8, gprs)
- II16 -> (1, 0, 8, gprs)
- II32 -> (1, 0, 8, gprs)
- II64 -> (1, 0, 8, gprs)
- -- The ELFv1 ABI requires that we skip a
- -- corresponding number of GPRs when we use
- -- the FPRs.
- FF32 -> (1, 1, 8, fprs)
- FF64 -> (1, 1, 8, fprs)
-
- moveResult reduceToFF32 =
- case dest_regs of
- [] -> nilOL
- [dest]
- | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1)
- | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1)
- | isWord64 rep && target32Bit (targetPlatform dflags)
- -> toOL [MR (getHiVRegFromLo r_dest) r3,
- MR r_dest r4]
- | otherwise -> unitOL (MR r_dest r3)
- where rep = cmmRegType dflags (CmmLocal dest)
- r_dest = getRegisterReg platform (CmmLocal dest)
- _ -> panic "genCCall' moveResult: Bad dest_regs"
-
- outOfLineMachOp mop =
- do
- dflags <- getDynFlags
- mopExpr <- cmmMakeDynamicReference dflags CallReference $
- mkForeignLabel functionName Nothing ForeignLabelInThisPackage IsFunction
- let mopLabelOrExpr = case mopExpr of
- CmmLit (CmmLabel lbl) -> Left lbl
- _ -> Right mopExpr
- return (mopLabelOrExpr, reduce)
- where
- (functionName, reduce) = case mop of
- MO_F32_Exp -> (fsLit "exp", True)
- MO_F32_ExpM1 -> (fsLit "expm1", True)
- MO_F32_Log -> (fsLit "log", True)
- MO_F32_Log1P -> (fsLit "log1p", True)
- MO_F32_Sqrt -> (fsLit "sqrt", True)
- MO_F32_Fabs -> unsupported
-
- MO_F32_Sin -> (fsLit "sin", True)
- MO_F32_Cos -> (fsLit "cos", True)
- MO_F32_Tan -> (fsLit "tan", True)
-
- MO_F32_Asin -> (fsLit "asin", True)
- MO_F32_Acos -> (fsLit "acos", True)
- MO_F32_Atan -> (fsLit "atan", True)
-
- MO_F32_Sinh -> (fsLit "sinh", True)
- MO_F32_Cosh -> (fsLit "cosh", True)
- MO_F32_Tanh -> (fsLit "tanh", True)
- MO_F32_Pwr -> (fsLit "pow", True)
-
- MO_F32_Asinh -> (fsLit "asinh", True)
- MO_F32_Acosh -> (fsLit "acosh", True)
- MO_F32_Atanh -> (fsLit "atanh", True)
-
- MO_F64_Exp -> (fsLit "exp", False)
- MO_F64_ExpM1 -> (fsLit "expm1", False)
- MO_F64_Log -> (fsLit "log", False)
- MO_F64_Log1P -> (fsLit "log1p", False)
- MO_F64_Sqrt -> (fsLit "sqrt", False)
- MO_F64_Fabs -> unsupported
-
- MO_F64_Sin -> (fsLit "sin", False)
- MO_F64_Cos -> (fsLit "cos", False)
- MO_F64_Tan -> (fsLit "tan", False)
-
- MO_F64_Asin -> (fsLit "asin", False)
- MO_F64_Acos -> (fsLit "acos", False)
- MO_F64_Atan -> (fsLit "atan", False)
-
- MO_F64_Sinh -> (fsLit "sinh", False)
- MO_F64_Cosh -> (fsLit "cosh", False)
- MO_F64_Tanh -> (fsLit "tanh", False)
- MO_F64_Pwr -> (fsLit "pow", False)
-
- MO_F64_Asinh -> (fsLit "asinh", False)
- MO_F64_Acosh -> (fsLit "acosh", False)
- MO_F64_Atanh -> (fsLit "atanh", False)
-
- MO_UF_Conv w -> (fsLit $ word2FloatLabel w, False)
-
- MO_Memcpy _ -> (fsLit "memcpy", False)
- MO_Memset _ -> (fsLit "memset", False)
- MO_Memmove _ -> (fsLit "memmove", False)
- MO_Memcmp _ -> (fsLit "memcmp", False)
-
- MO_BSwap w -> (fsLit $ bSwapLabel w, False)
- MO_BRev w -> (fsLit $ bRevLabel w, False)
- MO_PopCnt w -> (fsLit $ popCntLabel w, False)
- MO_Pdep w -> (fsLit $ pdepLabel w, False)
- MO_Pext w -> (fsLit $ pextLabel w, False)
- MO_Clz _ -> unsupported
- MO_Ctz _ -> unsupported
- MO_AtomicRMW {} -> unsupported
- MO_Cmpxchg w -> (fsLit $ cmpxchgLabel w, False)
- MO_AtomicRead _ -> unsupported
- MO_AtomicWrite _ -> unsupported
-
- MO_S_Mul2 {} -> unsupported
- MO_S_QuotRem {} -> unsupported
- MO_U_QuotRem {} -> unsupported
- MO_U_QuotRem2 {} -> unsupported
- MO_Add2 {} -> unsupported
- MO_AddWordC {} -> unsupported
- MO_SubWordC {} -> unsupported
- MO_AddIntC {} -> unsupported
- MO_SubIntC {} -> unsupported
- MO_U_Mul2 {} -> unsupported
- MO_ReadBarrier -> unsupported
- MO_WriteBarrier -> unsupported
- MO_Touch -> unsupported
- MO_Prefetch_Data _ -> unsupported
- unsupported = panic ("outOfLineCmmOp: " ++ show mop
- ++ " not supported")
-
--- -----------------------------------------------------------------------------
--- Generating a table-branch
-
-genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock
-genSwitch dflags expr targets
- | OSAIX <- platformOS (targetPlatform dflags)
- = do
- (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
- let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
- sha = if target32Bit $ targetPlatform dflags then 2 else 3
- tmp <- getNewRegNat fmt
- lbl <- getNewLabelNat
- dynRef <- cmmMakeDynamicReference dflags DataReference lbl
- (tableReg,t_code) <- getSomeReg $ dynRef
- let code = e_code `appOL` t_code `appOL` toOL [
- SL fmt tmp reg (RIImm (ImmInt sha)),
- LD fmt tmp (AddrRegReg tableReg tmp),
- MTCTR tmp,
- BCTR ids (Just lbl) []
- ]
- return code
-
- | (positionIndependent dflags) || (not $ target32Bit $ targetPlatform dflags)
- = do
- (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
- let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
- sha = if target32Bit $ targetPlatform dflags then 2 else 3
- tmp <- getNewRegNat fmt
- lbl <- getNewLabelNat
- dynRef <- cmmMakeDynamicReference dflags DataReference lbl
- (tableReg,t_code) <- getSomeReg $ dynRef
- let code = e_code `appOL` t_code `appOL` toOL [
- SL fmt tmp reg (RIImm (ImmInt sha)),
- LD fmt tmp (AddrRegReg tableReg tmp),
- ADD tmp tmp (RIReg tableReg),
- MTCTR tmp,
- BCTR ids (Just lbl) []
- ]
- return code
- | otherwise
- = do
- (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
- let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
- sha = if target32Bit $ targetPlatform dflags then 2 else 3
- tmp <- getNewRegNat fmt
- lbl <- getNewLabelNat
- let code = e_code `appOL` toOL [
- SL fmt tmp reg (RIImm (ImmInt sha)),
- ADDIS tmp tmp (HA (ImmCLbl lbl)),
- LD fmt tmp (AddrRegImm tmp (LO (ImmCLbl lbl))),
- MTCTR tmp,
- BCTR ids (Just lbl) []
- ]
- return code
- where (offset, ids) = switchTargetsToTable targets
-
-generateJumpTableForInstr :: DynFlags -> Instr
- -> Maybe (NatCmmDecl RawCmmStatics Instr)
-generateJumpTableForInstr dflags (BCTR ids (Just lbl) _) =
- let jumpTable
- | (positionIndependent dflags)
- || (not $ target32Bit $ targetPlatform dflags)
- = map jumpTableEntryRel ids
- | otherwise = map (jumpTableEntry dflags) ids
- where jumpTableEntryRel Nothing
- = CmmStaticLit (CmmInt 0 (wordWidth dflags))
- jumpTableEntryRel (Just blockid)
- = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0
- (wordWidth dflags))
- where blockLabel = blockLbl blockid
- in Just (CmmData (Section ReadOnlyData lbl) (RawCmmStatics lbl jumpTable))
-generateJumpTableForInstr _ _ = Nothing
-
--- -----------------------------------------------------------------------------
--- 'condIntReg' and 'condFltReg': condition codes into registers
-
--- Turn those condition codes into integers now (when they appear on
--- the right hand side of an assignment).
-
-
-
-condReg :: NatM CondCode -> NatM Register
-condReg getCond = do
- CondCode _ cond cond_code <- getCond
- dflags <- getDynFlags
- let
- code dst = cond_code
- `appOL` negate_code
- `appOL` toOL [
- MFCR dst,
- RLWINM dst dst (bit + 1) 31 31
- ]
-
- negate_code | do_negate = unitOL (CRNOR bit bit bit)
- | otherwise = nilOL
-
- (bit, do_negate) = case cond of
- LTT -> (0, False)
- LE -> (1, True)
- EQQ -> (2, False)
- GE -> (0, True)
- GTT -> (1, False)
-
- NE -> (2, True)
-
- LU -> (0, False)
- LEU -> (1, True)
- GEU -> (0, True)
- GU -> (1, False)
- _ -> panic "PPC.CodeGen.codeReg: no match"
-
- format = archWordFormat $ target32Bit $ targetPlatform dflags
- return (Any format code)
-
-condIntReg :: Cond -> Width -> CmmExpr -> CmmExpr -> NatM Register
-condIntReg cond width x y = condReg (condIntCode cond width x y)
-condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
-condFltReg cond x y = condReg (condFltCode cond x y)
-
-
-
--- -----------------------------------------------------------------------------
--- 'trivial*Code': deal with trivial instructions
-
--- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
--- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
--- Only look for constants on the right hand side, because that's
--- where the generic optimizer will have put them.
-
--- Similarly, for unary instructions, we don't have to worry about
--- matching an StInt as the argument, because genericOpt will already
--- have handled the constant-folding.
-
-
-
-{-
-Wolfgang's PowerPC version of The Rules:
-
-A slightly modified version of The Rules to take advantage of the fact
-that PowerPC instructions work on all registers and don't implicitly
-clobber any fixed registers.
-
-* The only expression for which getRegister returns Fixed is (CmmReg reg).
-
-* If getRegister returns Any, then the code it generates may modify only:
- (a) fresh temporaries
- (b) the destination register
- It may *not* modify global registers, unless the global
- register happens to be the destination register.
- It may not clobber any other registers. In fact, only ccalls clobber any
- fixed registers.
- Also, it may not modify the counter register (used by genCCall).
-
- Corollary: If a getRegister for a subexpression returns Fixed, you need
- not move it to a fresh temporary before evaluating the next subexpression.
- The Fixed register won't be modified.
- Therefore, we don't need a counterpart for the x86's getStableReg on PPC.
-
-* SDM's First Rule is valid for PowerPC, too: subexpressions can depend on
- the value of the destination register.
--}
-
-trivialCode
- :: Width
- -> Bool
- -> (Reg -> Reg -> RI -> Instr)
- -> CmmExpr
- -> CmmExpr
- -> NatM Register
-
-trivialCode rep signed instr x (CmmLit (CmmInt y _))
- | Just imm <- makeImmediate rep signed y
- = do
- (src1, code1) <- getSomeReg x
- let code dst = code1 `snocOL` instr dst src1 (RIImm imm)
- return (Any (intFormat rep) code)
-
-trivialCode rep _ instr x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2)
- return (Any (intFormat rep) code)
-
-shiftMulCode
- :: Width
- -> Bool
- -> (Format-> Reg -> Reg -> RI -> Instr)
- -> CmmExpr
- -> CmmExpr
- -> NatM Register
-shiftMulCode width sign instr x (CmmLit (CmmInt y _))
- | Just imm <- makeImmediate width sign y
- = do
- (src1, code1) <- getSomeReg x
- let format = intFormat width
- let ins_fmt = intFormat (max W32 width)
- let code dst = code1 `snocOL` instr ins_fmt dst src1 (RIImm imm)
- return (Any format code)
-
-shiftMulCode width _ instr x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- let format = intFormat width
- let ins_fmt = intFormat (max W32 width)
- let code dst = code1 `appOL` code2
- `snocOL` instr ins_fmt dst src1 (RIReg src2)
- return (Any format code)
-
-trivialCodeNoImm' :: Format -> (Reg -> Reg -> Reg -> Instr)
- -> CmmExpr -> CmmExpr -> NatM Register
-trivialCodeNoImm' format instr x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2
- return (Any format code)
-
-trivialCodeNoImm :: Format -> (Format -> Reg -> Reg -> Reg -> Instr)
- -> CmmExpr -> CmmExpr -> NatM Register
-trivialCodeNoImm format instr x y
- = trivialCodeNoImm' format (instr format) x y
-
-srCode :: Width -> Bool -> (Format-> Reg -> Reg -> RI -> Instr)
- -> CmmExpr -> CmmExpr -> NatM Register
-srCode width sgn instr x (CmmLit (CmmInt y _))
- | Just imm <- makeImmediate width sgn y
- = do
- let op_len = max W32 width
- extend = if sgn then extendSExpr else extendUExpr
- (src1, code1) <- getSomeReg (extend width op_len x)
- let code dst = code1 `snocOL`
- instr (intFormat op_len) dst src1 (RIImm imm)
- return (Any (intFormat width) code)
-
-srCode width sgn instr x y = do
- let op_len = max W32 width
- extend = if sgn then extendSExpr else extendUExpr
- (src1, code1) <- getSomeReg (extend width op_len x)
- (src2, code2) <- getSomeReg (extendUExpr width op_len y)
- -- Note: Shift amount `y` is unsigned
- let code dst = code1 `appOL` code2 `snocOL`
- instr (intFormat op_len) dst src1 (RIReg src2)
- return (Any (intFormat width) code)
-
-divCode :: Width -> Bool -> CmmExpr -> CmmExpr -> NatM Register
-divCode width sgn x y = do
- let op_len = max W32 width
- extend = if sgn then extendSExpr else extendUExpr
- (src1, code1) <- getSomeReg (extend width op_len x)
- (src2, code2) <- getSomeReg (extend width op_len y)
- let code dst = code1 `appOL` code2 `snocOL`
- DIV (intFormat op_len) sgn dst src1 src2
- return (Any (intFormat width) code)
-
-
-trivialUCode :: Format
- -> (Reg -> Reg -> Instr)
- -> CmmExpr
- -> NatM Register
-trivialUCode rep instr x = do
- (src, code) <- getSomeReg x
- let code' dst = code `snocOL` instr dst src
- return (Any rep code')
-
--- There is no "remainder" instruction on the PPC, so we have to do
--- it the hard way.
--- The "sgn" parameter is the signedness for the division instruction
-
-remainderCode :: Width -> Bool -> Reg -> CmmExpr -> CmmExpr
- -> NatM (Reg -> InstrBlock)
-remainderCode rep sgn reg_q arg_x arg_y = do
- let op_len = max W32 rep
- fmt = intFormat op_len
- extend = if sgn then extendSExpr else extendUExpr
- (x_reg, x_code) <- getSomeReg (extend rep op_len arg_x)
- (y_reg, y_code) <- getSomeReg (extend rep op_len arg_y)
- return $ \reg_r -> y_code `appOL` x_code
- `appOL` toOL [ DIV fmt sgn reg_q x_reg y_reg
- , MULL fmt reg_r reg_q (RIReg y_reg)
- , SUBF reg_r reg_r x_reg
- ]
-
-
-coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
-coerceInt2FP fromRep toRep x = do
- dflags <- getDynFlags
- let arch = platformArch $ targetPlatform dflags
- coerceInt2FP' arch fromRep toRep x
-
-coerceInt2FP' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
-coerceInt2FP' ArchPPC fromRep toRep x = do
- (src, code) <- getSomeReg x
- lbl <- getNewLabelNat
- itmp <- getNewRegNat II32
- ftmp <- getNewRegNat FF64
- dflags <- getDynFlags
- dynRef <- cmmMakeDynamicReference dflags DataReference lbl
- Amode addr addr_code <- getAmode D dynRef
- let
- code' dst = code `appOL` maybe_exts `appOL` toOL [
- LDATA (Section ReadOnlyData lbl) $ RawCmmStatics lbl
- [CmmStaticLit (CmmInt 0x43300000 W32),
- CmmStaticLit (CmmInt 0x80000000 W32)],
- XORIS itmp src (ImmInt 0x8000),
- ST II32 itmp (spRel dflags 3),
- LIS itmp (ImmInt 0x4330),
- ST II32 itmp (spRel dflags 2),
- LD FF64 ftmp (spRel dflags 2)
- ] `appOL` addr_code `appOL` toOL [
- LD FF64 dst addr,
- FSUB FF64 dst ftmp dst
- ] `appOL` maybe_frsp dst
-
- maybe_exts = case fromRep of
- W8 -> unitOL $ EXTS II8 src src
- W16 -> unitOL $ EXTS II16 src src
- W32 -> nilOL
- _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
-
- maybe_frsp dst
- = case toRep of
- W32 -> unitOL $ FRSP dst dst
- W64 -> nilOL
- _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
-
- return (Any (floatFormat toRep) code')
-
--- On an ELF v1 Linux we use the compiler doubleword in the stack frame
--- this is the TOC pointer doubleword on ELF v2 Linux. The latter is only
--- set right before a call and restored right after return from the call.
--- So it is fine.
-coerceInt2FP' (ArchPPC_64 _) fromRep toRep x = do
- (src, code) <- getSomeReg x
- dflags <- getDynFlags
- let
- code' dst = code `appOL` maybe_exts `appOL` toOL [
- ST II64 src (spRel dflags 3),
- LD FF64 dst (spRel dflags 3),
- FCFID dst dst
- ] `appOL` maybe_frsp dst
-
- maybe_exts = case fromRep of
- W8 -> unitOL $ EXTS II8 src src
- W16 -> unitOL $ EXTS II16 src src
- W32 -> unitOL $ EXTS II32 src src
- W64 -> nilOL
- _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
-
- maybe_frsp dst
- = case toRep of
- W32 -> unitOL $ FRSP dst dst
- W64 -> nilOL
- _ -> panic "PPC.CodeGen.coerceInt2FP: no match"
-
- return (Any (floatFormat toRep) code')
-
-coerceInt2FP' _ _ _ _ = panic "PPC.CodeGen.coerceInt2FP: unknown arch"
-
-
-coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
-coerceFP2Int fromRep toRep x = do
- dflags <- getDynFlags
- let arch = platformArch $ targetPlatform dflags
- coerceFP2Int' arch fromRep toRep x
-
-coerceFP2Int' :: Arch -> Width -> Width -> CmmExpr -> NatM Register
-coerceFP2Int' ArchPPC _ toRep x = do
- dflags <- getDynFlags
- -- the reps don't really matter: F*->FF64 and II32->I* are no-ops
- (src, code) <- getSomeReg x
- tmp <- getNewRegNat FF64
- let
- code' dst = code `appOL` toOL [
- -- convert to int in FP reg
- FCTIWZ tmp src,
- -- store value (64bit) from FP to stack
- ST FF64 tmp (spRel dflags 2),
- -- read low word of value (high word is undefined)
- LD II32 dst (spRel dflags 3)]
- return (Any (intFormat toRep) code')
-
-coerceFP2Int' (ArchPPC_64 _) _ toRep x = do
- dflags <- getDynFlags
- -- the reps don't really matter: F*->FF64 and II64->I* are no-ops
- (src, code) <- getSomeReg x
- tmp <- getNewRegNat FF64
- let
- code' dst = code `appOL` toOL [
- -- convert to int in FP reg
- FCTIDZ tmp src,
- -- store value (64bit) from FP to compiler word on stack
- ST FF64 tmp (spRel dflags 3),
- LD II64 dst (spRel dflags 3)]
- return (Any (intFormat toRep) code')
-
-coerceFP2Int' _ _ _ _ = panic "PPC.CodeGen.coerceFP2Int: unknown arch"
-
--- Note [.LCTOC1 in PPC PIC code]
--- The .LCTOC1 label is defined to point 32768 bytes into the GOT table
--- to make the most of the PPC's 16-bit displacements.
--- As 16-bit signed offset is used (usually via addi/lwz instructions)
--- first element will have '-32768' offset against .LCTOC1.
-
--- Note [implicit register in PPC PIC code]
--- PPC generates calls by labels in assembly
--- in form of:
--- bl puts+32768@plt
--- in this form it's not seen directly (by GHC NCG)
--- that r30 (PicBaseReg) is used,
--- but r30 is a required part of PLT code setup:
--- puts+32768@plt:
--- lwz r11,-30484(r30) ; offset in .LCTOC1
--- mtctr r11
--- bctr
diff --git a/compiler/nativeGen/PPC/Cond.hs b/compiler/nativeGen/PPC/Cond.hs
deleted file mode 100644
index bd8bdee81a..0000000000
--- a/compiler/nativeGen/PPC/Cond.hs
+++ /dev/null
@@ -1,63 +0,0 @@
-module PPC.Cond (
- Cond(..),
- condNegate,
- condUnsigned,
- condToSigned,
- condToUnsigned,
-)
-
-where
-
-import GhcPrelude
-
-import Panic
-
-data Cond
- = ALWAYS
- | EQQ
- | GE
- | GEU
- | GTT
- | GU
- | LE
- | LEU
- | LTT
- | LU
- | NE
- deriving Eq
-
-
-condNegate :: Cond -> Cond
-condNegate ALWAYS = panic "condNegate: ALWAYS"
-condNegate EQQ = NE
-condNegate GE = LTT
-condNegate GEU = LU
-condNegate GTT = LE
-condNegate GU = LEU
-condNegate LE = GTT
-condNegate LEU = GU
-condNegate LTT = GE
-condNegate LU = GEU
-condNegate NE = EQQ
-
--- Condition utils
-condUnsigned :: Cond -> Bool
-condUnsigned GU = True
-condUnsigned LU = True
-condUnsigned GEU = True
-condUnsigned LEU = True
-condUnsigned _ = False
-
-condToSigned :: Cond -> Cond
-condToSigned GU = GTT
-condToSigned LU = LTT
-condToSigned GEU = GE
-condToSigned LEU = LE
-condToSigned x = x
-
-condToUnsigned :: Cond -> Cond
-condToUnsigned GTT = GU
-condToUnsigned LTT = LU
-condToUnsigned GE = GEU
-condToUnsigned LE = LEU
-condToUnsigned x = x
diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs
deleted file mode 100644
index ad2039d463..0000000000
--- a/compiler/nativeGen/PPC/Instr.hs
+++ /dev/null
@@ -1,713 +0,0 @@
-{-# LANGUAGE CPP #-}
-
-{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
-
------------------------------------------------------------------------------
---
--- Machine-dependent assembly language
---
--- (c) The University of Glasgow 1993-2004
---
------------------------------------------------------------------------------
-
-#include "HsVersions.h"
-
-module PPC.Instr (
- archWordFormat,
- RI(..),
- Instr(..),
- stackFrameHeaderSize,
- maxSpillSlots,
- allocMoreStack,
- makeFarBranches
-)
-
-where
-
-import GhcPrelude
-
-import PPC.Regs
-import PPC.Cond
-import Instruction
-import Format
-import TargetReg
-import RegClass
-import Reg
-
-import GHC.Platform.Regs
-import GHC.Cmm.BlockId
-import GHC.Cmm.Dataflow.Collections
-import GHC.Cmm.Dataflow.Label
-import GHC.Driver.Session
-import GHC.Cmm
-import GHC.Cmm.Info
-import FastString
-import GHC.Cmm.CLabel
-import Outputable
-import GHC.Platform
-import UniqFM (listToUFM, lookupUFM)
-import UniqSupply
-
-import Control.Monad (replicateM)
-import Data.Maybe (fromMaybe)
-
---------------------------------------------------------------------------------
--- Format of a PPC memory address.
---
-archWordFormat :: Bool -> Format
-archWordFormat is32Bit
- | is32Bit = II32
- | otherwise = II64
-
-
--- | Instruction instance for powerpc
-instance Instruction Instr where
- regUsageOfInstr = ppc_regUsageOfInstr
- patchRegsOfInstr = ppc_patchRegsOfInstr
- isJumpishInstr = ppc_isJumpishInstr
- jumpDestsOfInstr = ppc_jumpDestsOfInstr
- patchJumpInstr = ppc_patchJumpInstr
- mkSpillInstr = ppc_mkSpillInstr
- mkLoadInstr = ppc_mkLoadInstr
- takeDeltaInstr = ppc_takeDeltaInstr
- isMetaInstr = ppc_isMetaInstr
- mkRegRegMoveInstr _ = ppc_mkRegRegMoveInstr
- takeRegRegMoveInstr = ppc_takeRegRegMoveInstr
- mkJumpInstr = ppc_mkJumpInstr
- mkStackAllocInstr = ppc_mkStackAllocInstr
- mkStackDeallocInstr = ppc_mkStackDeallocInstr
-
-
-ppc_mkStackAllocInstr :: Platform -> Int -> [Instr]
-ppc_mkStackAllocInstr platform amount
- = ppc_mkStackAllocInstr' platform (-amount)
-
-ppc_mkStackDeallocInstr :: Platform -> Int -> [Instr]
-ppc_mkStackDeallocInstr platform amount
- = ppc_mkStackAllocInstr' platform amount
-
-ppc_mkStackAllocInstr' :: Platform -> Int -> [Instr]
-ppc_mkStackAllocInstr' platform amount
- | fits16Bits amount
- = [ LD fmt r0 (AddrRegImm sp zero)
- , STU fmt r0 (AddrRegImm sp immAmount)
- ]
- | otherwise
- = [ LD fmt r0 (AddrRegImm sp zero)
- , ADDIS tmp sp (HA immAmount)
- , ADD tmp tmp (RIImm (LO immAmount))
- , STU fmt r0 (AddrRegReg sp tmp)
- ]
- where
- fmt = intFormat $ widthFromBytes (platformWordSizeInBytes platform)
- zero = ImmInt 0
- tmp = tmpReg platform
- immAmount = ImmInt amount
-
---
--- See note [extra spill slots] in X86/Instr.hs
---
-allocMoreStack
- :: Platform
- -> Int
- -> NatCmmDecl statics PPC.Instr.Instr
- -> UniqSM (NatCmmDecl statics PPC.Instr.Instr, [(BlockId,BlockId)])
-
-allocMoreStack _ _ top@(CmmData _ _) = return (top,[])
-allocMoreStack platform slots (CmmProc info lbl live (ListGraph code)) = do
- let
- infos = mapKeys info
- entries = case code of
- [] -> infos
- BasicBlock entry _ : _ -- first block is the entry point
- | entry `elem` infos -> infos
- | otherwise -> entry : infos
-
- uniqs <- replicateM (length entries) getUniqueM
-
- let
- delta = ((x + stackAlign - 1) `quot` stackAlign) * stackAlign -- round up
- where x = slots * spillSlotSize -- sp delta
-
- alloc = mkStackAllocInstr platform delta
- dealloc = mkStackDeallocInstr platform delta
-
- retargetList = (zip entries (map mkBlockId uniqs))
-
- new_blockmap :: LabelMap BlockId
- new_blockmap = mapFromList retargetList
-
- insert_stack_insns (BasicBlock id insns)
- | Just new_blockid <- mapLookup id new_blockmap
- = [ BasicBlock id $ alloc ++ [BCC ALWAYS new_blockid Nothing]
- , BasicBlock new_blockid block'
- ]
- | otherwise
- = [ BasicBlock id block' ]
- where
- block' = foldr insert_dealloc [] insns
-
- insert_dealloc insn r
- -- BCTR might or might not be a non-local jump. For
- -- "labeled-goto" we use JMP, and for "computed-goto" we
- -- use MTCTR followed by BCTR. See 'PPC.CodeGen.genJump'.
- = case insn of
- JMP _ _ -> dealloc ++ (insn : r)
- BCTR [] Nothing _ -> dealloc ++ (insn : r)
- BCTR ids label rs -> BCTR (map (fmap retarget) ids) label rs : r
- BCCFAR cond b p -> BCCFAR cond (retarget b) p : r
- BCC cond b p -> BCC cond (retarget b) p : r
- _ -> insn : r
- -- BL and BCTRL are call-like instructions rather than
- -- jumps, and are used only for C calls.
-
- retarget :: BlockId -> BlockId
- retarget b
- = fromMaybe b (mapLookup b new_blockmap)
-
- new_code
- = concatMap insert_stack_insns code
-
- -- in
- return (CmmProc info lbl live (ListGraph new_code),retargetList)
-
-
--- -----------------------------------------------------------------------------
--- Machine's assembly language
-
--- We have a few common "instructions" (nearly all the pseudo-ops) but
--- mostly all of 'Instr' is machine-specific.
-
--- Register or immediate
-data RI
- = RIReg Reg
- | RIImm Imm
-
-data Instr
- -- comment pseudo-op
- = COMMENT FastString
-
- -- some static data spat out during code
- -- generation. Will be extracted before
- -- pretty-printing.
- | LDATA Section RawCmmStatics
-
- -- start a new basic block. Useful during
- -- codegen, removed later. Preceding
- -- instruction should be a jump, as per the
- -- invariants for a BasicBlock (see Cmm).
- | NEWBLOCK BlockId
-
- -- specify current stack offset for
- -- benefit of subsequent passes
- | DELTA Int
-
- -- Loads and stores.
- | LD Format Reg AddrMode -- Load format, dst, src
- | LDFAR Format Reg AddrMode -- Load format, dst, src 32 bit offset
- | LDR Format Reg AddrMode -- Load and reserve format, dst, src
- | LA Format Reg AddrMode -- Load arithmetic format, dst, src
- | ST Format Reg AddrMode -- Store format, src, dst
- | STFAR Format Reg AddrMode -- Store format, src, dst 32 bit offset
- | STU Format Reg AddrMode -- Store with Update format, src, dst
- | STC Format Reg AddrMode -- Store conditional format, src, dst
- | LIS Reg Imm -- Load Immediate Shifted dst, src
- | LI Reg Imm -- Load Immediate dst, src
- | MR Reg Reg -- Move Register dst, src -- also for fmr
-
- | CMP Format Reg RI -- format, src1, src2
- | CMPL Format Reg RI -- format, src1, src2
-
- | BCC Cond BlockId (Maybe Bool) -- cond, block, hint
- | BCCFAR Cond BlockId (Maybe Bool) -- cond, block, hint
- -- hint:
- -- Just True: branch likely taken
- -- Just False: branch likely not taken
- -- Nothing: no hint
- | JMP CLabel [Reg] -- same as branch,
- -- but with CLabel instead of block ID
- -- and live global registers
- | MTCTR Reg
- | BCTR [Maybe BlockId] (Maybe CLabel) [Reg]
- -- with list of local destinations, and
- -- jump table location if necessary
- | BL CLabel [Reg] -- with list of argument regs
- | BCTRL [Reg]
-
- | ADD Reg Reg RI -- dst, src1, src2
- | ADDO Reg Reg Reg -- add and set overflow
- | ADDC Reg Reg Reg -- (carrying) dst, src1, src2
- | ADDE Reg Reg Reg -- (extended) dst, src1, src2
- | ADDZE Reg Reg -- (to zero extended) dst, src
- | ADDIS Reg Reg Imm -- Add Immediate Shifted dst, src1, src2
- | SUBF Reg Reg Reg -- dst, src1, src2 ; dst = src2 - src1
- | SUBFO Reg Reg Reg -- subtract from and set overflow
- | SUBFC Reg Reg RI -- (carrying) dst, src1, src2 ;
- -- dst = src2 - src1
- | SUBFE Reg Reg Reg -- (extended) dst, src1, src2 ;
- -- dst = src2 - src1
- | MULL Format Reg Reg RI
- | MULLO Format Reg Reg Reg -- multiply and set overflow
- | MFOV Format Reg -- move overflow bit (1|33) to register
- -- pseudo-instruction; pretty printed as
- -- mfxer dst
- -- extr[w|d]i dst, dst, 1, [1|33]
- | MULHU Format Reg Reg Reg
- | DIV Format Bool Reg Reg Reg
- | AND Reg Reg RI -- dst, src1, src2
- | ANDC Reg Reg Reg -- AND with complement, dst = src1 & ~ src2
- | NAND Reg Reg Reg -- dst, src1, src2
- | OR Reg Reg RI -- dst, src1, src2
- | ORIS Reg Reg Imm -- OR Immediate Shifted dst, src1, src2
- | XOR Reg Reg RI -- dst, src1, src2
- | XORIS Reg Reg Imm -- XOR Immediate Shifted dst, src1, src2
-
- | EXTS Format Reg Reg
- | CNTLZ Format Reg Reg
-
- | NEG Reg Reg
- | NOT Reg Reg
-
- | SL Format Reg Reg RI -- shift left
- | SR Format Reg Reg RI -- shift right
- | SRA Format Reg Reg RI -- shift right arithmetic
-
- | RLWINM Reg Reg Int Int Int -- Rotate Left Word Immediate then AND with Mask
- | CLRLI Format Reg Reg Int -- clear left immediate (extended mnemonic)
- | CLRRI Format Reg Reg Int -- clear right immediate (extended mnemonic)
-
- | FADD Format Reg Reg Reg
- | FSUB Format Reg Reg Reg
- | FMUL Format Reg Reg Reg
- | FDIV Format Reg Reg Reg
- | FABS Reg Reg -- abs is the same for single and double
- | FNEG Reg Reg -- negate is the same for single and double prec.
-
- | FCMP Reg Reg
-
- | FCTIWZ Reg Reg -- convert to integer word
- | FCTIDZ Reg Reg -- convert to integer double word
- | FCFID Reg Reg -- convert from integer double word
- | FRSP Reg Reg -- reduce to single precision
- -- (but destination is a FP register)
-
- | CRNOR Int Int Int -- condition register nor
- | MFCR Reg -- move from condition register
-
- | MFLR Reg -- move from link register
- | FETCHPC Reg -- pseudo-instruction:
- -- bcl to next insn, mflr reg
- | HWSYNC -- heavy weight sync
- | ISYNC -- instruction synchronize
- | LWSYNC -- memory barrier
- | NOP -- no operation, PowerPC 64 bit
- -- needs this as place holder to
- -- reload TOC pointer
-
--- | Get the registers that are being used by this instruction.
--- regUsage doesn't need to do any trickery for jumps and such.
--- Just state precisely the regs read and written by that insn.
--- The consequences of control flow transfers, as far as register
--- allocation goes, are taken care of by the register allocator.
---
-ppc_regUsageOfInstr :: Platform -> Instr -> RegUsage
-ppc_regUsageOfInstr platform instr
- = case instr of
- LD _ reg addr -> usage (regAddr addr, [reg])
- LDFAR _ reg addr -> usage (regAddr addr, [reg])
- LDR _ reg addr -> usage (regAddr addr, [reg])
- LA _ reg addr -> usage (regAddr addr, [reg])
- ST _ reg addr -> usage (reg : regAddr addr, [])
- STFAR _ reg addr -> usage (reg : regAddr addr, [])
- STU _ reg addr -> usage (reg : regAddr addr, [])
- STC _ reg addr -> usage (reg : regAddr addr, [])
- LIS reg _ -> usage ([], [reg])
- LI reg _ -> usage ([], [reg])
- MR reg1 reg2 -> usage ([reg2], [reg1])
- CMP _ reg ri -> usage (reg : regRI ri,[])
- CMPL _ reg ri -> usage (reg : regRI ri,[])
- BCC _ _ _ -> noUsage
- BCCFAR _ _ _ -> noUsage
- JMP _ regs -> usage (regs, [])
- MTCTR reg -> usage ([reg],[])
- BCTR _ _ regs -> usage (regs, [])
- BL _ params -> usage (params, callClobberedRegs platform)
- BCTRL params -> usage (params, callClobberedRegs platform)
-
- ADD reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
- ADDO reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
- ADDC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
- ADDE reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
- ADDZE reg1 reg2 -> usage ([reg2], [reg1])
- ADDIS reg1 reg2 _ -> usage ([reg2], [reg1])
- SUBF reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
- SUBFO reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
- SUBFC reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
- SUBFE reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
- MULL _ reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
- MULLO _ reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
- MFOV _ reg -> usage ([], [reg])
- MULHU _ reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
- DIV _ _ reg1 reg2 reg3
- -> usage ([reg2,reg3], [reg1])
-
- AND reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
- ANDC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
- NAND reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
- OR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
- ORIS reg1 reg2 _ -> usage ([reg2], [reg1])
- XOR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
- XORIS reg1 reg2 _ -> usage ([reg2], [reg1])
- EXTS _ reg1 reg2 -> usage ([reg2], [reg1])
- CNTLZ _ reg1 reg2 -> usage ([reg2], [reg1])
- NEG reg1 reg2 -> usage ([reg2], [reg1])
- NOT reg1 reg2 -> usage ([reg2], [reg1])
- SL _ reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
- SR _ reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
- SRA _ reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
- RLWINM reg1 reg2 _ _ _ -> usage ([reg2], [reg1])
- CLRLI _ reg1 reg2 _ -> usage ([reg2], [reg1])
- CLRRI _ reg1 reg2 _ -> usage ([reg2], [reg1])
-
- FADD _ r1 r2 r3 -> usage ([r2,r3], [r1])
- FSUB _ r1 r2 r3 -> usage ([r2,r3], [r1])
- FMUL _ r1 r2 r3 -> usage ([r2,r3], [r1])
- FDIV _ r1 r2 r3 -> usage ([r2,r3], [r1])
- FABS r1 r2 -> usage ([r2], [r1])
- FNEG r1 r2 -> usage ([r2], [r1])
- FCMP r1 r2 -> usage ([r1,r2], [])
- FCTIWZ r1 r2 -> usage ([r2], [r1])
- FCTIDZ r1 r2 -> usage ([r2], [r1])
- FCFID r1 r2 -> usage ([r2], [r1])
- FRSP r1 r2 -> usage ([r2], [r1])
- MFCR reg -> usage ([], [reg])
- MFLR reg -> usage ([], [reg])
- FETCHPC reg -> usage ([], [reg])
- _ -> noUsage
- where
- usage (src, dst) = RU (filter (interesting platform) src)
- (filter (interesting platform) dst)
- regAddr (AddrRegReg r1 r2) = [r1, r2]
- regAddr (AddrRegImm r1 _) = [r1]
-
- regRI (RIReg r) = [r]
- regRI _ = []
-
-interesting :: Platform -> Reg -> Bool
-interesting _ (RegVirtual _) = True
-interesting platform (RegReal (RealRegSingle i)) = freeReg platform i
-interesting _ (RegReal (RealRegPair{}))
- = panic "PPC.Instr.interesting: no reg pairs on this arch"
-
-
-
--- | Apply a given mapping to all the register references in this
--- instruction.
-ppc_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
-ppc_patchRegsOfInstr instr env
- = case instr of
- LD fmt reg addr -> LD fmt (env reg) (fixAddr addr)
- LDFAR fmt reg addr -> LDFAR fmt (env reg) (fixAddr addr)
- LDR fmt reg addr -> LDR fmt (env reg) (fixAddr addr)
- LA fmt reg addr -> LA fmt (env reg) (fixAddr addr)
- ST fmt reg addr -> ST fmt (env reg) (fixAddr addr)
- STFAR fmt reg addr -> STFAR fmt (env reg) (fixAddr addr)
- STU fmt reg addr -> STU fmt (env reg) (fixAddr addr)
- STC fmt reg addr -> STC fmt (env reg) (fixAddr addr)
- LIS reg imm -> LIS (env reg) imm
- LI reg imm -> LI (env reg) imm
- MR reg1 reg2 -> MR (env reg1) (env reg2)
- CMP fmt reg ri -> CMP fmt (env reg) (fixRI ri)
- CMPL fmt reg ri -> CMPL fmt (env reg) (fixRI ri)
- BCC cond lbl p -> BCC cond lbl p
- BCCFAR cond lbl p -> BCCFAR cond lbl p
- JMP l regs -> JMP l regs -- global regs will not be remapped
- MTCTR reg -> MTCTR (env reg)
- BCTR targets lbl rs -> BCTR targets lbl rs
- BL imm argRegs -> BL imm argRegs -- argument regs
- BCTRL argRegs -> BCTRL argRegs -- cannot be remapped
- ADD reg1 reg2 ri -> ADD (env reg1) (env reg2) (fixRI ri)
- ADDO reg1 reg2 reg3 -> ADDO (env reg1) (env reg2) (env reg3)
- ADDC reg1 reg2 reg3 -> ADDC (env reg1) (env reg2) (env reg3)
- ADDE reg1 reg2 reg3 -> ADDE (env reg1) (env reg2) (env reg3)
- ADDZE reg1 reg2 -> ADDZE (env reg1) (env reg2)
- ADDIS reg1 reg2 imm -> ADDIS (env reg1) (env reg2) imm
- SUBF reg1 reg2 reg3 -> SUBF (env reg1) (env reg2) (env reg3)
- SUBFO reg1 reg2 reg3 -> SUBFO (env reg1) (env reg2) (env reg3)
- SUBFC reg1 reg2 ri -> SUBFC (env reg1) (env reg2) (fixRI ri)
- SUBFE reg1 reg2 reg3 -> SUBFE (env reg1) (env reg2) (env reg3)
- MULL fmt reg1 reg2 ri
- -> MULL fmt (env reg1) (env reg2) (fixRI ri)
- MULLO fmt reg1 reg2 reg3
- -> MULLO fmt (env reg1) (env reg2) (env reg3)
- MFOV fmt reg -> MFOV fmt (env reg)
- MULHU fmt reg1 reg2 reg3
- -> MULHU fmt (env reg1) (env reg2) (env reg3)
- DIV fmt sgn reg1 reg2 reg3
- -> DIV fmt sgn (env reg1) (env reg2) (env reg3)
-
- AND reg1 reg2 ri -> AND (env reg1) (env reg2) (fixRI ri)
- ANDC reg1 reg2 reg3 -> ANDC (env reg1) (env reg2) (env reg3)
- NAND reg1 reg2 reg3 -> NAND (env reg1) (env reg2) (env reg3)
- OR reg1 reg2 ri -> OR (env reg1) (env reg2) (fixRI ri)
- ORIS reg1 reg2 imm -> ORIS (env reg1) (env reg2) imm
- XOR reg1 reg2 ri -> XOR (env reg1) (env reg2) (fixRI ri)
- XORIS reg1 reg2 imm -> XORIS (env reg1) (env reg2) imm
- EXTS fmt reg1 reg2 -> EXTS fmt (env reg1) (env reg2)
- CNTLZ fmt reg1 reg2 -> CNTLZ fmt (env reg1) (env reg2)
- NEG reg1 reg2 -> NEG (env reg1) (env reg2)
- NOT reg1 reg2 -> NOT (env reg1) (env reg2)
- SL fmt reg1 reg2 ri
- -> SL fmt (env reg1) (env reg2) (fixRI ri)
- SR fmt reg1 reg2 ri
- -> SR fmt (env reg1) (env reg2) (fixRI ri)
- SRA fmt reg1 reg2 ri
- -> SRA fmt (env reg1) (env reg2) (fixRI ri)
- RLWINM reg1 reg2 sh mb me
- -> RLWINM (env reg1) (env reg2) sh mb me
- CLRLI fmt reg1 reg2 n -> CLRLI fmt (env reg1) (env reg2) n
- CLRRI fmt reg1 reg2 n -> CLRRI fmt (env reg1) (env reg2) n
- FADD fmt r1 r2 r3 -> FADD fmt (env r1) (env r2) (env r3)
- FSUB fmt r1 r2 r3 -> FSUB fmt (env r1) (env r2) (env r3)
- FMUL fmt r1 r2 r3 -> FMUL fmt (env r1) (env r2) (env r3)
- FDIV fmt r1 r2 r3 -> FDIV fmt (env r1) (env r2) (env r3)
- FABS r1 r2 -> FABS (env r1) (env r2)
- FNEG r1 r2 -> FNEG (env r1) (env r2)
- FCMP r1 r2 -> FCMP (env r1) (env r2)
- FCTIWZ r1 r2 -> FCTIWZ (env r1) (env r2)
- FCTIDZ r1 r2 -> FCTIDZ (env r1) (env r2)
- FCFID r1 r2 -> FCFID (env r1) (env r2)
- FRSP r1 r2 -> FRSP (env r1) (env r2)
- MFCR reg -> MFCR (env reg)
- MFLR reg -> MFLR (env reg)
- FETCHPC reg -> FETCHPC (env reg)
- _ -> instr
- where
- fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2)
- fixAddr (AddrRegImm r1 i) = AddrRegImm (env r1) i
-
- fixRI (RIReg r) = RIReg (env r)
- fixRI other = other
-
-
---------------------------------------------------------------------------------
--- | Checks whether this instruction is a jump/branch instruction.
--- One that can change the flow of control in a way that the
--- register allocator needs to worry about.
-ppc_isJumpishInstr :: Instr -> Bool
-ppc_isJumpishInstr instr
- = case instr of
- BCC{} -> True
- BCCFAR{} -> True
- BCTR{} -> True
- BCTRL{} -> True
- BL{} -> True
- JMP{} -> True
- _ -> False
-
-
--- | Checks whether this instruction is a jump/branch instruction.
--- One that can change the flow of control in a way that the
--- register allocator needs to worry about.
-ppc_jumpDestsOfInstr :: Instr -> [BlockId]
-ppc_jumpDestsOfInstr insn
- = case insn of
- BCC _ id _ -> [id]
- BCCFAR _ id _ -> [id]
- BCTR targets _ _ -> [id | Just id <- targets]
- _ -> []
-
-
--- | Change the destination of this jump instruction.
--- Used in the linear allocator when adding fixup blocks for join
--- points.
-ppc_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr
-ppc_patchJumpInstr insn patchF
- = case insn of
- BCC cc id p -> BCC cc (patchF id) p
- BCCFAR cc id p -> BCCFAR cc (patchF id) p
- BCTR ids lbl rs -> BCTR (map (fmap patchF) ids) lbl rs
- _ -> insn
-
-
--- -----------------------------------------------------------------------------
-
--- | An instruction to spill a register into a spill slot.
-ppc_mkSpillInstr
- :: DynFlags
- -> Reg -- register to spill
- -> Int -- current stack delta
- -> Int -- spill slot to use
- -> Instr
-
-ppc_mkSpillInstr dflags reg delta slot
- = let platform = targetPlatform dflags
- off = spillSlotToOffset dflags slot
- arch = platformArch platform
- in
- let fmt = case targetClassOfReg platform reg of
- RcInteger -> case arch of
- ArchPPC -> II32
- _ -> II64
- RcDouble -> FF64
- _ -> panic "PPC.Instr.mkSpillInstr: no match"
- instr = case makeImmediate W32 True (off-delta) of
- Just _ -> ST
- Nothing -> STFAR -- pseudo instruction: 32 bit offsets
-
- in instr fmt reg (AddrRegImm sp (ImmInt (off-delta)))
-
-
-ppc_mkLoadInstr
- :: DynFlags
- -> Reg -- register to load
- -> Int -- current stack delta
- -> Int -- spill slot to use
- -> Instr
-
-ppc_mkLoadInstr dflags reg delta slot
- = let platform = targetPlatform dflags
- off = spillSlotToOffset dflags slot
- arch = platformArch platform
- in
- let fmt = case targetClassOfReg platform reg of
- RcInteger -> case arch of
- ArchPPC -> II32
- _ -> II64
- RcDouble -> FF64
- _ -> panic "PPC.Instr.mkLoadInstr: no match"
- instr = case makeImmediate W32 True (off-delta) of
- Just _ -> LD
- Nothing -> LDFAR -- pseudo instruction: 32 bit offsets
-
- in instr fmt reg (AddrRegImm sp (ImmInt (off-delta)))
-
-
--- | The size of a minimal stackframe header including minimal
--- parameter save area.
-stackFrameHeaderSize :: DynFlags -> Int
-stackFrameHeaderSize dflags
- = case platformOS platform of
- OSAIX -> 24 + 8 * 4
- _ -> case platformArch platform of
- -- header + parameter save area
- ArchPPC -> 64 -- TODO: check ABI spec
- ArchPPC_64 ELF_V1 -> 48 + 8 * 8
- ArchPPC_64 ELF_V2 -> 32 + 8 * 8
- _ -> panic "PPC.stackFrameHeaderSize: not defined for this OS"
- where platform = targetPlatform dflags
-
--- | The maximum number of bytes required to spill a register. PPC32
--- has 32-bit GPRs and 64-bit FPRs, while PPC64 has 64-bit GPRs and
--- 64-bit FPRs. So the maximum is 8 regardless of platforms unlike
--- x86. Note that AltiVec's vector registers are 128-bit wide so we
--- must not use this to spill them.
-spillSlotSize :: Int
-spillSlotSize = 8
-
--- | The number of spill slots available without allocating more.
-maxSpillSlots :: DynFlags -> Int
-maxSpillSlots dflags
- = ((rESERVED_C_STACK_BYTES dflags - stackFrameHeaderSize dflags)
- `div` spillSlotSize) - 1
--- = 0 -- useful for testing allocMoreStack
-
--- | The number of bytes that the stack pointer should be aligned
--- to. This is 16 both on PPC32 and PPC64 ELF (see ELF processor
--- specific supplements).
-stackAlign :: Int
-stackAlign = 16
-
--- | Convert a spill slot number to a *byte* offset, with no sign.
-spillSlotToOffset :: DynFlags -> Int -> Int
-spillSlotToOffset dflags slot
- = stackFrameHeaderSize dflags + spillSlotSize * slot
-
-
---------------------------------------------------------------------------------
--- | See if this instruction is telling us the current C stack delta
-ppc_takeDeltaInstr
- :: Instr
- -> Maybe Int
-
-ppc_takeDeltaInstr instr
- = case instr of
- DELTA i -> Just i
- _ -> Nothing
-
-
-ppc_isMetaInstr
- :: Instr
- -> Bool
-
-ppc_isMetaInstr instr
- = case instr of
- COMMENT{} -> True
- LDATA{} -> True
- NEWBLOCK{} -> True
- DELTA{} -> True
- _ -> False
-
-
--- | Copy the value in a register to another one.
--- Must work for all register classes.
-ppc_mkRegRegMoveInstr
- :: Reg
- -> Reg
- -> Instr
-
-ppc_mkRegRegMoveInstr src dst
- = MR dst src
-
-
--- | Make an unconditional jump instruction.
-ppc_mkJumpInstr
- :: BlockId
- -> [Instr]
-
-ppc_mkJumpInstr id
- = [BCC ALWAYS id Nothing]
-
-
--- | Take the source and destination from this reg -> reg move instruction
--- or Nothing if it's not one
-ppc_takeRegRegMoveInstr :: Instr -> Maybe (Reg,Reg)
-ppc_takeRegRegMoveInstr (MR dst src) = Just (src,dst)
-ppc_takeRegRegMoveInstr _ = Nothing
-
--- -----------------------------------------------------------------------------
--- Making far branches
-
--- Conditional branches on PowerPC are limited to +-32KB; if our Procs get too
--- big, we have to work around this limitation.
-
-makeFarBranches
- :: LabelMap RawCmmStatics
- -> [NatBasicBlock Instr]
- -> [NatBasicBlock Instr]
-makeFarBranches info_env blocks
- | last blockAddresses < nearLimit = blocks
- | otherwise = zipWith handleBlock blockAddresses blocks
- where
- blockAddresses = scanl (+) 0 $ map blockLen blocks
- blockLen (BasicBlock _ instrs) = length instrs
-
- handleBlock addr (BasicBlock id instrs)
- = BasicBlock id (zipWith makeFar [addr..] instrs)
-
- makeFar _ (BCC ALWAYS tgt _) = BCC ALWAYS tgt Nothing
- makeFar addr (BCC cond tgt p)
- | abs (addr - targetAddr) >= nearLimit
- = BCCFAR cond tgt p
- | otherwise
- = BCC cond tgt p
- where Just targetAddr = lookupUFM blockAddressMap tgt
- makeFar _ other = other
-
- -- 8192 instructions are allowed; let's keep some distance, as
- -- we have a few pseudo-insns that are pretty-printed as
- -- multiple instructions, and it's just not worth the effort
- -- to calculate things exactly
- nearLimit = 7000 - mapSize info_env * maxRetInfoTableSizeW
-
- blockAddressMap = listToUFM $ zip (map blockId blocks) blockAddresses
diff --git a/compiler/nativeGen/PPC/Ppr.hs b/compiler/nativeGen/PPC/Ppr.hs
deleted file mode 100644
index 8b81274db9..0000000000
--- a/compiler/nativeGen/PPC/Ppr.hs
+++ /dev/null
@@ -1,994 +0,0 @@
------------------------------------------------------------------------------
---
--- Pretty-printing assembly language
---
--- (c) The University of Glasgow 1993-2005
---
------------------------------------------------------------------------------
-
-{-# OPTIONS_GHC -fno-warn-orphans #-}
-module PPC.Ppr (pprNatCmmDecl) where
-
-import GhcPrelude
-
-import PPC.Regs
-import PPC.Instr
-import PPC.Cond
-import PprBase
-import Instruction
-import Format
-import Reg
-import RegClass
-import TargetReg
-
-import GHC.Cmm hiding (topInfoTable)
-import GHC.Cmm.Dataflow.Collections
-import GHC.Cmm.Dataflow.Label
-
-import GHC.Cmm.BlockId
-import GHC.Cmm.CLabel
-import GHC.Cmm.Ppr.Expr () -- For Outputable instances
-
-import Unique ( pprUniqueAlways, getUnique )
-import GHC.Platform
-import FastString
-import Outputable
-import GHC.Driver.Session
-
-import Data.Word
-import Data.Int
-import Data.Bits
-
--- -----------------------------------------------------------------------------
--- Printing this stuff out
-
-pprNatCmmDecl :: NatCmmDecl RawCmmStatics Instr -> SDoc
-pprNatCmmDecl (CmmData section dats) =
- pprSectionAlign section $$ pprDatas dats
-
-pprNatCmmDecl proc@(CmmProc top_info lbl _ (ListGraph blocks)) =
- case topInfoTable proc of
- Nothing ->
- sdocWithPlatform $ \platform ->
- -- special case for code without info table:
- pprSectionAlign (Section Text lbl) $$
- (case platformArch platform of
- ArchPPC_64 ELF_V1 -> pprFunctionDescriptor lbl
- ArchPPC_64 ELF_V2 -> pprFunctionPrologue lbl
- _ -> pprLabel lbl) $$ -- blocks guaranteed not null,
- -- so label needed
- vcat (map (pprBasicBlock top_info) blocks)
-
- Just (RawCmmStatics info_lbl _) ->
- sdocWithPlatform $ \platform ->
- pprSectionAlign (Section Text info_lbl) $$
- (if platformHasSubsectionsViaSymbols platform
- then ppr (mkDeadStripPreventer info_lbl) <> char ':'
- else empty) $$
- vcat (map (pprBasicBlock top_info) blocks) $$
- -- above: Even the first block gets a label, because with branch-chain
- -- elimination, it might be the target of a goto.
- (if platformHasSubsectionsViaSymbols platform
- then
- -- See Note [Subsections Via Symbols] in X86/Ppr.hs
- text "\t.long "
- <+> ppr info_lbl
- <+> char '-'
- <+> ppr (mkDeadStripPreventer info_lbl)
- else empty)
-
-pprFunctionDescriptor :: CLabel -> SDoc
-pprFunctionDescriptor lab = pprGloblDecl lab
- $$ text "\t.section \".opd\", \"aw\""
- $$ text "\t.align 3"
- $$ ppr lab <> char ':'
- $$ text "\t.quad ."
- <> ppr lab
- <> text ",.TOC.@tocbase,0"
- $$ text "\t.previous"
- $$ text "\t.type"
- <+> ppr lab
- <> text ", @function"
- $$ char '.' <> ppr lab <> char ':'
-
-pprFunctionPrologue :: CLabel ->SDoc
-pprFunctionPrologue lab = pprGloblDecl lab
- $$ text ".type "
- <> ppr lab
- <> text ", @function"
- $$ ppr lab <> char ':'
- $$ text "0:\taddis\t" <> pprReg toc
- <> text ",12,.TOC.-0b@ha"
- $$ text "\taddi\t" <> pprReg toc
- <> char ',' <> pprReg toc <> text ",.TOC.-0b@l"
- $$ text "\t.localentry\t" <> ppr lab
- <> text ",.-" <> ppr lab
-
-pprBasicBlock :: LabelMap RawCmmStatics -> NatBasicBlock Instr -> SDoc
-pprBasicBlock info_env (BasicBlock blockid instrs)
- = maybe_infotable $$
- pprLabel (blockLbl blockid) $$
- vcat (map pprInstr instrs)
- where
- maybe_infotable = case mapLookup blockid info_env of
- Nothing -> empty
- Just (RawCmmStatics info_lbl info) ->
- pprAlignForSection Text $$
- vcat (map pprData info) $$
- pprLabel info_lbl
-
-
-
-pprDatas :: RawCmmStatics -> SDoc
--- See note [emit-time elimination of static indirections] in CLabel.
-pprDatas (RawCmmStatics alias [CmmStaticLit (CmmLabel lbl), CmmStaticLit ind, _, _])
- | lbl == mkIndStaticInfoLabel
- , let labelInd (CmmLabelOff l _) = Just l
- labelInd (CmmLabel l) = Just l
- labelInd _ = Nothing
- , Just ind' <- labelInd ind
- , alias `mayRedirectTo` ind'
- = pprGloblDecl alias
- $$ text ".equiv" <+> ppr alias <> comma <> ppr (CmmLabel ind')
-pprDatas (RawCmmStatics lbl dats) = vcat (pprLabel lbl : map pprData dats)
-
-pprData :: CmmStatic -> SDoc
-pprData (CmmString str) = pprBytes str
-pprData (CmmUninitialised bytes) = text ".space " <> int bytes
-pprData (CmmStaticLit lit) = pprDataItem lit
-
-pprGloblDecl :: CLabel -> SDoc
-pprGloblDecl lbl
- | not (externallyVisibleCLabel lbl) = empty
- | otherwise = text ".globl " <> ppr lbl
-
-pprTypeAndSizeDecl :: CLabel -> SDoc
-pprTypeAndSizeDecl lbl
- = sdocWithPlatform $ \platform ->
- if platformOS platform == OSLinux && externallyVisibleCLabel lbl
- then text ".type " <>
- ppr lbl <> text ", @object"
- else empty
-
-pprLabel :: CLabel -> SDoc
-pprLabel lbl = pprGloblDecl lbl
- $$ pprTypeAndSizeDecl lbl
- $$ (ppr lbl <> char ':')
-
--- -----------------------------------------------------------------------------
--- pprInstr: print an 'Instr'
-
-instance Outputable Instr where
- ppr instr = pprInstr instr
-
-
-pprReg :: Reg -> SDoc
-
-pprReg r
- = case r of
- RegReal (RealRegSingle i) -> ppr_reg_no i
- RegReal (RealRegPair{}) -> panic "PPC.pprReg: no reg pairs on this arch"
- RegVirtual (VirtualRegI u) -> text "%vI_" <> pprUniqueAlways u
- RegVirtual (VirtualRegHi u) -> text "%vHi_" <> pprUniqueAlways u
- RegVirtual (VirtualRegF u) -> text "%vF_" <> pprUniqueAlways u
- RegVirtual (VirtualRegD u) -> text "%vD_" <> pprUniqueAlways u
-
- where
- ppr_reg_no :: Int -> SDoc
- ppr_reg_no i
- | i <= 31 = int i -- GPRs
- | i <= 63 = int (i-32) -- FPRs
- | otherwise = text "very naughty powerpc register"
-
-
-
-pprFormat :: Format -> SDoc
-pprFormat x
- = ptext (case x of
- II8 -> sLit "b"
- II16 -> sLit "h"
- II32 -> sLit "w"
- II64 -> sLit "d"
- FF32 -> sLit "fs"
- FF64 -> sLit "fd")
-
-
-pprCond :: Cond -> SDoc
-pprCond c
- = ptext (case c of {
- ALWAYS -> sLit "";
- EQQ -> sLit "eq"; NE -> sLit "ne";
- LTT -> sLit "lt"; GE -> sLit "ge";
- GTT -> sLit "gt"; LE -> sLit "le";
- LU -> sLit "lt"; GEU -> sLit "ge";
- GU -> sLit "gt"; LEU -> sLit "le"; })
-
-
-pprImm :: Imm -> SDoc
-
-pprImm (ImmInt i) = int i
-pprImm (ImmInteger i) = integer i
-pprImm (ImmCLbl l) = ppr l
-pprImm (ImmIndex l i) = ppr l <> char '+' <> int i
-pprImm (ImmLit s) = s
-
-pprImm (ImmFloat _) = text "naughty float immediate"
-pprImm (ImmDouble _) = text "naughty double immediate"
-
-pprImm (ImmConstantSum a b) = pprImm a <> char '+' <> pprImm b
-pprImm (ImmConstantDiff a b) = pprImm a <> char '-'
- <> lparen <> pprImm b <> rparen
-
-pprImm (LO (ImmInt i)) = pprImm (LO (ImmInteger (toInteger i)))
-pprImm (LO (ImmInteger i)) = pprImm (ImmInteger (toInteger lo16))
- where
- lo16 = fromInteger (i .&. 0xffff) :: Int16
-
-pprImm (LO i)
- = pprImm i <> text "@l"
-
-pprImm (HI i)
- = pprImm i <> text "@h"
-
-pprImm (HA (ImmInt i)) = pprImm (HA (ImmInteger (toInteger i)))
-pprImm (HA (ImmInteger i)) = pprImm (ImmInteger ha16)
- where
- ha16 = if lo16 >= 0x8000 then hi16+1 else hi16
- hi16 = (i `shiftR` 16)
- lo16 = i .&. 0xffff
-
-pprImm (HA i)
- = pprImm i <> text "@ha"
-
-pprImm (HIGHERA i)
- = pprImm i <> text "@highera"
-
-pprImm (HIGHESTA i)
- = pprImm i <> text "@highesta"
-
-
-pprAddr :: AddrMode -> SDoc
-pprAddr (AddrRegReg r1 r2)
- = pprReg r1 <> char ',' <+> pprReg r2
-pprAddr (AddrRegImm r1 (ImmInt i))
- = hcat [ int i, char '(', pprReg r1, char ')' ]
-pprAddr (AddrRegImm r1 (ImmInteger i))
- = hcat [ integer i, char '(', pprReg r1, char ')' ]
-pprAddr (AddrRegImm r1 imm)
- = hcat [ pprImm imm, char '(', pprReg r1, char ')' ]
-
-
-pprSectionAlign :: Section -> SDoc
-pprSectionAlign sec@(Section seg _) =
- sdocWithPlatform $ \platform ->
- pprSectionHeader platform sec $$
- pprAlignForSection seg
-
--- | Print appropriate alignment for the given section type.
-pprAlignForSection :: SectionType -> SDoc
-pprAlignForSection seg =
- sdocWithPlatform $ \platform ->
- let ppc64 = not $ target32Bit platform
- in ptext $ case seg of
- Text -> sLit ".align 2"
- Data
- | ppc64 -> sLit ".align 3"
- | otherwise -> sLit ".align 2"
- ReadOnlyData
- | ppc64 -> sLit ".align 3"
- | otherwise -> sLit ".align 2"
- RelocatableReadOnlyData
- | ppc64 -> sLit ".align 3"
- | otherwise -> sLit ".align 2"
- UninitialisedData
- | ppc64 -> sLit ".align 3"
- | otherwise -> sLit ".align 2"
- ReadOnlyData16 -> sLit ".align 4"
- -- TODO: This is copied from the ReadOnlyData case, but it can likely be
- -- made more efficient.
- CString
- | ppc64 -> sLit ".align 3"
- | otherwise -> sLit ".align 2"
- OtherSection _ -> panic "PprMach.pprSectionAlign: unknown section"
-
-pprDataItem :: CmmLit -> SDoc
-pprDataItem lit
- = sdocWithDynFlags $ \dflags ->
- vcat (ppr_item (cmmTypeFormat $ cmmLitType dflags lit) lit dflags)
- where
- imm = litToImm lit
- archPPC_64 dflags = not $ target32Bit $ targetPlatform dflags
-
- ppr_item II8 _ _ = [text "\t.byte\t" <> pprImm imm]
-
- ppr_item II32 _ _ = [text "\t.long\t" <> pprImm imm]
-
- ppr_item II64 _ dflags
- | archPPC_64 dflags = [text "\t.quad\t" <> pprImm imm]
-
-
- ppr_item FF32 (CmmFloat r _) _
- = let bs = floatToBytes (fromRational r)
- in map (\b -> text "\t.byte\t" <> pprImm (ImmInt b)) bs
-
- ppr_item FF64 (CmmFloat r _) _
- = let bs = doubleToBytes (fromRational r)
- in map (\b -> text "\t.byte\t" <> pprImm (ImmInt b)) bs
-
- ppr_item II16 _ _ = [text "\t.short\t" <> pprImm imm]
-
- ppr_item II64 (CmmInt x _) dflags
- | not(archPPC_64 dflags) =
- [text "\t.long\t"
- <> int (fromIntegral
- (fromIntegral (x `shiftR` 32) :: Word32)),
- text "\t.long\t"
- <> int (fromIntegral (fromIntegral x :: Word32))]
-
- ppr_item _ _ _
- = panic "PPC.Ppr.pprDataItem: no match"
-
-
-pprInstr :: Instr -> SDoc
-
-pprInstr (COMMENT _) = empty -- nuke 'em
-{-
-pprInstr (COMMENT s) =
- if platformOS platform == OSLinux
- then text "# " <> ftext s
- else text "; " <> ftext s
--}
-pprInstr (DELTA d)
- = pprInstr (COMMENT (mkFastString ("\tdelta = " ++ show d)))
-
-pprInstr (NEWBLOCK _)
- = panic "PprMach.pprInstr: NEWBLOCK"
-
-pprInstr (LDATA _ _)
- = panic "PprMach.pprInstr: LDATA"
-
-{-
-pprInstr (SPILL reg slot)
- = hcat [
- text "\tSPILL",
- char '\t',
- pprReg reg,
- comma,
- text "SLOT" <> parens (int slot)]
-
-pprInstr (RELOAD slot reg)
- = hcat [
- text "\tRELOAD",
- char '\t',
- text "SLOT" <> parens (int slot),
- comma,
- pprReg reg]
--}
-
-pprInstr (LD fmt reg addr) = hcat [
- char '\t',
- text "l",
- ptext (case fmt of
- II8 -> sLit "bz"
- II16 -> sLit "hz"
- II32 -> sLit "wz"
- II64 -> sLit "d"
- FF32 -> sLit "fs"
- FF64 -> sLit "fd"
- ),
- case addr of AddrRegImm _ _ -> empty
- AddrRegReg _ _ -> char 'x',
- char '\t',
- pprReg reg,
- text ", ",
- pprAddr addr
- ]
-
-pprInstr (LDFAR fmt reg (AddrRegImm source off)) =
- sdocWithPlatform $ \platform -> vcat [
- pprInstr (ADDIS (tmpReg platform) source (HA off)),
- pprInstr (LD fmt reg (AddrRegImm (tmpReg platform) (LO off)))
- ]
-pprInstr (LDFAR _ _ _) =
- panic "PPC.Ppr.pprInstr LDFAR: no match"
-
-pprInstr (LDR fmt reg1 addr) = hcat [
- text "\tl",
- case fmt of
- II32 -> char 'w'
- II64 -> char 'd'
- _ -> panic "PPC.Ppr.Instr LDR: no match",
- text "arx\t",
- pprReg reg1,
- text ", ",
- pprAddr addr
- ]
-
-pprInstr (LA fmt reg addr) = hcat [
- char '\t',
- text "l",
- ptext (case fmt of
- II8 -> sLit "ba"
- II16 -> sLit "ha"
- II32 -> sLit "wa"
- II64 -> sLit "d"
- FF32 -> sLit "fs"
- FF64 -> sLit "fd"
- ),
- case addr of AddrRegImm _ _ -> empty
- AddrRegReg _ _ -> char 'x',
- char '\t',
- pprReg reg,
- text ", ",
- pprAddr addr
- ]
-pprInstr (ST fmt reg addr) = hcat [
- char '\t',
- text "st",
- pprFormat fmt,
- case addr of AddrRegImm _ _ -> empty
- AddrRegReg _ _ -> char 'x',
- char '\t',
- pprReg reg,
- text ", ",
- pprAddr addr
- ]
-pprInstr (STFAR fmt reg (AddrRegImm source off)) =
- sdocWithPlatform $ \platform -> vcat [
- pprInstr (ADDIS (tmpReg platform) source (HA off)),
- pprInstr (ST fmt reg (AddrRegImm (tmpReg platform) (LO off)))
- ]
-pprInstr (STFAR _ _ _) =
- panic "PPC.Ppr.pprInstr STFAR: no match"
-pprInstr (STU fmt reg addr) = hcat [
- char '\t',
- text "st",
- pprFormat fmt,
- char 'u',
- case addr of AddrRegImm _ _ -> empty
- AddrRegReg _ _ -> char 'x',
- char '\t',
- pprReg reg,
- text ", ",
- pprAddr addr
- ]
-pprInstr (STC fmt reg1 addr) = hcat [
- text "\tst",
- case fmt of
- II32 -> char 'w'
- II64 -> char 'd'
- _ -> panic "PPC.Ppr.Instr STC: no match",
- text "cx.\t",
- pprReg reg1,
- text ", ",
- pprAddr addr
- ]
-pprInstr (LIS reg imm) = hcat [
- char '\t',
- text "lis",
- char '\t',
- pprReg reg,
- text ", ",
- pprImm imm
- ]
-pprInstr (LI reg imm) = hcat [
- char '\t',
- text "li",
- char '\t',
- pprReg reg,
- text ", ",
- pprImm imm
- ]
-pprInstr (MR reg1 reg2)
- | reg1 == reg2 = empty
- | otherwise = hcat [
- char '\t',
- sdocWithPlatform $ \platform ->
- case targetClassOfReg platform reg1 of
- RcInteger -> text "mr"
- _ -> text "fmr",
- char '\t',
- pprReg reg1,
- text ", ",
- pprReg reg2
- ]
-pprInstr (CMP fmt reg ri) = hcat [
- char '\t',
- op,
- char '\t',
- pprReg reg,
- text ", ",
- pprRI ri
- ]
- where
- op = hcat [
- text "cmp",
- pprFormat fmt,
- case ri of
- RIReg _ -> empty
- RIImm _ -> char 'i'
- ]
-pprInstr (CMPL fmt reg ri) = hcat [
- char '\t',
- op,
- char '\t',
- pprReg reg,
- text ", ",
- pprRI ri
- ]
- where
- op = hcat [
- text "cmpl",
- pprFormat fmt,
- case ri of
- RIReg _ -> empty
- RIImm _ -> char 'i'
- ]
-pprInstr (BCC cond blockid prediction) = hcat [
- char '\t',
- text "b",
- pprCond cond,
- pprPrediction prediction,
- char '\t',
- ppr lbl
- ]
- where lbl = mkLocalBlockLabel (getUnique blockid)
- pprPrediction p = case p of
- Nothing -> empty
- Just True -> char '+'
- Just False -> char '-'
-
-pprInstr (BCCFAR cond blockid prediction) = vcat [
- hcat [
- text "\tb",
- pprCond (condNegate cond),
- neg_prediction,
- text "\t$+8"
- ],
- hcat [
- text "\tb\t",
- ppr lbl
- ]
- ]
- where lbl = mkLocalBlockLabel (getUnique blockid)
- neg_prediction = case prediction of
- Nothing -> empty
- Just True -> char '-'
- Just False -> char '+'
-
-pprInstr (JMP lbl _)
- -- We never jump to ForeignLabels; if we ever do, c.f. handling for "BL"
- | isForeignLabel lbl = panic "PPC.Ppr.pprInstr: JMP to ForeignLabel"
- | otherwise =
- hcat [ -- an alias for b that takes a CLabel
- char '\t',
- text "b",
- char '\t',
- ppr lbl
- ]
-
-pprInstr (MTCTR reg) = hcat [
- char '\t',
- text "mtctr",
- char '\t',
- pprReg reg
- ]
-pprInstr (BCTR _ _ _) = hcat [
- char '\t',
- text "bctr"
- ]
-pprInstr (BL lbl _) = do
- sdocWithPlatform $ \platform -> case platformOS platform of
- OSAIX ->
- -- On AIX, "printf" denotes a function-descriptor (for use
- -- by function pointers), whereas the actual entry-code
- -- address is denoted by the dot-prefixed ".printf" label.
- -- Moreover, the PPC NCG only ever emits a BL instruction
- -- for calling C ABI functions. Most of the time these calls
- -- originate from FFI imports and have a 'ForeignLabel',
- -- but when profiling the codegen inserts calls via
- -- 'emitRtsCallGen' which are 'CmmLabel's even though
- -- they'd technically be more like 'ForeignLabel's.
- hcat [
- text "\tbl\t.",
- ppr lbl
- ]
- _ ->
- hcat [
- text "\tbl\t",
- ppr lbl
- ]
-pprInstr (BCTRL _) = hcat [
- char '\t',
- text "bctrl"
- ]
-pprInstr (ADD reg1 reg2 ri) = pprLogic (sLit "add") reg1 reg2 ri
-pprInstr (ADDIS reg1 reg2 imm) = hcat [
- char '\t',
- text "addis",
- char '\t',
- pprReg reg1,
- text ", ",
- pprReg reg2,
- text ", ",
- pprImm imm
- ]
-
-pprInstr (ADDO reg1 reg2 reg3) = pprLogic (sLit "addo") reg1 reg2 (RIReg reg3)
-pprInstr (ADDC reg1 reg2 reg3) = pprLogic (sLit "addc") reg1 reg2 (RIReg reg3)
-pprInstr (ADDE reg1 reg2 reg3) = pprLogic (sLit "adde") reg1 reg2 (RIReg reg3)
-pprInstr (ADDZE reg1 reg2) = pprUnary (sLit "addze") reg1 reg2
-pprInstr (SUBF reg1 reg2 reg3) = pprLogic (sLit "subf") reg1 reg2 (RIReg reg3)
-pprInstr (SUBFO reg1 reg2 reg3) = pprLogic (sLit "subfo") reg1 reg2 (RIReg reg3)
-pprInstr (SUBFC reg1 reg2 ri) = hcat [
- char '\t',
- text "subf",
- case ri of
- RIReg _ -> empty
- RIImm _ -> char 'i',
- text "c\t",
- pprReg reg1,
- text ", ",
- pprReg reg2,
- text ", ",
- pprRI ri
- ]
-pprInstr (SUBFE reg1 reg2 reg3) = pprLogic (sLit "subfe") reg1 reg2 (RIReg reg3)
-pprInstr (MULL fmt reg1 reg2 ri) = pprMul fmt reg1 reg2 ri
-pprInstr (MULLO fmt reg1 reg2 reg3) = hcat [
- char '\t',
- text "mull",
- case fmt of
- II32 -> char 'w'
- II64 -> char 'd'
- _ -> panic "PPC: illegal format",
- text "o\t",
- pprReg reg1,
- text ", ",
- pprReg reg2,
- text ", ",
- pprReg reg3
- ]
-pprInstr (MFOV fmt reg) = vcat [
- hcat [
- char '\t',
- text "mfxer",
- char '\t',
- pprReg reg
- ],
- hcat [
- char '\t',
- text "extr",
- case fmt of
- II32 -> char 'w'
- II64 -> char 'd'
- _ -> panic "PPC: illegal format",
- text "i\t",
- pprReg reg,
- text ", ",
- pprReg reg,
- text ", 1, ",
- case fmt of
- II32 -> text "1"
- II64 -> text "33"
- _ -> panic "PPC: illegal format"
- ]
- ]
-
-pprInstr (MULHU fmt reg1 reg2 reg3) = hcat [
- char '\t',
- text "mulh",
- case fmt of
- II32 -> char 'w'
- II64 -> char 'd'
- _ -> panic "PPC: illegal format",
- text "u\t",
- pprReg reg1,
- text ", ",
- pprReg reg2,
- text ", ",
- pprReg reg3
- ]
-
-pprInstr (DIV fmt sgn reg1 reg2 reg3) = pprDiv fmt sgn reg1 reg2 reg3
-
- -- for some reason, "andi" doesn't exist.
- -- we'll use "andi." instead.
-pprInstr (AND reg1 reg2 (RIImm imm)) = hcat [
- char '\t',
- text "andi.",
- char '\t',
- pprReg reg1,
- text ", ",
- pprReg reg2,
- text ", ",
- pprImm imm
- ]
-pprInstr (AND reg1 reg2 ri) = pprLogic (sLit "and") reg1 reg2 ri
-pprInstr (ANDC reg1 reg2 reg3) = pprLogic (sLit "andc") reg1 reg2 (RIReg reg3)
-pprInstr (NAND reg1 reg2 reg3) = pprLogic (sLit "nand") reg1 reg2 (RIReg reg3)
-
-pprInstr (OR reg1 reg2 ri) = pprLogic (sLit "or") reg1 reg2 ri
-pprInstr (XOR reg1 reg2 ri) = pprLogic (sLit "xor") reg1 reg2 ri
-
-pprInstr (ORIS reg1 reg2 imm) = hcat [
- char '\t',
- text "oris",
- char '\t',
- pprReg reg1,
- text ", ",
- pprReg reg2,
- text ", ",
- pprImm imm
- ]
-
-pprInstr (XORIS reg1 reg2 imm) = hcat [
- char '\t',
- text "xoris",
- char '\t',
- pprReg reg1,
- text ", ",
- pprReg reg2,
- text ", ",
- pprImm imm
- ]
-
-pprInstr (EXTS fmt reg1 reg2) = hcat [
- char '\t',
- text "exts",
- pprFormat fmt,
- char '\t',
- pprReg reg1,
- text ", ",
- pprReg reg2
- ]
-pprInstr (CNTLZ fmt reg1 reg2) = hcat [
- char '\t',
- text "cntlz",
- case fmt of
- II32 -> char 'w'
- II64 -> char 'd'
- _ -> panic "PPC: illegal format",
- char '\t',
- pprReg reg1,
- text ", ",
- pprReg reg2
- ]
-
-pprInstr (NEG reg1 reg2) = pprUnary (sLit "neg") reg1 reg2
-pprInstr (NOT reg1 reg2) = pprUnary (sLit "not") reg1 reg2
-
-pprInstr (SR II32 reg1 reg2 (RIImm (ImmInt i))) | i < 0 || i > 31 =
- -- Handle the case where we are asked to shift a 32 bit register by
- -- less than zero or more than 31 bits. We convert this into a clear
- -- of the destination register.
- -- Fixes ticket https://gitlab.haskell.org/ghc/ghc/issues/5900
- pprInstr (XOR reg1 reg2 (RIReg reg2))
-
-pprInstr (SL II32 reg1 reg2 (RIImm (ImmInt i))) | i < 0 || i > 31 =
- -- As above for SR, but for left shifts.
- -- Fixes ticket https://gitlab.haskell.org/ghc/ghc/issues/10870
- pprInstr (XOR reg1 reg2 (RIReg reg2))
-
-pprInstr (SRA II32 reg1 reg2 (RIImm (ImmInt i))) | i > 31 =
- -- PT: I don't know what to do for negative shift amounts:
- -- For now just panic.
- --
- -- For shift amounts greater than 31 set all bit to the
- -- value of the sign bit, this also what sraw does.
- pprInstr (SRA II32 reg1 reg2 (RIImm (ImmInt 31)))
-
-pprInstr (SL fmt reg1 reg2 ri) =
- let op = case fmt of
- II32 -> "slw"
- II64 -> "sld"
- _ -> panic "PPC.Ppr.pprInstr: shift illegal size"
- in pprLogic (sLit op) reg1 reg2 (limitShiftRI fmt ri)
-
-pprInstr (SR fmt reg1 reg2 ri) =
- let op = case fmt of
- II32 -> "srw"
- II64 -> "srd"
- _ -> panic "PPC.Ppr.pprInstr: shift illegal size"
- in pprLogic (sLit op) reg1 reg2 (limitShiftRI fmt ri)
-
-pprInstr (SRA fmt reg1 reg2 ri) =
- let op = case fmt of
- II32 -> "sraw"
- II64 -> "srad"
- _ -> panic "PPC.Ppr.pprInstr: shift illegal size"
- in pprLogic (sLit op) reg1 reg2 (limitShiftRI fmt ri)
-
-pprInstr (RLWINM reg1 reg2 sh mb me) = hcat [
- text "\trlwinm\t",
- pprReg reg1,
- text ", ",
- pprReg reg2,
- text ", ",
- int sh,
- text ", ",
- int mb,
- text ", ",
- int me
- ]
-
-pprInstr (CLRLI fmt reg1 reg2 n) = hcat [
- text "\tclrl",
- pprFormat fmt,
- text "i ",
- pprReg reg1,
- text ", ",
- pprReg reg2,
- text ", ",
- int n
- ]
-pprInstr (CLRRI fmt reg1 reg2 n) = hcat [
- text "\tclrr",
- pprFormat fmt,
- text "i ",
- pprReg reg1,
- text ", ",
- pprReg reg2,
- text ", ",
- int n
- ]
-
-pprInstr (FADD fmt reg1 reg2 reg3) = pprBinaryF (sLit "fadd") fmt reg1 reg2 reg3
-pprInstr (FSUB fmt reg1 reg2 reg3) = pprBinaryF (sLit "fsub") fmt reg1 reg2 reg3
-pprInstr (FMUL fmt reg1 reg2 reg3) = pprBinaryF (sLit "fmul") fmt reg1 reg2 reg3
-pprInstr (FDIV fmt reg1 reg2 reg3) = pprBinaryF (sLit "fdiv") fmt reg1 reg2 reg3
-pprInstr (FABS reg1 reg2) = pprUnary (sLit "fabs") reg1 reg2
-pprInstr (FNEG reg1 reg2) = pprUnary (sLit "fneg") reg1 reg2
-
-pprInstr (FCMP reg1 reg2) = hcat [
- char '\t',
- text "fcmpu\t0, ",
- -- Note: we're using fcmpu, not fcmpo
- -- The difference is with fcmpo, compare with NaN is an invalid operation.
- -- We don't handle invalid fp ops, so we don't care.
- -- Moreover, we use `fcmpu 0, ...` rather than `fcmpu cr0, ...` for
- -- better portability since some non-GNU assembler (such as
- -- IBM's `as`) tend not to support the symbolic register name cr0.
- -- This matches the syntax that GCC seems to emit for PPC targets.
- pprReg reg1,
- text ", ",
- pprReg reg2
- ]
-
-pprInstr (FCTIWZ reg1 reg2) = pprUnary (sLit "fctiwz") reg1 reg2
-pprInstr (FCTIDZ reg1 reg2) = pprUnary (sLit "fctidz") reg1 reg2
-pprInstr (FCFID reg1 reg2) = pprUnary (sLit "fcfid") reg1 reg2
-pprInstr (FRSP reg1 reg2) = pprUnary (sLit "frsp") reg1 reg2
-
-pprInstr (CRNOR dst src1 src2) = hcat [
- text "\tcrnor\t",
- int dst,
- text ", ",
- int src1,
- text ", ",
- int src2
- ]
-
-pprInstr (MFCR reg) = hcat [
- char '\t',
- text "mfcr",
- char '\t',
- pprReg reg
- ]
-
-pprInstr (MFLR reg) = hcat [
- char '\t',
- text "mflr",
- char '\t',
- pprReg reg
- ]
-
-pprInstr (FETCHPC reg) = vcat [
- text "\tbcl\t20,31,1f",
- hcat [ text "1:\tmflr\t", pprReg reg ]
- ]
-
-pprInstr HWSYNC = text "\tsync"
-
-pprInstr ISYNC = text "\tisync"
-
-pprInstr LWSYNC = text "\tlwsync"
-
-pprInstr NOP = text "\tnop"
-
-
-pprLogic :: PtrString -> Reg -> Reg -> RI -> SDoc
-pprLogic op reg1 reg2 ri = hcat [
- char '\t',
- ptext op,
- case ri of
- RIReg _ -> empty
- RIImm _ -> char 'i',
- char '\t',
- pprReg reg1,
- text ", ",
- pprReg reg2,
- text ", ",
- pprRI ri
- ]
-
-
-pprMul :: Format -> Reg -> Reg -> RI -> SDoc
-pprMul fmt reg1 reg2 ri = hcat [
- char '\t',
- text "mull",
- case ri of
- RIReg _ -> case fmt of
- II32 -> char 'w'
- II64 -> char 'd'
- _ -> panic "PPC: illegal format"
- RIImm _ -> char 'i',
- char '\t',
- pprReg reg1,
- text ", ",
- pprReg reg2,
- text ", ",
- pprRI ri
- ]
-
-
-pprDiv :: Format -> Bool -> Reg -> Reg -> Reg -> SDoc
-pprDiv fmt sgn reg1 reg2 reg3 = hcat [
- char '\t',
- text "div",
- case fmt of
- II32 -> char 'w'
- II64 -> char 'd'
- _ -> panic "PPC: illegal format",
- if sgn then empty else char 'u',
- char '\t',
- pprReg reg1,
- text ", ",
- pprReg reg2,
- text ", ",
- pprReg reg3
- ]
-
-
-pprUnary :: PtrString -> Reg -> Reg -> SDoc
-pprUnary op reg1 reg2 = hcat [
- char '\t',
- ptext op,
- char '\t',
- pprReg reg1,
- text ", ",
- pprReg reg2
- ]
-
-
-pprBinaryF :: PtrString -> Format -> Reg -> Reg -> Reg -> SDoc
-pprBinaryF op fmt reg1 reg2 reg3 = hcat [
- char '\t',
- ptext op,
- pprFFormat fmt,
- char '\t',
- pprReg reg1,
- text ", ",
- pprReg reg2,
- text ", ",
- pprReg reg3
- ]
-
-pprRI :: RI -> SDoc
-pprRI (RIReg r) = pprReg r
-pprRI (RIImm r) = pprImm r
-
-
-pprFFormat :: Format -> SDoc
-pprFFormat FF64 = empty
-pprFFormat FF32 = char 's'
-pprFFormat _ = panic "PPC.Ppr.pprFFormat: no match"
-
- -- limit immediate argument for shift instruction to range 0..63
- -- for 64 bit size and 0..32 otherwise
-limitShiftRI :: Format -> RI -> RI
-limitShiftRI II64 (RIImm (ImmInt i)) | i > 63 || i < 0 =
- panic $ "PPC.Ppr: Shift by " ++ show i ++ " bits is not allowed."
-limitShiftRI II32 (RIImm (ImmInt i)) | i > 31 || i < 0 =
- panic $ "PPC.Ppr: 32 bit: Shift by " ++ show i ++ " bits is not allowed."
-limitShiftRI _ x = x
diff --git a/compiler/nativeGen/PPC/RegInfo.hs b/compiler/nativeGen/PPC/RegInfo.hs
deleted file mode 100644
index c1a4e73e3d..0000000000
--- a/compiler/nativeGen/PPC/RegInfo.hs
+++ /dev/null
@@ -1,80 +0,0 @@
-{-# LANGUAGE CPP #-}
-
------------------------------------------------------------------------------
---
--- Machine-specific parts of the register allocator
---
--- (c) The University of Glasgow 1996-2004
---
------------------------------------------------------------------------------
-module PPC.RegInfo (
- JumpDest( DestBlockId ), getJumpDestBlockId,
- canShortcut,
- shortcutJump,
-
- shortcutStatics
-)
-
-where
-
-#include "HsVersions.h"
-
-import GhcPrelude
-
-import PPC.Instr
-
-import GHC.Cmm.BlockId
-import GHC.Cmm
-import GHC.Cmm.CLabel
-
-import Unique
-import Outputable (ppr, text, Outputable, (<>))
-
-data JumpDest = DestBlockId BlockId
-
--- Debug Instance
-instance Outputable JumpDest where
- ppr (DestBlockId bid) = text "jd<blk>:" <> ppr bid
-
-getJumpDestBlockId :: JumpDest -> Maybe BlockId
-getJumpDestBlockId (DestBlockId bid) = Just bid
-
-canShortcut :: Instr -> Maybe JumpDest
-canShortcut _ = Nothing
-
-shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
-shortcutJump _ other = other
-
-
--- Here because it knows about JumpDest
-shortcutStatics :: (BlockId -> Maybe JumpDest) -> RawCmmStatics -> RawCmmStatics
-shortcutStatics fn (RawCmmStatics lbl statics)
- = RawCmmStatics lbl $ map (shortcutStatic fn) statics
- -- we need to get the jump tables, so apply the mapping to the entries
- -- of a CmmData too.
-
-shortcutLabel :: (BlockId -> Maybe JumpDest) -> CLabel -> CLabel
-shortcutLabel fn lab
- | Just blkId <- maybeLocalBlockLabel lab = shortBlockId fn blkId
- | otherwise = lab
-
-shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
-shortcutStatic fn (CmmStaticLit (CmmLabel lab))
- = CmmStaticLit (CmmLabel (shortcutLabel fn lab))
-shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off w))
- = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off w)
- -- slightly dodgy, we're ignoring the second label, but this
- -- works with the way we use CmmLabelDiffOff for jump tables now.
-shortcutStatic _ other_static
- = other_static
-
-shortBlockId
- :: (BlockId -> Maybe JumpDest)
- -> BlockId
- -> CLabel
-
-shortBlockId fn blockid =
- case fn blockid of
- Nothing -> mkLocalBlockLabel uq
- Just (DestBlockId blockid') -> shortBlockId fn blockid'
- where uq = getUnique blockid
diff --git a/compiler/nativeGen/PPC/Regs.hs b/compiler/nativeGen/PPC/Regs.hs
deleted file mode 100644
index ff3ec639be..0000000000
--- a/compiler/nativeGen/PPC/Regs.hs
+++ /dev/null
@@ -1,333 +0,0 @@
-{-# LANGUAGE CPP #-}
-
--- -----------------------------------------------------------------------------
---
--- (c) The University of Glasgow 1994-2004
---
--- -----------------------------------------------------------------------------
-
-module PPC.Regs (
- -- squeeze functions
- virtualRegSqueeze,
- realRegSqueeze,
-
- mkVirtualReg,
- regDotColor,
-
- -- immediates
- Imm(..),
- strImmLit,
- litToImm,
-
- -- addressing modes
- AddrMode(..),
- addrOffset,
-
- -- registers
- spRel,
- argRegs,
- allArgRegs,
- callClobberedRegs,
- allMachRegNos,
- classOfRealReg,
- showReg,
-
- -- machine specific
- allFPArgRegs,
- fits16Bits,
- makeImmediate,
- fReg,
- r0, sp, toc, r3, r4, r11, r12, r30,
- tmpReg,
- f1,
-
- allocatableRegs
-
-)
-
-where
-
-#include "HsVersions.h"
-
-import GhcPrelude
-
-import Reg
-import RegClass
-import Format
-
-import GHC.Cmm
-import GHC.Cmm.CLabel ( CLabel )
-import Unique
-
-import GHC.Platform.Regs
-import GHC.Driver.Session
-import Outputable
-import GHC.Platform
-
-import Data.Word ( Word8, Word16, Word32, Word64 )
-import Data.Int ( Int8, Int16, Int32, Int64 )
-
-
--- squeese functions for the graph allocator -----------------------------------
-
--- | regSqueeze_class reg
--- Calculate the maximum number of register colors that could be
--- denied to a node of this class due to having this reg
--- as a neighbour.
---
-{-# INLINE virtualRegSqueeze #-}
-virtualRegSqueeze :: RegClass -> VirtualReg -> Int
-virtualRegSqueeze cls vr
- = case cls of
- RcInteger
- -> case vr of
- VirtualRegI{} -> 1
- VirtualRegHi{} -> 1
- _other -> 0
-
- RcDouble
- -> case vr of
- VirtualRegD{} -> 1
- VirtualRegF{} -> 0
- _other -> 0
-
- _other -> 0
-
-{-# INLINE realRegSqueeze #-}
-realRegSqueeze :: RegClass -> RealReg -> Int
-realRegSqueeze cls rr
- = case cls of
- RcInteger
- -> case rr of
- RealRegSingle regNo
- | regNo < 32 -> 1 -- first fp reg is 32
- | otherwise -> 0
-
- RealRegPair{} -> 0
-
- RcDouble
- -> case rr of
- RealRegSingle regNo
- | regNo < 32 -> 0
- | otherwise -> 1
-
- RealRegPair{} -> 0
-
- _other -> 0
-
-mkVirtualReg :: Unique -> Format -> VirtualReg
-mkVirtualReg u format
- | not (isFloatFormat format) = VirtualRegI u
- | otherwise
- = case format of
- FF32 -> VirtualRegD u
- FF64 -> VirtualRegD u
- _ -> panic "mkVirtualReg"
-
-regDotColor :: RealReg -> SDoc
-regDotColor reg
- = case classOfRealReg reg of
- RcInteger -> text "blue"
- RcFloat -> text "red"
- RcDouble -> text "green"
-
-
-
--- immediates ------------------------------------------------------------------
-data Imm
- = ImmInt Int
- | ImmInteger Integer -- Sigh.
- | ImmCLbl CLabel -- AbstractC Label (with baggage)
- | ImmLit SDoc -- Simple string
- | ImmIndex CLabel Int
- | ImmFloat Rational
- | ImmDouble Rational
- | ImmConstantSum Imm Imm
- | ImmConstantDiff Imm Imm
- | LO Imm
- | HI Imm
- | HA Imm {- high halfword adjusted -}
- | HIGHERA Imm
- | HIGHESTA Imm
-
-
-strImmLit :: String -> Imm
-strImmLit s = ImmLit (text s)
-
-
-litToImm :: CmmLit -> Imm
-litToImm (CmmInt i w) = ImmInteger (narrowS w i)
- -- narrow to the width: a CmmInt might be out of
- -- range, but we assume that ImmInteger only contains
- -- in-range values. A signed value should be fine here.
-litToImm (CmmFloat f W32) = ImmFloat f
-litToImm (CmmFloat f W64) = ImmDouble f
-litToImm (CmmLabel l) = ImmCLbl l
-litToImm (CmmLabelOff l off) = ImmIndex l off
-litToImm (CmmLabelDiffOff l1 l2 off _)
- = ImmConstantSum
- (ImmConstantDiff (ImmCLbl l1) (ImmCLbl l2))
- (ImmInt off)
-litToImm _ = panic "PPC.Regs.litToImm: no match"
-
-
--- addressing modes ------------------------------------------------------------
-
-data AddrMode
- = AddrRegReg Reg Reg
- | AddrRegImm Reg Imm
-
-
-addrOffset :: AddrMode -> Int -> Maybe AddrMode
-addrOffset addr off
- = case addr of
- AddrRegImm r (ImmInt n)
- | fits16Bits n2 -> Just (AddrRegImm r (ImmInt n2))
- | otherwise -> Nothing
- where n2 = n + off
-
- AddrRegImm r (ImmInteger n)
- | fits16Bits n2 -> Just (AddrRegImm r (ImmInt (fromInteger n2)))
- | otherwise -> Nothing
- where n2 = n + toInteger off
-
- _ -> Nothing
-
-
--- registers -------------------------------------------------------------------
--- @spRel@ gives us a stack relative addressing mode for volatile
--- temporaries and for excess call arguments. @fpRel@, where
--- applicable, is the same but for the frame pointer.
-
-spRel :: DynFlags
- -> Int -- desired stack offset in words, positive or negative
- -> AddrMode
-
-spRel dflags n = AddrRegImm sp (ImmInt (n * wORD_SIZE dflags))
-
-
--- argRegs is the set of regs which are read for an n-argument call to C.
--- For archs which pass all args on the stack (x86), is empty.
--- Sparc passes up to the first 6 args in regs.
-argRegs :: RegNo -> [Reg]
-argRegs 0 = []
-argRegs 1 = map regSingle [3]
-argRegs 2 = map regSingle [3,4]
-argRegs 3 = map regSingle [3..5]
-argRegs 4 = map regSingle [3..6]
-argRegs 5 = map regSingle [3..7]
-argRegs 6 = map regSingle [3..8]
-argRegs 7 = map regSingle [3..9]
-argRegs 8 = map regSingle [3..10]
-argRegs _ = panic "MachRegs.argRegs(powerpc): don't know about >8 arguments!"
-
-
-allArgRegs :: [Reg]
-allArgRegs = map regSingle [3..10]
-
-
--- these are the regs which we cannot assume stay alive over a C call.
-callClobberedRegs :: Platform -> [Reg]
-callClobberedRegs _platform
- = map regSingle (0:[2..12] ++ map fReg [0..13])
-
-
-allMachRegNos :: [RegNo]
-allMachRegNos = [0..63]
-
-
-{-# INLINE classOfRealReg #-}
-classOfRealReg :: RealReg -> RegClass
-classOfRealReg (RealRegSingle i)
- | i < 32 = RcInteger
- | otherwise = RcDouble
-
-classOfRealReg (RealRegPair{})
- = panic "regClass(ppr): no reg pairs on this architecture"
-
-showReg :: RegNo -> String
-showReg n
- | n >= 0 && n <= 31 = "%r" ++ show n
- | n >= 32 && n <= 63 = "%f" ++ show (n - 32)
- | otherwise = "%unknown_powerpc_real_reg_" ++ show n
-
-
-
--- machine specific ------------------------------------------------------------
-
-allFPArgRegs :: Platform -> [Reg]
-allFPArgRegs platform
- = case platformOS platform of
- OSAIX -> map (regSingle . fReg) [1..13]
- _ -> case platformArch platform of
- ArchPPC -> map (regSingle . fReg) [1..8]
- ArchPPC_64 _ -> map (regSingle . fReg) [1..13]
- _ -> panic "PPC.Regs.allFPArgRegs: unknown PPC Linux"
-
-fits16Bits :: Integral a => a -> Bool
-fits16Bits x = x >= -32768 && x < 32768
-
-makeImmediate :: Integral a => Width -> Bool -> a -> Maybe Imm
-makeImmediate rep signed x = fmap ImmInt (toI16 rep signed)
- where
- narrow W64 False = fromIntegral (fromIntegral x :: Word64)
- narrow W32 False = fromIntegral (fromIntegral x :: Word32)
- narrow W16 False = fromIntegral (fromIntegral x :: Word16)
- narrow W8 False = fromIntegral (fromIntegral x :: Word8)
- narrow W64 True = fromIntegral (fromIntegral x :: Int64)
- narrow W32 True = fromIntegral (fromIntegral x :: Int32)
- narrow W16 True = fromIntegral (fromIntegral x :: Int16)
- narrow W8 True = fromIntegral (fromIntegral x :: Int8)
- narrow _ _ = panic "PPC.Regs.narrow: no match"
-
- narrowed = narrow rep signed
-
- toI16 W32 True
- | narrowed >= -32768 && narrowed < 32768 = Just narrowed
- | otherwise = Nothing
- toI16 W32 False
- | narrowed >= 0 && narrowed < 65536 = Just narrowed
- | otherwise = Nothing
- toI16 W64 True
- | narrowed >= -32768 && narrowed < 32768 = Just narrowed
- | otherwise = Nothing
- toI16 W64 False
- | narrowed >= 0 && narrowed < 65536 = Just narrowed
- | otherwise = Nothing
- toI16 _ _ = Just narrowed
-
-
-{-
-The PowerPC has 64 registers of interest; 32 integer registers and 32 floating
-point registers.
--}
-
-fReg :: Int -> RegNo
-fReg x = (32 + x)
-
-r0, sp, toc, r3, r4, r11, r12, r30, f1 :: Reg
-r0 = regSingle 0
-sp = regSingle 1
-toc = regSingle 2
-r3 = regSingle 3
-r4 = regSingle 4
-r11 = regSingle 11
-r12 = regSingle 12
-r30 = regSingle 30
-f1 = regSingle $ fReg 1
-
--- allocatableRegs is allMachRegNos with the fixed-use regs removed.
--- i.e., these are the regs for which we are prepared to allow the
--- register allocator to attempt to map VRegs to.
-allocatableRegs :: Platform -> [RealReg]
-allocatableRegs platform
- = let isFree i = freeReg platform i
- in map RealRegSingle $ filter isFree allMachRegNos
-
--- temporary register for compiler use
-tmpReg :: Platform -> Reg
-tmpReg platform =
- case platformArch platform of
- ArchPPC -> regSingle 13
- ArchPPC_64 _ -> regSingle 30
- _ -> panic "PPC.Regs.tmpReg: unknown arch"
diff --git a/compiler/nativeGen/PprBase.hs b/compiler/nativeGen/PprBase.hs
deleted file mode 100644
index dc6e65bd70..0000000000
--- a/compiler/nativeGen/PprBase.hs
+++ /dev/null
@@ -1,275 +0,0 @@
-{-# LANGUAGE MagicHash #-}
-
------------------------------------------------------------------------------
---
--- Pretty-printing assembly language
---
--- (c) The University of Glasgow 1993-2005
---
------------------------------------------------------------------------------
-
-module PprBase (
- castFloatToWord8Array,
- castDoubleToWord8Array,
- floatToBytes,
- doubleToBytes,
- pprASCII,
- pprBytes,
- pprSectionHeader
-)
-
-where
-
-import GhcPrelude
-
-import AsmUtils
-import GHC.Cmm.CLabel
-import GHC.Cmm
-import GHC.Driver.Session
-import FastString
-import Outputable
-import GHC.Platform
-import FileCleanup
-
-import qualified Data.Array.Unsafe as U ( castSTUArray )
-import Data.Array.ST
-
-import Control.Monad.ST
-
-import Data.Word
-import Data.Bits
-import Data.ByteString (ByteString)
-import qualified Data.ByteString as BS
-import GHC.Exts
-import GHC.Word
-import System.IO.Unsafe
-
-
-
--- -----------------------------------------------------------------------------
--- Converting floating-point literals to integrals for printing
-
-castFloatToWord8Array :: STUArray s Int Float -> ST s (STUArray s Int Word8)
-castFloatToWord8Array = U.castSTUArray
-
-castDoubleToWord8Array :: STUArray s Int Double -> ST s (STUArray s Int Word8)
-castDoubleToWord8Array = U.castSTUArray
-
--- floatToBytes and doubleToBytes convert to the host's byte
--- order. Providing that we're not cross-compiling for a
--- target with the opposite endianness, this should work ok
--- on all targets.
-
--- ToDo: this stuff is very similar to the shenanigans in PprAbs,
--- could they be merged?
-
-floatToBytes :: Float -> [Int]
-floatToBytes f
- = runST (do
- arr <- newArray_ ((0::Int),3)
- writeArray arr 0 f
- arr <- castFloatToWord8Array arr
- i0 <- readArray arr 0
- i1 <- readArray arr 1
- i2 <- readArray arr 2
- i3 <- readArray arr 3
- return (map fromIntegral [i0,i1,i2,i3])
- )
-
-doubleToBytes :: Double -> [Int]
-doubleToBytes d
- = runST (do
- arr <- newArray_ ((0::Int),7)
- writeArray arr 0 d
- arr <- castDoubleToWord8Array arr
- i0 <- readArray arr 0
- i1 <- readArray arr 1
- i2 <- readArray arr 2
- i3 <- readArray arr 3
- i4 <- readArray arr 4
- i5 <- readArray arr 5
- i6 <- readArray arr 6
- i7 <- readArray arr 7
- return (map fromIntegral [i0,i1,i2,i3,i4,i5,i6,i7])
- )
-
--- ---------------------------------------------------------------------------
--- Printing ASCII strings.
---
--- Print as a string and escape non-printable characters.
--- This is similar to charToC in Utils.
-
-pprASCII :: ByteString -> SDoc
-pprASCII str
- -- Transform this given literal bytestring to escaped string and construct
- -- the literal SDoc directly.
- -- See #14741
- -- and Note [Pretty print ASCII when AsmCodeGen]
- = text $ BS.foldr (\w s -> do1 w ++ s) "" str
- where
- do1 :: Word8 -> String
- do1 w | 0x09 == w = "\\t"
- | 0x0A == w = "\\n"
- | 0x22 == w = "\\\""
- | 0x5C == w = "\\\\"
- -- ASCII printable characters range
- | w >= 0x20 && w <= 0x7E = [chr' w]
- | otherwise = '\\' : octal w
-
- -- we know that the Chars we create are in the ASCII range
- -- so we bypass the check in "chr"
- chr' :: Word8 -> Char
- chr' (W8# w#) = C# (chr# (word2Int# w#))
-
- octal :: Word8 -> String
- octal w = [ chr' (ord0 + (w `unsafeShiftR` 6) .&. 0x07)
- , chr' (ord0 + (w `unsafeShiftR` 3) .&. 0x07)
- , chr' (ord0 + w .&. 0x07)
- ]
- ord0 = 0x30 -- = ord '0'
-
--- | Pretty print binary data.
---
--- Use either the ".string" directive or a ".incbin" directive.
--- See Note [Embedding large binary blobs]
---
--- A NULL byte is added after the binary data.
---
-pprBytes :: ByteString -> SDoc
-pprBytes bs = sdocWithDynFlags $ \dflags ->
- if binBlobThreshold dflags == 0
- || fromIntegral (BS.length bs) <= binBlobThreshold dflags
- then text "\t.string " <> doubleQuotes (pprASCII bs)
- else unsafePerformIO $ do
- bFile <- newTempName dflags TFL_CurrentModule ".dat"
- BS.writeFile bFile bs
- return $ text "\t.incbin "
- <> pprFilePathString bFile -- proper escape (see #16389)
- <> text "\n\t.byte 0"
-
-{-
-Note [Embedding large binary blobs]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-To embed a blob of binary data (e.g. an UTF-8 encoded string) into the generated
-code object, we have several options:
-
- 1. Generate a ".byte" directive for each byte. This is what was done in the past
- (see Note [Pretty print ASCII when AsmCodeGen]).
-
- 2. Generate a single ".string"/".asciz" directive for the whole sequence of
- bytes. Bytes in the ASCII printable range are rendered as characters and
- other values are escaped (e.g., "\t", "\077", etc.).
-
- 3. Create a temporary file into which we dump the binary data and generate a
- single ".incbin" directive. The assembler will include the binary file for
- us in the generated output object.
-
-Now the code generator uses either (2) or (3), depending on the binary blob
-size. Using (3) for small blobs adds too much overhead (see benchmark results
-in #16190), so we only do it when the size is above a threshold (500K at the
-time of writing).
-
-The threshold is configurable via the `-fbinary-blob-threshold` flag.
-
--}
-
-
-{-
-Note [Pretty print ASCII when AsmCodeGen]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Previously, when generating assembly code, we created SDoc with
-`(ptext . sLit)` for every bytes in literal bytestring, then
-combine them using `hcat`.
-
-When handling literal bytestrings with millions of bytes,
-millions of SDoc would be created and to combine, leading to
-high memory usage.
-
-Now we escape the given bytestring to string directly and construct
-SDoc only once. This improvement could dramatically decrease the
-memory allocation from 4.7GB to 1.3GB when embedding a 3MB literal
-string in source code. See #14741 for profiling results.
--}
-
--- ----------------------------------------------------------------------------
--- Printing section headers.
---
--- If -split-section was specified, include the suffix label, otherwise just
--- print the section type. For Darwin, where subsections-for-symbols are
--- used instead, only print section type.
---
--- For string literals, additional flags are specified to enable merging of
--- identical strings in the linker. With -split-sections each string also gets
--- a unique section to allow strings from unused code to be GC'd.
-
-pprSectionHeader :: Platform -> Section -> SDoc
-pprSectionHeader platform (Section t suffix) =
- case platformOS platform of
- OSAIX -> pprXcoffSectionHeader t
- OSDarwin -> pprDarwinSectionHeader t
- OSMinGW32 -> pprGNUSectionHeader (char '$') t suffix
- _ -> pprGNUSectionHeader (char '.') t suffix
-
-pprGNUSectionHeader :: SDoc -> SectionType -> CLabel -> SDoc
-pprGNUSectionHeader sep t suffix = sdocWithDynFlags $ \dflags ->
- let splitSections = gopt Opt_SplitSections dflags
- subsection | splitSections = sep <> ppr suffix
- | otherwise = empty
- in text ".section " <> ptext (header dflags) <> subsection <>
- flags dflags
- where
- header dflags = case t of
- Text -> sLit ".text"
- Data -> sLit ".data"
- ReadOnlyData | OSMinGW32 <- platformOS (targetPlatform dflags)
- -> sLit ".rdata"
- | otherwise -> sLit ".rodata"
- RelocatableReadOnlyData | OSMinGW32 <- platformOS (targetPlatform dflags)
- -- Concept does not exist on Windows,
- -- So map these to R/O data.
- -> sLit ".rdata$rel.ro"
- | otherwise -> sLit ".data.rel.ro"
- UninitialisedData -> sLit ".bss"
- ReadOnlyData16 | OSMinGW32 <- platformOS (targetPlatform dflags)
- -> sLit ".rdata$cst16"
- | otherwise -> sLit ".rodata.cst16"
- CString
- | OSMinGW32 <- platformOS (targetPlatform dflags)
- -> sLit ".rdata"
- | otherwise -> sLit ".rodata.str"
- OtherSection _ ->
- panic "PprBase.pprGNUSectionHeader: unknown section type"
- flags dflags = case t of
- CString
- | OSMinGW32 <- platformOS (targetPlatform dflags)
- -> empty
- | otherwise -> text ",\"aMS\"," <> sectionType "progbits" <> text ",1"
- _ -> empty
-
--- XCOFF doesn't support relocating label-differences, so we place all
--- RO sections into .text[PR] sections
-pprXcoffSectionHeader :: SectionType -> SDoc
-pprXcoffSectionHeader t = text $ case t of
- Text -> ".csect .text[PR]"
- Data -> ".csect .data[RW]"
- ReadOnlyData -> ".csect .text[PR] # ReadOnlyData"
- RelocatableReadOnlyData -> ".csect .text[PR] # RelocatableReadOnlyData"
- ReadOnlyData16 -> ".csect .text[PR] # ReadOnlyData16"
- CString -> ".csect .text[PR] # CString"
- UninitialisedData -> ".csect .data[BS]"
- OtherSection _ ->
- panic "PprBase.pprXcoffSectionHeader: unknown section type"
-
-pprDarwinSectionHeader :: SectionType -> SDoc
-pprDarwinSectionHeader t =
- ptext $ case t of
- Text -> sLit ".text"
- Data -> sLit ".data"
- ReadOnlyData -> sLit ".const"
- RelocatableReadOnlyData -> sLit ".const_data"
- UninitialisedData -> sLit ".data"
- ReadOnlyData16 -> sLit ".const"
- CString -> sLit ".section\t__TEXT,__cstring,cstring_literals"
- OtherSection _ ->
- panic "PprBase.pprDarwinSectionHeader: unknown section type"
diff --git a/compiler/nativeGen/Reg.hs b/compiler/nativeGen/Reg.hs
deleted file mode 100644
index 3808434d62..0000000000
--- a/compiler/nativeGen/Reg.hs
+++ /dev/null
@@ -1,241 +0,0 @@
--- | An architecture independent description of a register.
--- This needs to stay architecture independent because it is used
--- by NCGMonad and the register allocators, which are shared
--- by all architectures.
---
-module Reg (
- RegNo,
- Reg(..),
- regPair,
- regSingle,
- isRealReg, takeRealReg,
- isVirtualReg, takeVirtualReg,
-
- VirtualReg(..),
- renameVirtualReg,
- classOfVirtualReg,
- getHiVirtualRegFromLo,
- getHiVRegFromLo,
-
- RealReg(..),
- regNosOfRealReg,
- realRegsAlias,
-
- liftPatchFnToRegReg
-)
-
-where
-
-import GhcPrelude
-
-import Outputable
-import Unique
-import RegClass
-import Data.List (intersect)
-
--- | An identifier for a primitive real machine register.
-type RegNo
- = Int
-
--- VirtualRegs are virtual registers. The register allocator will
--- eventually have to map them into RealRegs, or into spill slots.
---
--- VirtualRegs are allocated on the fly, usually to represent a single
--- value in the abstract assembly code (i.e. dynamic registers are
--- usually single assignment).
---
--- The single assignment restriction isn't necessary to get correct code,
--- although a better register allocation will result if single
--- assignment is used -- because the allocator maps a VirtualReg into
--- a single RealReg, even if the VirtualReg has multiple live ranges.
---
--- Virtual regs can be of either class, so that info is attached.
---
-data VirtualReg
- = VirtualRegI {-# UNPACK #-} !Unique
- | VirtualRegHi {-# UNPACK #-} !Unique -- High part of 2-word register
- | VirtualRegF {-# UNPACK #-} !Unique
- | VirtualRegD {-# UNPACK #-} !Unique
-
- deriving (Eq, Show)
-
--- This is laborious, but necessary. We can't derive Ord because
--- Unique doesn't have an Ord instance. Note nonDetCmpUnique in the
--- implementation. See Note [No Ord for Unique]
--- This is non-deterministic but we do not currently support deterministic
--- code-generation. See Note [Unique Determinism and code generation]
-instance Ord VirtualReg where
- compare (VirtualRegI a) (VirtualRegI b) = nonDetCmpUnique a b
- compare (VirtualRegHi a) (VirtualRegHi b) = nonDetCmpUnique a b
- compare (VirtualRegF a) (VirtualRegF b) = nonDetCmpUnique a b
- compare (VirtualRegD a) (VirtualRegD b) = nonDetCmpUnique a b
-
- compare VirtualRegI{} _ = LT
- compare _ VirtualRegI{} = GT
- compare VirtualRegHi{} _ = LT
- compare _ VirtualRegHi{} = GT
- compare VirtualRegF{} _ = LT
- compare _ VirtualRegF{} = GT
-
-
-
-instance Uniquable VirtualReg where
- getUnique reg
- = case reg of
- VirtualRegI u -> u
- VirtualRegHi u -> u
- VirtualRegF u -> u
- VirtualRegD u -> u
-
-instance Outputable VirtualReg where
- ppr reg
- = case reg of
- VirtualRegI u -> text "%vI_" <> pprUniqueAlways u
- VirtualRegHi u -> text "%vHi_" <> pprUniqueAlways u
- -- this code is kinda wrong on x86
- -- because float and double occupy the same register set
- -- namely SSE2 register xmm0 .. xmm15
- VirtualRegF u -> text "%vFloat_" <> pprUniqueAlways u
- VirtualRegD u -> text "%vDouble_" <> pprUniqueAlways u
-
-
-
-renameVirtualReg :: Unique -> VirtualReg -> VirtualReg
-renameVirtualReg u r
- = case r of
- VirtualRegI _ -> VirtualRegI u
- VirtualRegHi _ -> VirtualRegHi u
- VirtualRegF _ -> VirtualRegF u
- VirtualRegD _ -> VirtualRegD u
-
-
-classOfVirtualReg :: VirtualReg -> RegClass
-classOfVirtualReg vr
- = case vr of
- VirtualRegI{} -> RcInteger
- VirtualRegHi{} -> RcInteger
- VirtualRegF{} -> RcFloat
- VirtualRegD{} -> RcDouble
-
-
-
--- Determine the upper-half vreg for a 64-bit quantity on a 32-bit platform
--- when supplied with the vreg for the lower-half of the quantity.
--- (NB. Not reversible).
-getHiVirtualRegFromLo :: VirtualReg -> VirtualReg
-getHiVirtualRegFromLo reg
- = case reg of
- -- makes a pseudo-unique with tag 'H'
- VirtualRegI u -> VirtualRegHi (newTagUnique u 'H')
- _ -> panic "Reg.getHiVirtualRegFromLo"
-
-getHiVRegFromLo :: Reg -> Reg
-getHiVRegFromLo reg
- = case reg of
- RegVirtual vr -> RegVirtual (getHiVirtualRegFromLo vr)
- RegReal _ -> panic "Reg.getHiVRegFromLo"
-
-
-------------------------------------------------------------------------------------
--- | RealRegs are machine regs which are available for allocation, in
--- the usual way. We know what class they are, because that's part of
--- the processor's architecture.
---
--- RealRegPairs are pairs of real registers that are allocated together
--- to hold a larger value, such as with Double regs on SPARC.
---
-data RealReg
- = RealRegSingle {-# UNPACK #-} !RegNo
- | RealRegPair {-# UNPACK #-} !RegNo {-# UNPACK #-} !RegNo
- deriving (Eq, Show, Ord)
-
-instance Uniquable RealReg where
- getUnique reg
- = case reg of
- RealRegSingle i -> mkRegSingleUnique i
- RealRegPair r1 r2 -> mkRegPairUnique (r1 * 65536 + r2)
-
-instance Outputable RealReg where
- ppr reg
- = case reg of
- RealRegSingle i -> text "%r" <> int i
- RealRegPair r1 r2 -> text "%r(" <> int r1
- <> vbar <> int r2 <> text ")"
-
-regNosOfRealReg :: RealReg -> [RegNo]
-regNosOfRealReg rr
- = case rr of
- RealRegSingle r1 -> [r1]
- RealRegPair r1 r2 -> [r1, r2]
-
-
-realRegsAlias :: RealReg -> RealReg -> Bool
-realRegsAlias rr1 rr2
- = not $ null $ intersect (regNosOfRealReg rr1) (regNosOfRealReg rr2)
-
---------------------------------------------------------------------------------
--- | A register, either virtual or real
-data Reg
- = RegVirtual !VirtualReg
- | RegReal !RealReg
- deriving (Eq, Ord)
-
-regSingle :: RegNo -> Reg
-regSingle regNo = RegReal $ RealRegSingle regNo
-
-regPair :: RegNo -> RegNo -> Reg
-regPair regNo1 regNo2 = RegReal $ RealRegPair regNo1 regNo2
-
-
--- We like to have Uniques for Reg so that we can make UniqFM and UniqSets
--- in the register allocator.
-instance Uniquable Reg where
- getUnique reg
- = case reg of
- RegVirtual vr -> getUnique vr
- RegReal rr -> getUnique rr
-
--- | Print a reg in a generic manner
--- If you want the architecture specific names, then use the pprReg
--- function from the appropriate Ppr module.
-instance Outputable Reg where
- ppr reg
- = case reg of
- RegVirtual vr -> ppr vr
- RegReal rr -> ppr rr
-
-
-isRealReg :: Reg -> Bool
-isRealReg reg
- = case reg of
- RegReal _ -> True
- RegVirtual _ -> False
-
-takeRealReg :: Reg -> Maybe RealReg
-takeRealReg reg
- = case reg of
- RegReal rr -> Just rr
- _ -> Nothing
-
-
-isVirtualReg :: Reg -> Bool
-isVirtualReg reg
- = case reg of
- RegReal _ -> False
- RegVirtual _ -> True
-
-takeVirtualReg :: Reg -> Maybe VirtualReg
-takeVirtualReg reg
- = case reg of
- RegReal _ -> Nothing
- RegVirtual vr -> Just vr
-
-
--- | The patch function supplied by the allocator maps VirtualReg to RealReg
--- regs, but sometimes we want to apply it to plain old Reg.
---
-liftPatchFnToRegReg :: (VirtualReg -> RealReg) -> (Reg -> Reg)
-liftPatchFnToRegReg patchF reg
- = case reg of
- RegVirtual vr -> RegReal (patchF vr)
- RegReal _ -> reg
diff --git a/compiler/nativeGen/RegAlloc/Graph/ArchBase.hs b/compiler/nativeGen/RegAlloc/Graph/ArchBase.hs
deleted file mode 100644
index c38d998779..0000000000
--- a/compiler/nativeGen/RegAlloc/Graph/ArchBase.hs
+++ /dev/null
@@ -1,163 +0,0 @@
-
--- | Utils for calculating general worst, bound, squeese and free, functions.
---
--- as per: "A Generalized Algorithm for Graph-Coloring Register Allocation"
--- Michael Smith, Normal Ramsey, Glenn Holloway.
--- PLDI 2004
---
--- These general versions are not used in GHC proper because they are too slow.
--- Instead, hand written optimised versions are provided for each architecture
--- in MachRegs*.hs
---
--- This code is here because we can test the architecture specific code against
--- it.
---
-module RegAlloc.Graph.ArchBase (
- RegClass(..),
- Reg(..),
- RegSub(..),
-
- worst,
- bound,
- squeese
-) where
-
-import GhcPrelude
-
-import UniqSet
-import UniqFM
-import Unique
-import MonadUtils (concatMapM)
-
-
--- Some basic register classes.
--- These aren't necessarily in 1-to-1 correspondence with the allocatable
--- RegClasses in MachRegs.hs
-data RegClass
- -- general purpose regs
- = ClassG32 -- 32 bit GPRs
- | ClassG16 -- 16 bit GPRs
- | ClassG8 -- 8 bit GPRs
-
- -- floating point regs
- | ClassF64 -- 64 bit FPRs
- deriving (Show, Eq, Enum)
-
-
--- | A register of some class
-data Reg
- -- a register of some class
- = Reg RegClass Int
-
- -- a sub-component of one of the other regs
- | RegSub RegSub Reg
- deriving (Show, Eq)
-
-
--- | so we can put regs in UniqSets
-instance Uniquable Reg where
- getUnique (Reg c i)
- = mkRegSingleUnique
- $ fromEnum c * 1000 + i
-
- getUnique (RegSub s (Reg c i))
- = mkRegSubUnique
- $ fromEnum s * 10000 + fromEnum c * 1000 + i
-
- getUnique (RegSub _ (RegSub _ _))
- = error "RegArchBase.getUnique: can't have a sub-reg of a sub-reg."
-
-
--- | A subcomponent of another register
-data RegSub
- = SubL16 -- lowest 16 bits
- | SubL8 -- lowest 8 bits
- | SubL8H -- second lowest 8 bits
- deriving (Show, Enum, Ord, Eq)
-
-
--- | Worst case displacement
---
--- a node N of classN has some number of neighbors,
--- all of which are from classC.
---
--- (worst neighbors classN classC) is the maximum number of potential
--- colors for N that can be lost by coloring its neighbors.
---
--- This should be hand coded/cached for each particular architecture,
--- because the compute time is very long..
-worst :: (RegClass -> UniqSet Reg)
- -> (Reg -> UniqSet Reg)
- -> Int -> RegClass -> RegClass -> Int
-
-worst regsOfClass regAlias neighbors classN classC
- = let regAliasS regs = unionManyUniqSets
- $ map regAlias
- $ nonDetEltsUniqSet regs
- -- This is non-deterministic but we do not
- -- currently support deterministic code-generation.
- -- See Note [Unique Determinism and code generation]
-
- -- all the regs in classes N, C
- regsN = regsOfClass classN
- regsC = regsOfClass classC
-
- -- all the possible subsets of c which have size < m
- regsS = filter (\s -> sizeUniqSet s >= 1
- && sizeUniqSet s <= neighbors)
- $ powersetLS regsC
-
- -- for each of the subsets of C, the regs which conflict
- -- with posiblities for N
- regsS_conflict
- = map (\s -> intersectUniqSets regsN (regAliasS s)) regsS
-
- in maximum $ map sizeUniqSet $ regsS_conflict
-
-
--- | For a node N of classN and neighbors of classesC
--- (bound classN classesC) is the maximum number of potential
--- colors for N that can be lost by coloring its neighbors.
-bound :: (RegClass -> UniqSet Reg)
- -> (Reg -> UniqSet Reg)
- -> RegClass -> [RegClass] -> Int
-
-bound regsOfClass regAlias classN classesC
- = let regAliasS regs = unionManyUniqSets
- $ map regAlias
- $ nonDetEltsUFM regs
- -- See Note [Unique Determinism and code generation]
-
- regsC_aliases
- = unionManyUniqSets
- $ map (regAliasS . getUniqSet . regsOfClass) classesC
-
- overlap = intersectUniqSets (regsOfClass classN) regsC_aliases
-
- in sizeUniqSet overlap
-
-
--- | The total squeese on a particular node with a list of neighbors.
---
--- A version of this should be constructed for each particular architecture,
--- possibly including uses of bound, so that alised registers don't get
--- counted twice, as per the paper.
-squeese :: (RegClass -> UniqSet Reg)
- -> (Reg -> UniqSet Reg)
- -> RegClass -> [(Int, RegClass)] -> Int
-
-squeese regsOfClass regAlias classN countCs
- = sum
- $ map (\(i, classC) -> worst regsOfClass regAlias i classN classC)
- $ countCs
-
-
--- | powerset (for lists)
-powersetL :: [a] -> [[a]]
-powersetL = concatMapM (\x -> [[],[x]])
-
-
--- | powersetLS (list of sets)
-powersetLS :: Uniquable a => UniqSet a -> [UniqSet a]
-powersetLS s = map mkUniqSet $ powersetL $ nonDetEltsUniqSet s
- -- See Note [Unique Determinism and code generation]
diff --git a/compiler/nativeGen/RegAlloc/Graph/ArchX86.hs b/compiler/nativeGen/RegAlloc/Graph/ArchX86.hs
deleted file mode 100644
index 0472e4cf09..0000000000
--- a/compiler/nativeGen/RegAlloc/Graph/ArchX86.hs
+++ /dev/null
@@ -1,161 +0,0 @@
-
--- | A description of the register set of the X86.
---
--- This isn't used directly in GHC proper.
---
--- See RegArchBase.hs for the reference.
--- See MachRegs.hs for the actual trivColorable function used in GHC.
---
-module RegAlloc.Graph.ArchX86 (
- classOfReg,
- regsOfClass,
- regName,
- regAlias,
- worst,
- squeese,
-) where
-
-import GhcPrelude
-
-import RegAlloc.Graph.ArchBase (Reg(..), RegSub(..), RegClass(..))
-import UniqSet
-
-import qualified Data.Array as A
-
-
--- | Determine the class of a register
-classOfReg :: Reg -> RegClass
-classOfReg reg
- = case reg of
- Reg c _ -> c
-
- RegSub SubL16 _ -> ClassG16
- RegSub SubL8 _ -> ClassG8
- RegSub SubL8H _ -> ClassG8
-
-
--- | Determine all the regs that make up a certain class.
-regsOfClass :: RegClass -> UniqSet Reg
-regsOfClass c
- = case c of
- ClassG32
- -> mkUniqSet [ Reg ClassG32 i
- | i <- [0..7] ]
-
- ClassG16
- -> mkUniqSet [ RegSub SubL16 (Reg ClassG32 i)
- | i <- [0..7] ]
-
- ClassG8
- -> unionUniqSets
- (mkUniqSet [ RegSub SubL8 (Reg ClassG32 i) | i <- [0..3] ])
- (mkUniqSet [ RegSub SubL8H (Reg ClassG32 i) | i <- [0..3] ])
-
- ClassF64
- -> mkUniqSet [ Reg ClassF64 i
- | i <- [0..5] ]
-
-
--- | Determine the common name of a reg
--- returns Nothing if this reg is not part of the machine.
-regName :: Reg -> Maybe String
-regName reg
- = case reg of
- Reg ClassG32 i
- | i <= 7 ->
- let names = A.listArray (0,8)
- [ "eax", "ebx", "ecx", "edx"
- , "ebp", "esi", "edi", "esp" ]
- in Just $ names A.! i
-
- RegSub SubL16 (Reg ClassG32 i)
- | i <= 7 ->
- let names = A.listArray (0,8)
- [ "ax", "bx", "cx", "dx"
- , "bp", "si", "di", "sp"]
- in Just $ names A.! i
-
- RegSub SubL8 (Reg ClassG32 i)
- | i <= 3 ->
- let names = A.listArray (0,4) [ "al", "bl", "cl", "dl"]
- in Just $ names A.! i
-
- RegSub SubL8H (Reg ClassG32 i)
- | i <= 3 ->
- let names = A.listArray (0,4) [ "ah", "bh", "ch", "dh"]
- in Just $ names A.! i
-
- _ -> Nothing
-
-
--- | Which regs alias what other regs.
-regAlias :: Reg -> UniqSet Reg
-regAlias reg
- = case reg of
-
- -- 32 bit regs alias all of the subregs
- Reg ClassG32 i
-
- -- for eax, ebx, ecx, eds
- | i <= 3
- -> mkUniqSet
- $ [ Reg ClassG32 i, RegSub SubL16 reg
- , RegSub SubL8 reg, RegSub SubL8H reg ]
-
- -- for esi, edi, esp, ebp
- | 4 <= i && i <= 7
- -> mkUniqSet
- $ [ Reg ClassG32 i, RegSub SubL16 reg ]
-
- -- 16 bit subregs alias the whole reg
- RegSub SubL16 r@(Reg ClassG32 _)
- -> regAlias r
-
- -- 8 bit subregs alias the 32 and 16, but not the other 8 bit subreg
- RegSub SubL8 r@(Reg ClassG32 _)
- -> mkUniqSet $ [ r, RegSub SubL16 r, RegSub SubL8 r ]
-
- RegSub SubL8H r@(Reg ClassG32 _)
- -> mkUniqSet $ [ r, RegSub SubL16 r, RegSub SubL8H r ]
-
- -- fp
- Reg ClassF64 _
- -> unitUniqSet reg
-
- _ -> error "regAlias: invalid register"
-
-
--- | Optimised versions of RegColorBase.{worst, squeese} specific to x86
-worst :: Int -> RegClass -> RegClass -> Int
-worst n classN classC
- = case classN of
- ClassG32
- -> case classC of
- ClassG32 -> min n 8
- ClassG16 -> min n 8
- ClassG8 -> min n 4
- ClassF64 -> 0
-
- ClassG16
- -> case classC of
- ClassG32 -> min n 8
- ClassG16 -> min n 8
- ClassG8 -> min n 4
- ClassF64 -> 0
-
- ClassG8
- -> case classC of
- ClassG32 -> min (n*2) 8
- ClassG16 -> min (n*2) 8
- ClassG8 -> min n 8
- ClassF64 -> 0
-
- ClassF64
- -> case classC of
- ClassF64 -> min n 6
- _ -> 0
-
-squeese :: RegClass -> [(Int, RegClass)] -> Int
-squeese classN countCs
- = sum (map (\(i, classC) -> worst i classN classC) countCs)
-
diff --git a/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs b/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs
deleted file mode 100644
index f42ff9450a..0000000000
--- a/compiler/nativeGen/RegAlloc/Graph/Coalesce.hs
+++ /dev/null
@@ -1,99 +0,0 @@
--- | Register coalescing.
-module RegAlloc.Graph.Coalesce (
- regCoalesce,
- slurpJoinMovs
-) where
-import GhcPrelude
-
-import RegAlloc.Liveness
-import Instruction
-import Reg
-
-import GHC.Cmm
-import Bag
-import Digraph
-import UniqFM
-import UniqSet
-import UniqSupply
-
-
--- | Do register coalescing on this top level thing
---
--- For Reg -> Reg moves, if the first reg dies at the same time the
--- second reg is born then the mov only serves to join live ranges.
--- The two regs can be renamed to be the same and the move instruction
--- safely erased.
-regCoalesce
- :: Instruction instr
- => [LiveCmmDecl statics instr]
- -> UniqSM [LiveCmmDecl statics instr]
-
-regCoalesce code
- = do
- let joins = foldl' unionBags emptyBag
- $ map slurpJoinMovs code
-
- let alloc = foldl' buildAlloc emptyUFM
- $ bagToList joins
-
- let patched = map (patchEraseLive (sinkReg alloc)) code
-
- return patched
-
-
--- | Add a v1 = v2 register renaming to the map.
--- The register with the lowest lexical name is set as the
--- canonical version.
-buildAlloc :: UniqFM Reg -> (Reg, Reg) -> UniqFM Reg
-buildAlloc fm (r1, r2)
- = let rmin = min r1 r2
- rmax = max r1 r2
- in addToUFM fm rmax rmin
-
-
--- | Determine the canonical name for a register by following
--- v1 = v2 renamings in this map.
-sinkReg :: UniqFM Reg -> Reg -> Reg
-sinkReg fm r
- = case lookupUFM fm r of
- Nothing -> r
- Just r' -> sinkReg fm r'
-
-
--- | Slurp out mov instructions that only serve to join live ranges.
---
--- During a mov, if the source reg dies and the destination reg is
--- born then we can rename the two regs to the same thing and
--- eliminate the move.
-slurpJoinMovs
- :: Instruction instr
- => LiveCmmDecl statics instr
- -> Bag (Reg, Reg)
-
-slurpJoinMovs live
- = slurpCmm emptyBag live
- where
- slurpCmm rs CmmData{}
- = rs
-
- slurpCmm rs (CmmProc _ _ _ sccs)
- = foldl' slurpBlock rs (flattenSCCs sccs)
-
- slurpBlock rs (BasicBlock _ instrs)
- = foldl' slurpLI rs instrs
-
- slurpLI rs (LiveInstr _ Nothing) = rs
- slurpLI rs (LiveInstr instr (Just live))
- | Just (r1, r2) <- takeRegRegMoveInstr instr
- , elementOfUniqSet r1 $ liveDieRead live
- , elementOfUniqSet r2 $ liveBorn live
-
- -- only coalesce movs between two virtuals for now,
- -- else we end up with allocatable regs in the live
- -- regs list..
- , isVirtualReg r1 && isVirtualReg r2
- = consBag (r1, r2) rs
-
- | otherwise
- = rs
-
diff --git a/compiler/nativeGen/RegAlloc/Graph/Main.hs b/compiler/nativeGen/RegAlloc/Graph/Main.hs
deleted file mode 100644
index 6b2758f723..0000000000
--- a/compiler/nativeGen/RegAlloc/Graph/Main.hs
+++ /dev/null
@@ -1,472 +0,0 @@
-{-# LANGUAGE CPP #-}
-{-# LANGUAGE ScopedTypeVariables #-}
-
--- | Graph coloring register allocator.
-module RegAlloc.Graph.Main (
- regAlloc
-) where
-import GhcPrelude
-
-import qualified GraphColor as Color
-import RegAlloc.Liveness
-import RegAlloc.Graph.Spill
-import RegAlloc.Graph.SpillClean
-import RegAlloc.Graph.SpillCost
-import RegAlloc.Graph.Stats
-import RegAlloc.Graph.TrivColorable
-import Instruction
-import TargetReg
-import RegClass
-import Reg
-
-import Bag
-import GHC.Driver.Session
-import Outputable
-import GHC.Platform
-import UniqFM
-import UniqSet
-import UniqSupply
-import Util (seqList)
-import CFG
-
-import Data.Maybe
-import Control.Monad
-
-
--- | The maximum number of build\/spill cycles we'll allow.
---
--- It should only take 3 or 4 cycles for the allocator to converge.
--- If it takes any longer than this it's probably in an infinite loop,
--- so it's better just to bail out and report a bug.
-maxSpinCount :: Int
-maxSpinCount = 10
-
-
--- | The top level of the graph coloring register allocator.
-regAlloc
- :: (Outputable statics, Outputable instr, Instruction instr)
- => DynFlags
- -> UniqFM (UniqSet RealReg) -- ^ registers we can use for allocation
- -> UniqSet Int -- ^ set of available spill slots.
- -> Int -- ^ current number of spill slots
- -> [LiveCmmDecl statics instr] -- ^ code annotated with liveness information.
- -> Maybe CFG -- ^ CFG of basic blocks if available
- -> UniqSM ( [NatCmmDecl statics instr]
- , Maybe Int, [RegAllocStats statics instr] )
- -- ^ code with registers allocated, additional stacks required
- -- and stats for each stage of allocation
-
-regAlloc dflags regsFree slotsFree slotsCount code cfg
- = do
- -- TODO: the regClass function is currently hard coded to the default
- -- target architecture. Would prefer to determine this from dflags.
- -- There are other uses of targetRegClass later in this module.
- let platform = targetPlatform dflags
- triv = trivColorable platform
- (targetVirtualRegSqueeze platform)
- (targetRealRegSqueeze platform)
-
- (code_final, debug_codeGraphs, slotsCount', _)
- <- regAlloc_spin dflags 0
- triv
- regsFree slotsFree slotsCount [] code cfg
-
- let needStack
- | slotsCount == slotsCount'
- = Nothing
- | otherwise
- = Just slotsCount'
-
- return ( code_final
- , needStack
- , reverse debug_codeGraphs )
-
-
--- | Perform solver iterations for the graph coloring allocator.
---
--- We extract a register conflict graph from the provided cmm code,
--- and try to colour it. If that works then we use the solution rewrite
--- the code with real hregs. If coloring doesn't work we add spill code
--- and try to colour it again. After `maxSpinCount` iterations we give up.
---
-regAlloc_spin
- :: forall instr statics.
- (Instruction instr,
- Outputable instr,
- Outputable statics)
- => DynFlags
- -> Int -- ^ Number of solver iterations we've already performed.
- -> Color.Triv VirtualReg RegClass RealReg
- -- ^ Function for calculating whether a register is trivially
- -- colourable.
- -> UniqFM (UniqSet RealReg) -- ^ Free registers that we can allocate.
- -> UniqSet Int -- ^ Free stack slots that we can use.
- -> Int -- ^ Number of spill slots in use
- -> [RegAllocStats statics instr] -- ^ Current regalloc stats to add to.
- -> [LiveCmmDecl statics instr] -- ^ Liveness annotated code to allocate.
- -> Maybe CFG
- -> UniqSM ( [NatCmmDecl statics instr]
- , [RegAllocStats statics instr]
- , Int -- Slots in use
- , Color.Graph VirtualReg RegClass RealReg)
-
-regAlloc_spin dflags spinCount triv regsFree slotsFree slotsCount debug_codeGraphs code cfg
- = do
- let platform = targetPlatform dflags
-
- -- If any of these dump flags are turned on we want to hang on to
- -- intermediate structures in the allocator - otherwise tell the
- -- allocator to ditch them early so we don't end up creating space leaks.
- let dump = or
- [ dopt Opt_D_dump_asm_regalloc_stages dflags
- , dopt Opt_D_dump_asm_stats dflags
- , dopt Opt_D_dump_asm_conflicts dflags ]
-
- -- Check that we're not running off down the garden path.
- when (spinCount > maxSpinCount)
- $ pprPanic "regAlloc_spin: max build/spill cycle count exceeded."
- ( text "It looks like the register allocator is stuck in an infinite loop."
- $$ text "max cycles = " <> int maxSpinCount
- $$ text "regsFree = " <> (hcat $ punctuate space $ map ppr
- $ nonDetEltsUniqSet $ unionManyUniqSets
- $ nonDetEltsUFM regsFree)
- -- This is non-deterministic but we do not
- -- currently support deterministic code-generation.
- -- See Note [Unique Determinism and code generation]
- $$ text "slotsFree = " <> ppr (sizeUniqSet slotsFree))
-
- -- Build the register conflict graph from the cmm code.
- (graph :: Color.Graph VirtualReg RegClass RealReg)
- <- {-# SCC "BuildGraph" #-} buildGraph code
-
- -- VERY IMPORTANT:
- -- We really do want the graph to be fully evaluated _before_ we
- -- start coloring. If we don't do this now then when the call to
- -- Color.colorGraph forces bits of it, the heap will be filled with
- -- half evaluated pieces of graph and zillions of apply thunks.
- seqGraph graph `seq` return ()
-
- -- Build a map of the cost of spilling each instruction.
- -- This is a lazy binding, so the map will only be computed if we
- -- actually have to spill to the stack.
- let spillCosts = foldl' plusSpillCostInfo zeroSpillCostInfo
- $ map (slurpSpillCostInfo platform cfg) code
-
- -- The function to choose regs to leave uncolored.
- let spill = chooseSpill spillCosts
-
- -- Record startup state in our log.
- let stat1
- = if spinCount == 0
- then Just $ RegAllocStatsStart
- { raLiveCmm = code
- , raGraph = graph
- , raSpillCosts = spillCosts }
- else Nothing
-
- -- Try and color the graph.
- let (graph_colored, rsSpill, rmCoalesce)
- = {-# SCC "ColorGraph" #-}
- Color.colorGraph
- (gopt Opt_RegsIterative dflags)
- spinCount
- regsFree triv spill graph
-
- -- Rewrite registers in the code that have been coalesced.
- let patchF reg
- | RegVirtual vr <- reg
- = case lookupUFM rmCoalesce vr of
- Just vr' -> patchF (RegVirtual vr')
- Nothing -> reg
-
- | otherwise
- = reg
-
- let (code_coalesced :: [LiveCmmDecl statics instr])
- = map (patchEraseLive patchF) code
-
- -- Check whether we've found a coloring.
- if isEmptyUniqSet rsSpill
-
- -- Coloring was successful because no registers needed to be spilled.
- then do
- -- if -fasm-lint is turned on then validate the graph.
- -- This checks for bugs in the graph allocator itself.
- let graph_colored_lint =
- if gopt Opt_DoAsmLinting dflags
- then Color.validateGraph (text "")
- True -- Require all nodes to be colored.
- graph_colored
- else graph_colored
-
- -- Rewrite the code to use real hregs, using the colored graph.
- let code_patched
- = map (patchRegsFromGraph platform graph_colored_lint)
- code_coalesced
-
- -- Clean out unneeded SPILL/RELOAD meta instructions.
- -- The spill code generator just spills the entire live range
- -- of a vreg, but it might not need to be on the stack for
- -- its entire lifetime.
- let code_spillclean
- = map (cleanSpills platform) code_patched
-
- -- Strip off liveness information from the allocated code.
- -- Also rewrite SPILL/RELOAD meta instructions into real machine
- -- instructions along the way
- let code_final
- = map (stripLive dflags) code_spillclean
-
- -- Record what happened in this stage for debugging
- let stat
- = RegAllocStatsColored
- { raCode = code
- , raGraph = graph
- , raGraphColored = graph_colored_lint
- , raCoalesced = rmCoalesce
- , raCodeCoalesced = code_coalesced
- , raPatched = code_patched
- , raSpillClean = code_spillclean
- , raFinal = code_final
- , raSRMs = foldl' addSRM (0, 0, 0)
- $ map countSRMs code_spillclean }
-
- -- Bundle up all the register allocator statistics.
- -- .. but make sure to drop them on the floor if they're not
- -- needed, otherwise we'll get a space leak.
- let statList =
- if dump then [stat] ++ maybeToList stat1 ++ debug_codeGraphs
- else []
-
- -- Ensure all the statistics are evaluated, to avoid space leaks.
- seqList statList (return ())
-
- return ( code_final
- , statList
- , slotsCount
- , graph_colored_lint)
-
- -- Coloring was unsuccessful. We need to spill some register to the
- -- stack, make a new graph, and try to color it again.
- else do
- -- if -fasm-lint is turned on then validate the graph
- let graph_colored_lint =
- if gopt Opt_DoAsmLinting dflags
- then Color.validateGraph (text "")
- False -- don't require nodes to be colored
- graph_colored
- else graph_colored
-
- -- Spill uncolored regs to the stack.
- (code_spilled, slotsFree', slotsCount', spillStats)
- <- regSpill platform code_coalesced slotsFree slotsCount rsSpill
-
- -- Recalculate liveness information.
- -- NOTE: we have to reverse the SCCs here to get them back into
- -- the reverse-dependency order required by computeLiveness.
- -- If they're not in the correct order that function will panic.
- code_relive <- mapM (regLiveness platform . reverseBlocksInTops)
- code_spilled
-
- -- Record what happened in this stage for debugging.
- let stat =
- RegAllocStatsSpill
- { raCode = code
- , raGraph = graph_colored_lint
- , raCoalesced = rmCoalesce
- , raSpillStats = spillStats
- , raSpillCosts = spillCosts
- , raSpilled = code_spilled }
-
- -- Bundle up all the register allocator statistics.
- -- .. but make sure to drop them on the floor if they're not
- -- needed, otherwise we'll get a space leak.
- let statList =
- if dump
- then [stat] ++ maybeToList stat1 ++ debug_codeGraphs
- else []
-
- -- Ensure all the statistics are evaluated, to avoid space leaks.
- seqList statList (return ())
-
- regAlloc_spin dflags (spinCount + 1) triv regsFree slotsFree'
- slotsCount' statList code_relive cfg
-
-
--- | Build a graph from the liveness and coalesce information in this code.
-buildGraph
- :: Instruction instr
- => [LiveCmmDecl statics instr]
- -> UniqSM (Color.Graph VirtualReg RegClass RealReg)
-
-buildGraph code
- = do
- -- Slurp out the conflicts and reg->reg moves from this code.
- let (conflictList, moveList) =
- unzip $ map slurpConflicts code
-
- -- Slurp out the spill/reload coalesces.
- let moveList2 = map slurpReloadCoalesce code
-
- -- Add the reg-reg conflicts to the graph.
- let conflictBag = unionManyBags conflictList
- let graph_conflict
- = foldr graphAddConflictSet Color.initGraph conflictBag
-
- -- Add the coalescences edges to the graph.
- let moveBag
- = unionBags (unionManyBags moveList2)
- (unionManyBags moveList)
-
- let graph_coalesce
- = foldr graphAddCoalesce graph_conflict moveBag
-
- return graph_coalesce
-
-
--- | Add some conflict edges to the graph.
--- Conflicts between virtual and real regs are recorded as exclusions.
-graphAddConflictSet
- :: UniqSet Reg
- -> Color.Graph VirtualReg RegClass RealReg
- -> Color.Graph VirtualReg RegClass RealReg
-
-graphAddConflictSet set graph
- = let virtuals = mkUniqSet
- [ vr | RegVirtual vr <- nonDetEltsUniqSet set ]
-
- graph1 = Color.addConflicts virtuals classOfVirtualReg graph
-
- graph2 = foldr (\(r1, r2) -> Color.addExclusion r1 classOfVirtualReg r2)
- graph1
- [ (vr, rr)
- | RegVirtual vr <- nonDetEltsUniqSet set
- , RegReal rr <- nonDetEltsUniqSet set]
- -- See Note [Unique Determinism and code generation]
-
- in graph2
-
-
--- | Add some coalesence edges to the graph
--- Coalesences between virtual and real regs are recorded as preferences.
-graphAddCoalesce
- :: (Reg, Reg)
- -> Color.Graph VirtualReg RegClass RealReg
- -> Color.Graph VirtualReg RegClass RealReg
-
-graphAddCoalesce (r1, r2) graph
- | RegReal rr <- r1
- , RegVirtual vr <- r2
- = Color.addPreference (vr, classOfVirtualReg vr) rr graph
-
- | RegReal rr <- r2
- , RegVirtual vr <- r1
- = Color.addPreference (vr, classOfVirtualReg vr) rr graph
-
- | RegVirtual vr1 <- r1
- , RegVirtual vr2 <- r2
- = Color.addCoalesce
- (vr1, classOfVirtualReg vr1)
- (vr2, classOfVirtualReg vr2)
- graph
-
- -- We can't coalesce two real regs, but there could well be existing
- -- hreg,hreg moves in the input code. We'll just ignore these
- -- for coalescing purposes.
- | RegReal _ <- r1
- , RegReal _ <- r2
- = graph
-
-#if __GLASGOW_HASKELL__ <= 810
- | otherwise
- = panic "graphAddCoalesce"
-#endif
-
-
--- | Patch registers in code using the reg -> reg mapping in this graph.
-patchRegsFromGraph
- :: (Outputable statics, Outputable instr, Instruction instr)
- => Platform -> Color.Graph VirtualReg RegClass RealReg
- -> LiveCmmDecl statics instr -> LiveCmmDecl statics instr
-
-patchRegsFromGraph platform graph code
- = patchEraseLive patchF code
- where
- -- Function to lookup the hardreg for a virtual reg from the graph.
- patchF reg
- -- leave real regs alone.
- | RegReal{} <- reg
- = reg
-
- -- this virtual has a regular node in the graph.
- | RegVirtual vr <- reg
- , Just node <- Color.lookupNode graph vr
- = case Color.nodeColor node of
- Just color -> RegReal color
- Nothing -> RegVirtual vr
-
- -- no node in the graph for this virtual, bad news.
- | otherwise
- = pprPanic "patchRegsFromGraph: register mapping failed."
- ( text "There is no node in the graph for register "
- <> ppr reg
- $$ ppr code
- $$ Color.dotGraph
- (\_ -> text "white")
- (trivColorable platform
- (targetVirtualRegSqueeze platform)
- (targetRealRegSqueeze platform))
- graph)
-
-
------
--- for when laziness just isn't what you wanted...
--- We need to deepSeq the whole graph before trying to colour it to avoid
--- space leaks.
-seqGraph :: Color.Graph VirtualReg RegClass RealReg -> ()
-seqGraph graph = seqNodes (nonDetEltsUFM (Color.graphMap graph))
- -- See Note [Unique Determinism and code generation]
-
-seqNodes :: [Color.Node VirtualReg RegClass RealReg] -> ()
-seqNodes ns
- = case ns of
- [] -> ()
- (n : ns) -> seqNode n `seq` seqNodes ns
-
-seqNode :: Color.Node VirtualReg RegClass RealReg -> ()
-seqNode node
- = seqVirtualReg (Color.nodeId node)
- `seq` seqRegClass (Color.nodeClass node)
- `seq` seqMaybeRealReg (Color.nodeColor node)
- `seq` (seqVirtualRegList (nonDetEltsUniqSet (Color.nodeConflicts node)))
- `seq` (seqRealRegList (nonDetEltsUniqSet (Color.nodeExclusions node)))
- `seq` (seqRealRegList (Color.nodePreference node))
- `seq` (seqVirtualRegList (nonDetEltsUniqSet (Color.nodeCoalesce node)))
- -- It's OK to use nonDetEltsUniqSet for seq
-
-seqVirtualReg :: VirtualReg -> ()
-seqVirtualReg reg = reg `seq` ()
-
-seqRealReg :: RealReg -> ()
-seqRealReg reg = reg `seq` ()
-
-seqRegClass :: RegClass -> ()
-seqRegClass c = c `seq` ()
-
-seqMaybeRealReg :: Maybe RealReg -> ()
-seqMaybeRealReg mr
- = case mr of
- Nothing -> ()
- Just r -> seqRealReg r
-
-seqVirtualRegList :: [VirtualReg] -> ()
-seqVirtualRegList rs
- = case rs of
- [] -> ()
- (r : rs) -> seqVirtualReg r `seq` seqVirtualRegList rs
-
-seqRealRegList :: [RealReg] -> ()
-seqRealRegList rs
- = case rs of
- [] -> ()
- (r : rs) -> seqRealReg r `seq` seqRealRegList rs
diff --git a/compiler/nativeGen/RegAlloc/Graph/Spill.hs b/compiler/nativeGen/RegAlloc/Graph/Spill.hs
deleted file mode 100644
index 9ffb51ee29..0000000000
--- a/compiler/nativeGen/RegAlloc/Graph/Spill.hs
+++ /dev/null
@@ -1,382 +0,0 @@
-
--- | When there aren't enough registers to hold all the vregs we have to spill
--- some of those vregs to slots on the stack. This module is used modify the
--- code to use those slots.
-module RegAlloc.Graph.Spill (
- regSpill,
- SpillStats(..),
- accSpillSL
-) where
-import GhcPrelude
-
-import RegAlloc.Liveness
-import Instruction
-import Reg
-import GHC.Cmm hiding (RegSet)
-import GHC.Cmm.BlockId
-import GHC.Cmm.Dataflow.Collections
-
-import MonadUtils
-import State
-import Unique
-import UniqFM
-import UniqSet
-import UniqSupply
-import Outputable
-import GHC.Platform
-
-import Data.List
-import Data.Maybe
-import Data.IntSet (IntSet)
-import qualified Data.IntSet as IntSet
-
-
--- | Spill all these virtual regs to stack slots.
---
--- Bumps the number of required stack slots if required.
---
---
--- TODO: See if we can split some of the live ranges instead of just globally
--- spilling the virtual reg. This might make the spill cleaner's job easier.
---
--- TODO: On CISCy x86 and x86_64 we don't necessarily have to add a mov instruction
--- when making spills. If an instr is using a spilled virtual we may be able to
--- address the spill slot directly.
---
-regSpill
- :: Instruction instr
- => Platform
- -> [LiveCmmDecl statics instr] -- ^ the code
- -> UniqSet Int -- ^ available stack slots
- -> Int -- ^ current number of spill slots.
- -> UniqSet VirtualReg -- ^ the regs to spill
- -> UniqSM
- ([LiveCmmDecl statics instr]
- -- code with SPILL and RELOAD meta instructions added.
- , UniqSet Int -- left over slots
- , Int -- slot count in use now.
- , SpillStats ) -- stats about what happened during spilling
-
-regSpill platform code slotsFree slotCount regs
-
- -- Not enough slots to spill these regs.
- | sizeUniqSet slotsFree < sizeUniqSet regs
- = -- pprTrace "Bumping slot count:" (ppr slotCount <> text " -> " <> ppr (slotCount+512)) $
- let slotsFree' = (addListToUniqSet slotsFree [slotCount+1 .. slotCount+512])
- in regSpill platform code slotsFree' (slotCount+512) regs
-
- | otherwise
- = do
- -- Allocate a slot for each of the spilled regs.
- let slots = take (sizeUniqSet regs) $ nonDetEltsUniqSet slotsFree
- let regSlotMap = listToUFM
- $ zip (nonDetEltsUniqSet regs) slots
- -- This is non-deterministic but we do not
- -- currently support deterministic code-generation.
- -- See Note [Unique Determinism and code generation]
-
- -- Grab the unique supply from the monad.
- us <- getUniqueSupplyM
-
- -- Run the spiller on all the blocks.
- let (code', state') =
- runState (mapM (regSpill_top platform regSlotMap) code)
- (initSpillS us)
-
- return ( code'
- , minusUniqSet slotsFree (mkUniqSet slots)
- , slotCount
- , makeSpillStats state')
-
-
--- | Spill some registers to stack slots in a top-level thing.
-regSpill_top
- :: Instruction instr
- => Platform
- -> RegMap Int
- -- ^ map of vregs to slots they're being spilled to.
- -> LiveCmmDecl statics instr
- -- ^ the top level thing.
- -> SpillM (LiveCmmDecl statics instr)
-
-regSpill_top platform regSlotMap cmm
- = case cmm of
- CmmData{}
- -> return cmm
-
- CmmProc info label live sccs
- | LiveInfo static firstId liveVRegsOnEntry liveSlotsOnEntry <- info
- -> do
- -- The liveVRegsOnEntry contains the set of vregs that are live
- -- on entry to each basic block. If we spill one of those vregs
- -- we remove it from that set and add the corresponding slot
- -- number to the liveSlotsOnEntry set. The spill cleaner needs
- -- this information to erase unneeded spill and reload instructions
- -- after we've done a successful allocation.
- let liveSlotsOnEntry' :: BlockMap IntSet
- liveSlotsOnEntry'
- = mapFoldlWithKey patchLiveSlot
- liveSlotsOnEntry liveVRegsOnEntry
-
- let info'
- = LiveInfo static firstId
- liveVRegsOnEntry
- liveSlotsOnEntry'
-
- -- Apply the spiller to all the basic blocks in the CmmProc.
- sccs' <- mapM (mapSCCM (regSpill_block platform regSlotMap)) sccs
-
- return $ CmmProc info' label live sccs'
-
- where -- Given a BlockId and the set of registers live in it,
- -- if registers in this block are being spilled to stack slots,
- -- then record the fact that these slots are now live in those blocks
- -- in the given slotmap.
- patchLiveSlot
- :: BlockMap IntSet -> BlockId -> RegSet -> BlockMap IntSet
-
- patchLiveSlot slotMap blockId regsLive
- = let
- -- Slots that are already recorded as being live.
- curSlotsLive = fromMaybe IntSet.empty
- $ mapLookup blockId slotMap
-
- moreSlotsLive = IntSet.fromList
- $ catMaybes
- $ map (lookupUFM regSlotMap)
- $ nonDetEltsUniqSet regsLive
- -- See Note [Unique Determinism and code generation]
-
- slotMap'
- = mapInsert blockId (IntSet.union curSlotsLive moreSlotsLive)
- slotMap
-
- in slotMap'
-
-
--- | Spill some registers to stack slots in a basic block.
-regSpill_block
- :: Instruction instr
- => Platform
- -> UniqFM Int -- ^ map of vregs to slots they're being spilled to.
- -> LiveBasicBlock instr
- -> SpillM (LiveBasicBlock instr)
-
-regSpill_block platform regSlotMap (BasicBlock i instrs)
- = do instrss' <- mapM (regSpill_instr platform regSlotMap) instrs
- return $ BasicBlock i (concat instrss')
-
-
--- | Spill some registers to stack slots in a single instruction.
--- If the instruction uses registers that need to be spilled, then it is
--- prefixed (or postfixed) with the appropriate RELOAD or SPILL meta
--- instructions.
-regSpill_instr
- :: Instruction instr
- => Platform
- -> UniqFM Int -- ^ map of vregs to slots they're being spilled to.
- -> LiveInstr instr
- -> SpillM [LiveInstr instr]
-
-regSpill_instr _ _ li@(LiveInstr _ Nothing)
- = do return [li]
-
-regSpill_instr platform regSlotMap
- (LiveInstr instr (Just _))
- = do
- -- work out which regs are read and written in this instr
- let RU rlRead rlWritten = regUsageOfInstr platform instr
-
- -- sometimes a register is listed as being read more than once,
- -- nub this so we don't end up inserting two lots of spill code.
- let rsRead_ = nub rlRead
- let rsWritten_ = nub rlWritten
-
- -- if a reg is modified, it appears in both lists, want to undo this..
- let rsRead = rsRead_ \\ rsWritten_
- let rsWritten = rsWritten_ \\ rsRead_
- let rsModify = intersect rsRead_ rsWritten_
-
- -- work out if any of the regs being used are currently being spilled.
- let rsSpillRead = filter (\r -> elemUFM r regSlotMap) rsRead
- let rsSpillWritten = filter (\r -> elemUFM r regSlotMap) rsWritten
- let rsSpillModify = filter (\r -> elemUFM r regSlotMap) rsModify
-
- -- rewrite the instr and work out spill code.
- (instr1, prepost1) <- mapAccumLM (spillRead regSlotMap) instr rsSpillRead
- (instr2, prepost2) <- mapAccumLM (spillWrite regSlotMap) instr1 rsSpillWritten
- (instr3, prepost3) <- mapAccumLM (spillModify regSlotMap) instr2 rsSpillModify
-
- let (mPrefixes, mPostfixes) = unzip (prepost1 ++ prepost2 ++ prepost3)
- let prefixes = concat mPrefixes
- let postfixes = concat mPostfixes
-
- -- final code
- let instrs' = prefixes
- ++ [LiveInstr instr3 Nothing]
- ++ postfixes
-
- return $ instrs'
-
-
--- | Add a RELOAD met a instruction to load a value for an instruction that
--- writes to a vreg that is being spilled.
-spillRead
- :: Instruction instr
- => UniqFM Int
- -> instr
- -> Reg
- -> SpillM (instr, ([LiveInstr instr'], [LiveInstr instr']))
-
-spillRead regSlotMap instr reg
- | Just slot <- lookupUFM regSlotMap reg
- = do (instr', nReg) <- patchInstr reg instr
-
- modify $ \s -> s
- { stateSpillSL = addToUFM_C accSpillSL (stateSpillSL s) reg (reg, 0, 1) }
-
- return ( instr'
- , ( [LiveInstr (RELOAD slot nReg) Nothing]
- , []) )
-
- | otherwise = panic "RegSpill.spillRead: no slot defined for spilled reg"
-
-
--- | Add a SPILL meta instruction to store a value for an instruction that
--- writes to a vreg that is being spilled.
-spillWrite
- :: Instruction instr
- => UniqFM Int
- -> instr
- -> Reg
- -> SpillM (instr, ([LiveInstr instr'], [LiveInstr instr']))
-
-spillWrite regSlotMap instr reg
- | Just slot <- lookupUFM regSlotMap reg
- = do (instr', nReg) <- patchInstr reg instr
-
- modify $ \s -> s
- { stateSpillSL = addToUFM_C accSpillSL (stateSpillSL s) reg (reg, 1, 0) }
-
- return ( instr'
- , ( []
- , [LiveInstr (SPILL nReg slot) Nothing]))
-
- | otherwise = panic "RegSpill.spillWrite: no slot defined for spilled reg"
-
-
--- | Add both RELOAD and SPILL meta instructions for an instruction that
--- both reads and writes to a vreg that is being spilled.
-spillModify
- :: Instruction instr
- => UniqFM Int
- -> instr
- -> Reg
- -> SpillM (instr, ([LiveInstr instr'], [LiveInstr instr']))
-
-spillModify regSlotMap instr reg
- | Just slot <- lookupUFM regSlotMap reg
- = do (instr', nReg) <- patchInstr reg instr
-
- modify $ \s -> s
- { stateSpillSL = addToUFM_C accSpillSL (stateSpillSL s) reg (reg, 1, 1) }
-
- return ( instr'
- , ( [LiveInstr (RELOAD slot nReg) Nothing]
- , [LiveInstr (SPILL nReg slot) Nothing]))
-
- | otherwise = panic "RegSpill.spillModify: no slot defined for spilled reg"
-
-
--- | Rewrite uses of this virtual reg in an instr to use a different
--- virtual reg.
-patchInstr
- :: Instruction instr
- => Reg -> instr -> SpillM (instr, Reg)
-
-patchInstr reg instr
- = do nUnique <- newUnique
-
- -- The register we're rewriting is supposed to be virtual.
- -- If it's not then something has gone horribly wrong.
- let nReg
- = case reg of
- RegVirtual vr
- -> RegVirtual (renameVirtualReg nUnique vr)
-
- RegReal{}
- -> panic "RegAlloc.Graph.Spill.patchIntr: not patching real reg"
-
- let instr' = patchReg1 reg nReg instr
- return (instr', nReg)
-
-
-patchReg1
- :: Instruction instr
- => Reg -> Reg -> instr -> instr
-
-patchReg1 old new instr
- = let patchF r
- | r == old = new
- | otherwise = r
- in patchRegsOfInstr instr patchF
-
-
--- Spiller monad --------------------------------------------------------------
--- | State monad for the spill code generator.
-type SpillM a
- = State SpillS a
-
--- | Spill code generator state.
-data SpillS
- = SpillS
- { -- | Unique supply for generating fresh vregs.
- stateUS :: UniqSupply
-
- -- | Spilled vreg vs the number of times it was loaded, stored.
- , stateSpillSL :: UniqFM (Reg, Int, Int) }
-
-
--- | Create a new spiller state.
-initSpillS :: UniqSupply -> SpillS
-initSpillS uniqueSupply
- = SpillS
- { stateUS = uniqueSupply
- , stateSpillSL = emptyUFM }
-
-
--- | Allocate a new unique in the spiller monad.
-newUnique :: SpillM Unique
-newUnique
- = do us <- gets stateUS
- case takeUniqFromSupply us of
- (uniq, us')
- -> do modify $ \s -> s { stateUS = us' }
- return uniq
-
-
--- | Add a spill/reload count to a stats record for a register.
-accSpillSL :: (Reg, Int, Int) -> (Reg, Int, Int) -> (Reg, Int, Int)
-accSpillSL (r1, s1, l1) (_, s2, l2)
- = (r1, s1 + s2, l1 + l2)
-
-
--- Spiller stats --------------------------------------------------------------
--- | Spiller statistics.
--- Tells us what registers were spilled.
-data SpillStats
- = SpillStats
- { spillStoreLoad :: UniqFM (Reg, Int, Int) }
-
-
--- | Extract spiller statistics from the spiller state.
-makeSpillStats :: SpillS -> SpillStats
-makeSpillStats s
- = SpillStats
- { spillStoreLoad = stateSpillSL s }
-
-
-instance Outputable SpillStats where
- ppr stats
- = pprUFM (spillStoreLoad stats)
- (vcat . map (\(r, s, l) -> ppr r <+> int s <+> int l))
diff --git a/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs b/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs
deleted file mode 100644
index bd8b449cbb..0000000000
--- a/compiler/nativeGen/RegAlloc/Graph/SpillClean.hs
+++ /dev/null
@@ -1,616 +0,0 @@
-{-# LANGUAGE CPP #-}
-
--- | Clean out unneeded spill\/reload instructions.
---
--- Handling of join points
--- ~~~~~~~~~~~~~~~~~~~~~~~
---
--- B1: B2:
--- ... ...
--- RELOAD SLOT(0), %r1 RELOAD SLOT(0), %r1
--- ... A ... ... B ...
--- jump B3 jump B3
---
--- B3: ... C ...
--- RELOAD SLOT(0), %r1
--- ...
---
--- The Plan
--- ~~~~~~~~
--- As long as %r1 hasn't been written to in A, B or C then we don't need
--- the reload in B3.
---
--- What we really care about here is that on the entry to B3, %r1 will
--- always have the same value that is in SLOT(0) (ie, %r1 is _valid_)
---
--- This also works if the reloads in B1\/B2 were spills instead, because
--- spilling %r1 to a slot makes that slot have the same value as %r1.
---
-module RegAlloc.Graph.SpillClean (
- cleanSpills
-) where
-import GhcPrelude
-
-import RegAlloc.Liveness
-import Instruction
-import Reg
-
-import GHC.Cmm.BlockId
-import GHC.Cmm
-import UniqSet
-import UniqFM
-import Unique
-import State
-import Outputable
-import GHC.Platform
-import GHC.Cmm.Dataflow.Collections
-
-import Data.List
-import Data.Maybe
-import Data.IntSet (IntSet)
-import qualified Data.IntSet as IntSet
-
-
--- | The identification number of a spill slot.
--- A value is stored in a spill slot when we don't have a free
--- register to hold it.
-type Slot = Int
-
-
--- | Clean out unneeded spill\/reloads from this top level thing.
-cleanSpills
- :: Instruction instr
- => Platform
- -> LiveCmmDecl statics instr
- -> LiveCmmDecl statics instr
-
-cleanSpills platform cmm
- = evalState (cleanSpin platform 0 cmm) initCleanS
-
-
--- | Do one pass of cleaning.
-cleanSpin
- :: Instruction instr
- => Platform
- -> Int -- ^ Iteration number for the cleaner.
- -> LiveCmmDecl statics instr -- ^ Liveness annotated code to clean.
- -> CleanM (LiveCmmDecl statics instr)
-
-cleanSpin platform spinCount code
- = do
- -- Initialise count of cleaned spill and reload instructions.
- modify $ \s -> s
- { sCleanedSpillsAcc = 0
- , sCleanedReloadsAcc = 0
- , sReloadedBy = emptyUFM }
-
- code_forward <- mapBlockTopM (cleanBlockForward platform) code
- code_backward <- cleanTopBackward code_forward
-
- -- During the cleaning of each block we collected information about
- -- what regs were valid across each jump. Based on this, work out
- -- whether it will be safe to erase reloads after join points for
- -- the next pass.
- collateJoinPoints
-
- -- Remember how many spill and reload instructions we cleaned in this pass.
- spills <- gets sCleanedSpillsAcc
- reloads <- gets sCleanedReloadsAcc
- modify $ \s -> s
- { sCleanedCount = (spills, reloads) : sCleanedCount s }
-
- -- If nothing was cleaned in this pass or the last one
- -- then we're done and it's time to bail out.
- cleanedCount <- gets sCleanedCount
- if take 2 cleanedCount == [(0, 0), (0, 0)]
- then return code
-
- -- otherwise go around again
- else cleanSpin platform (spinCount + 1) code_backward
-
-
--------------------------------------------------------------------------------
--- | Clean out unneeded reload instructions,
--- while walking forward over the code.
-cleanBlockForward
- :: Instruction instr
- => Platform
- -> LiveBasicBlock instr
- -> CleanM (LiveBasicBlock instr)
-
-cleanBlockForward platform (BasicBlock blockId instrs)
- = do
- -- See if we have a valid association for the entry to this block.
- jumpValid <- gets sJumpValid
- let assoc = case lookupUFM jumpValid blockId of
- Just assoc -> assoc
- Nothing -> emptyAssoc
-
- instrs_reload <- cleanForward platform blockId assoc [] instrs
- return $ BasicBlock blockId instrs_reload
-
-
-
--- | Clean out unneeded reload instructions.
---
--- Walking forwards across the code
--- On a reload, if we know a reg already has the same value as a slot
--- then we don't need to do the reload.
---
-cleanForward
- :: Instruction instr
- => Platform
- -> BlockId -- ^ the block that we're currently in
- -> Assoc Store -- ^ two store locations are associated if
- -- they have the same value
- -> [LiveInstr instr] -- ^ acc
- -> [LiveInstr instr] -- ^ instrs to clean (in backwards order)
- -> CleanM [LiveInstr instr] -- ^ cleaned instrs (in forward order)
-
-cleanForward _ _ _ acc []
- = return acc
-
--- Rewrite live range joins via spill slots to just a spill and a reg-reg move
--- hopefully the spill will be also be cleaned in the next pass
-cleanForward platform blockId assoc acc (li1 : li2 : instrs)
-
- | LiveInstr (SPILL reg1 slot1) _ <- li1
- , LiveInstr (RELOAD slot2 reg2) _ <- li2
- , slot1 == slot2
- = do
- modify $ \s -> s { sCleanedReloadsAcc = sCleanedReloadsAcc s + 1 }
- cleanForward platform blockId assoc acc
- $ li1 : LiveInstr (mkRegRegMoveInstr platform reg1 reg2) Nothing
- : instrs
-
-cleanForward platform blockId assoc acc (li@(LiveInstr i1 _) : instrs)
- | Just (r1, r2) <- takeRegRegMoveInstr i1
- = if r1 == r2
- -- Erase any left over nop reg reg moves while we're here
- -- this will also catch any nop moves that the previous case
- -- happens to add.
- then cleanForward platform blockId assoc acc instrs
-
- -- If r1 has the same value as some slots and we copy r1 to r2,
- -- then r2 is now associated with those slots instead
- else do let assoc' = addAssoc (SReg r1) (SReg r2)
- $ delAssoc (SReg r2)
- $ assoc
-
- cleanForward platform blockId assoc' (li : acc) instrs
-
-
-cleanForward platform blockId assoc acc (li : instrs)
-
- -- Update association due to the spill.
- | LiveInstr (SPILL reg slot) _ <- li
- = let assoc' = addAssoc (SReg reg) (SSlot slot)
- $ delAssoc (SSlot slot)
- $ assoc
- in cleanForward platform blockId assoc' (li : acc) instrs
-
- -- Clean a reload instr.
- | LiveInstr (RELOAD{}) _ <- li
- = do (assoc', mli) <- cleanReload platform blockId assoc li
- case mli of
- Nothing -> cleanForward platform blockId assoc' acc
- instrs
-
- Just li' -> cleanForward platform blockId assoc' (li' : acc)
- instrs
-
- -- Remember the association over a jump.
- | LiveInstr instr _ <- li
- , targets <- jumpDestsOfInstr instr
- , not $ null targets
- = do mapM_ (accJumpValid assoc) targets
- cleanForward platform blockId assoc (li : acc) instrs
-
- -- Writing to a reg changes its value.
- | LiveInstr instr _ <- li
- , RU _ written <- regUsageOfInstr platform instr
- = let assoc' = foldr delAssoc assoc (map SReg $ nub written)
- in cleanForward platform blockId assoc' (li : acc) instrs
-
-
-
--- | Try and rewrite a reload instruction to something more pleasing
-cleanReload
- :: Instruction instr
- => Platform
- -> BlockId
- -> Assoc Store
- -> LiveInstr instr
- -> CleanM (Assoc Store, Maybe (LiveInstr instr))
-
-cleanReload platform blockId assoc li@(LiveInstr (RELOAD slot reg) _)
-
- -- If the reg we're reloading already has the same value as the slot
- -- then we can erase the instruction outright.
- | elemAssoc (SSlot slot) (SReg reg) assoc
- = do modify $ \s -> s { sCleanedReloadsAcc = sCleanedReloadsAcc s + 1 }
- return (assoc, Nothing)
-
- -- If we can find another reg with the same value as this slot then
- -- do a move instead of a reload.
- | Just reg2 <- findRegOfSlot assoc slot
- = do modify $ \s -> s { sCleanedReloadsAcc = sCleanedReloadsAcc s + 1 }
-
- let assoc' = addAssoc (SReg reg) (SReg reg2)
- $ delAssoc (SReg reg)
- $ assoc
-
- return ( assoc'
- , Just $ LiveInstr (mkRegRegMoveInstr platform reg2 reg) Nothing)
-
- -- Gotta keep this instr.
- | otherwise
- = do -- Update the association.
- let assoc'
- = addAssoc (SReg reg) (SSlot slot)
- -- doing the reload makes reg and slot the same value
- $ delAssoc (SReg reg)
- -- reg value changes on reload
- $ assoc
-
- -- Remember that this block reloads from this slot.
- accBlockReloadsSlot blockId slot
-
- return (assoc', Just li)
-
-cleanReload _ _ _ _
- = panic "RegSpillClean.cleanReload: unhandled instr"
-
-
--------------------------------------------------------------------------------
--- | Clean out unneeded spill instructions,
--- while walking backwards over the code.
---
--- If there were no reloads from a slot between a spill and the last one
--- then the slot was never read and we don't need the spill.
---
--- SPILL r0 -> s1
--- RELOAD s1 -> r2
--- SPILL r3 -> s1 <--- don't need this spill
--- SPILL r4 -> s1
--- RELOAD s1 -> r5
---
--- Maintain a set of
--- "slots which were spilled to but not reloaded from yet"
---
--- Walking backwards across the code:
--- a) On a reload from a slot, remove it from the set.
---
--- a) On a spill from a slot
--- If the slot is in set then we can erase the spill,
--- because it won't be reloaded from until after the next spill.
---
--- otherwise
--- keep the spill and add the slot to the set
---
--- TODO: This is mostly inter-block
--- we should really be updating the noReloads set as we cross jumps also.
---
--- TODO: generate noReloads from liveSlotsOnEntry
---
-cleanTopBackward
- :: Instruction instr
- => LiveCmmDecl statics instr
- -> CleanM (LiveCmmDecl statics instr)
-
-cleanTopBackward cmm
- = case cmm of
- CmmData{}
- -> return cmm
-
- CmmProc info label live sccs
- | LiveInfo _ _ _ liveSlotsOnEntry <- info
- -> do sccs' <- mapM (mapSCCM (cleanBlockBackward liveSlotsOnEntry)) sccs
- return $ CmmProc info label live sccs'
-
-
-cleanBlockBackward
- :: Instruction instr
- => BlockMap IntSet
- -> LiveBasicBlock instr
- -> CleanM (LiveBasicBlock instr)
-
-cleanBlockBackward liveSlotsOnEntry (BasicBlock blockId instrs)
- = do instrs_spill <- cleanBackward liveSlotsOnEntry emptyUniqSet [] instrs
- return $ BasicBlock blockId instrs_spill
-
-
-
-cleanBackward
- :: Instruction instr
- => BlockMap IntSet -- ^ Slots live on entry to each block
- -> UniqSet Int -- ^ Slots that have been spilled, but not reloaded from
- -> [LiveInstr instr] -- ^ acc
- -> [LiveInstr instr] -- ^ Instrs to clean (in forwards order)
- -> CleanM [LiveInstr instr] -- ^ Cleaned instrs (in backwards order)
-
-cleanBackward liveSlotsOnEntry noReloads acc lis
- = do reloadedBy <- gets sReloadedBy
- cleanBackward' liveSlotsOnEntry reloadedBy noReloads acc lis
-
-
-cleanBackward'
- :: Instruction instr
- => BlockMap IntSet
- -> UniqFM [BlockId]
- -> UniqSet Int
- -> [LiveInstr instr]
- -> [LiveInstr instr]
- -> State CleanS [LiveInstr instr]
-
-cleanBackward' _ _ _ acc []
- = return acc
-
-cleanBackward' liveSlotsOnEntry reloadedBy noReloads acc (li : instrs)
-
- -- If nothing ever reloads from this slot then we don't need the spill.
- | LiveInstr (SPILL _ slot) _ <- li
- , Nothing <- lookupUFM reloadedBy (SSlot slot)
- = do modify $ \s -> s { sCleanedSpillsAcc = sCleanedSpillsAcc s + 1 }
- cleanBackward liveSlotsOnEntry noReloads acc instrs
-
- | LiveInstr (SPILL _ slot) _ <- li
- = if elementOfUniqSet slot noReloads
-
- -- We can erase this spill because the slot won't be read until
- -- after the next one
- then do
- modify $ \s -> s { sCleanedSpillsAcc = sCleanedSpillsAcc s + 1 }
- cleanBackward liveSlotsOnEntry noReloads acc instrs
-
- else do
- -- This slot is being spilled to, but we haven't seen any reloads yet.
- let noReloads' = addOneToUniqSet noReloads slot
- cleanBackward liveSlotsOnEntry noReloads' (li : acc) instrs
-
- -- if we reload from a slot then it's no longer unused
- | LiveInstr (RELOAD slot _) _ <- li
- , noReloads' <- delOneFromUniqSet noReloads slot
- = cleanBackward liveSlotsOnEntry noReloads' (li : acc) instrs
-
- -- If a slot is live in a jump target then assume it's reloaded there.
- --
- -- TODO: A real dataflow analysis would do a better job here.
- -- If the target block _ever_ used the slot then we assume
- -- it always does, but if those reloads are cleaned the slot
- -- liveness map doesn't get updated.
- | LiveInstr instr _ <- li
- , targets <- jumpDestsOfInstr instr
- = do
- let slotsReloadedByTargets
- = IntSet.unions
- $ catMaybes
- $ map (flip mapLookup liveSlotsOnEntry)
- $ targets
-
- let noReloads'
- = foldl' delOneFromUniqSet noReloads
- $ IntSet.toList slotsReloadedByTargets
-
- cleanBackward liveSlotsOnEntry noReloads' (li : acc) instrs
-
-#if __GLASGOW_HASKELL__ <= 810
- -- some other instruction
- | otherwise
- = cleanBackward liveSlotsOnEntry noReloads (li : acc) instrs
-#endif
-
-
--- | Combine the associations from all the inward control flow edges.
---
-collateJoinPoints :: CleanM ()
-collateJoinPoints
- = modify $ \s -> s
- { sJumpValid = mapUFM intersects (sJumpValidAcc s)
- , sJumpValidAcc = emptyUFM }
-
-intersects :: [Assoc Store] -> Assoc Store
-intersects [] = emptyAssoc
-intersects assocs = foldl1' intersectAssoc assocs
-
-
--- | See if we have a reg with the same value as this slot in the association table.
-findRegOfSlot :: Assoc Store -> Int -> Maybe Reg
-findRegOfSlot assoc slot
- | close <- closeAssoc (SSlot slot) assoc
- , Just (SReg reg) <- find isStoreReg $ nonDetEltsUniqSet close
- -- See Note [Unique Determinism and code generation]
- = Just reg
-
- | otherwise
- = Nothing
-
-
--------------------------------------------------------------------------------
--- | Cleaner monad.
-type CleanM
- = State CleanS
-
--- | Cleaner state.
-data CleanS
- = CleanS
- { -- | Regs which are valid at the start of each block.
- sJumpValid :: UniqFM (Assoc Store)
-
- -- | Collecting up what regs were valid across each jump.
- -- in the next pass we can collate these and write the results
- -- to sJumpValid.
- , sJumpValidAcc :: UniqFM [Assoc Store]
-
- -- | Map of (slot -> blocks which reload from this slot)
- -- used to decide if whether slot spilled to will ever be
- -- reloaded from on this path.
- , sReloadedBy :: UniqFM [BlockId]
-
- -- | Spills and reloads cleaned each pass (latest at front)
- , sCleanedCount :: [(Int, Int)]
-
- -- | Spills and reloads that have been cleaned in this pass so far.
- , sCleanedSpillsAcc :: Int
- , sCleanedReloadsAcc :: Int }
-
-
--- | Construct the initial cleaner state.
-initCleanS :: CleanS
-initCleanS
- = CleanS
- { sJumpValid = emptyUFM
- , sJumpValidAcc = emptyUFM
-
- , sReloadedBy = emptyUFM
-
- , sCleanedCount = []
-
- , sCleanedSpillsAcc = 0
- , sCleanedReloadsAcc = 0 }
-
-
--- | Remember the associations before a jump.
-accJumpValid :: Assoc Store -> BlockId -> CleanM ()
-accJumpValid assocs target
- = modify $ \s -> s {
- sJumpValidAcc = addToUFM_C (++)
- (sJumpValidAcc s)
- target
- [assocs] }
-
-
-accBlockReloadsSlot :: BlockId -> Slot -> CleanM ()
-accBlockReloadsSlot blockId slot
- = modify $ \s -> s {
- sReloadedBy = addToUFM_C (++)
- (sReloadedBy s)
- (SSlot slot)
- [blockId] }
-
-
--------------------------------------------------------------------------------
--- A store location can be a stack slot or a register
-data Store
- = SSlot Int
- | SReg Reg
-
-
--- | Check if this is a reg store.
-isStoreReg :: Store -> Bool
-isStoreReg ss
- = case ss of
- SSlot _ -> False
- SReg _ -> True
-
-
--- Spill cleaning is only done once all virtuals have been allocated to realRegs
-instance Uniquable Store where
- getUnique (SReg r)
- | RegReal (RealRegSingle i) <- r
- = mkRegSingleUnique i
-
- | RegReal (RealRegPair r1 r2) <- r
- = mkRegPairUnique (r1 * 65535 + r2)
-
- | otherwise
- = error $ "RegSpillClean.getUnique: found virtual reg during spill clean,"
- ++ "only real regs expected."
-
- getUnique (SSlot i) = mkRegSubUnique i -- [SLPJ] I hope "SubUnique" is ok
-
-
-instance Outputable Store where
- ppr (SSlot i) = text "slot" <> int i
- ppr (SReg r) = ppr r
-
-
--------------------------------------------------------------------------------
--- Association graphs.
--- In the spill cleaner, two store locations are associated if they are known
--- to hold the same value.
---
-type Assoc a = UniqFM (UniqSet a)
-
--- | An empty association
-emptyAssoc :: Assoc a
-emptyAssoc = emptyUFM
-
-
--- | Add an association between these two things.
-addAssoc :: Uniquable a
- => a -> a -> Assoc a -> Assoc a
-
-addAssoc a b m
- = let m1 = addToUFM_C unionUniqSets m a (unitUniqSet b)
- m2 = addToUFM_C unionUniqSets m1 b (unitUniqSet a)
- in m2
-
-
--- | Delete all associations to a node.
-delAssoc :: (Uniquable a)
- => a -> Assoc a -> Assoc a
-
-delAssoc a m
- | Just aSet <- lookupUFM m a
- , m1 <- delFromUFM m a
- = nonDetFoldUniqSet (\x m -> delAssoc1 x a m) m1 aSet
- -- It's OK to use nonDetFoldUFM here because deletion is commutative
-
- | otherwise = m
-
-
--- | Delete a single association edge (a -> b).
-delAssoc1 :: Uniquable a
- => a -> a -> Assoc a -> Assoc a
-
-delAssoc1 a b m
- | Just aSet <- lookupUFM m a
- = addToUFM m a (delOneFromUniqSet aSet b)
-
- | otherwise = m
-
-
--- | Check if these two things are associated.
-elemAssoc :: (Uniquable a)
- => a -> a -> Assoc a -> Bool
-
-elemAssoc a b m
- = elementOfUniqSet b (closeAssoc a m)
-
-
--- | Find the refl. trans. closure of the association from this point.
-closeAssoc :: (Uniquable a)
- => a -> Assoc a -> UniqSet a
-
-closeAssoc a assoc
- = closeAssoc' assoc emptyUniqSet (unitUniqSet a)
- where
- closeAssoc' assoc visited toVisit
- = case nonDetEltsUniqSet toVisit of
- -- See Note [Unique Determinism and code generation]
-
- -- nothing else to visit, we're done
- [] -> visited
-
- (x:_)
- -- we've already seen this node
- | elementOfUniqSet x visited
- -> closeAssoc' assoc visited (delOneFromUniqSet toVisit x)
-
- -- haven't seen this node before,
- -- remember to visit all its neighbors
- | otherwise
- -> let neighbors
- = case lookupUFM assoc x of
- Nothing -> emptyUniqSet
- Just set -> set
-
- in closeAssoc' assoc
- (addOneToUniqSet visited x)
- (unionUniqSets toVisit neighbors)
-
--- | Intersect two associations.
-intersectAssoc :: Assoc a -> Assoc a -> Assoc a
-intersectAssoc a b
- = intersectUFM_C (intersectUniqSets) a b
diff --git a/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs b/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs
deleted file mode 100644
index 4870bf5269..0000000000
--- a/compiler/nativeGen/RegAlloc/Graph/SpillCost.hs
+++ /dev/null
@@ -1,317 +0,0 @@
-{-# LANGUAGE ScopedTypeVariables, GADTs, BangPatterns #-}
-module RegAlloc.Graph.SpillCost (
- SpillCostRecord,
- plusSpillCostRecord,
- pprSpillCostRecord,
-
- SpillCostInfo,
- zeroSpillCostInfo,
- plusSpillCostInfo,
-
- slurpSpillCostInfo,
- chooseSpill,
-
- lifeMapFromSpillCostInfo
-) where
-import GhcPrelude
-
-import RegAlloc.Liveness
-import Instruction
-import RegClass
-import Reg
-
-import GraphBase
-
-import GHC.Cmm.Dataflow.Collections (mapLookup)
-import GHC.Cmm.Dataflow.Label
-import GHC.Cmm
-import UniqFM
-import UniqSet
-import Digraph (flattenSCCs)
-import Outputable
-import GHC.Platform
-import State
-import CFG
-
-import Data.List (nub, minimumBy)
-import Data.Maybe
-import Control.Monad (join)
-
-
--- | Records the expected cost to spill some register.
-type SpillCostRecord
- = ( VirtualReg -- register name
- , Int -- number of writes to this reg
- , Int -- number of reads from this reg
- , Int) -- number of instrs this reg was live on entry to
-
-
--- | Map of `SpillCostRecord`
-type SpillCostInfo
- = UniqFM SpillCostRecord
-
-type SpillCostState = State (UniqFM SpillCostRecord) ()
-
--- | An empty map of spill costs.
-zeroSpillCostInfo :: SpillCostInfo
-zeroSpillCostInfo = emptyUFM
-
-
--- | Add two spill cost infos.
-plusSpillCostInfo :: SpillCostInfo -> SpillCostInfo -> SpillCostInfo
-plusSpillCostInfo sc1 sc2
- = plusUFM_C plusSpillCostRecord sc1 sc2
-
-
--- | Add two spill cost records.
-plusSpillCostRecord :: SpillCostRecord -> SpillCostRecord -> SpillCostRecord
-plusSpillCostRecord (r1, a1, b1, c1) (r2, a2, b2, c2)
- | r1 == r2 = (r1, a1 + a2, b1 + b2, c1 + c2)
- | otherwise = error "RegSpillCost.plusRegInt: regs don't match"
-
-
--- | Slurp out information used for determining spill costs.
---
--- For each vreg, the number of times it was written to, read from,
--- and the number of instructions it was live on entry to (lifetime)
---
-slurpSpillCostInfo :: forall instr statics. (Outputable instr, Instruction instr)
- => Platform
- -> Maybe CFG
- -> LiveCmmDecl statics instr
- -> SpillCostInfo
-
-slurpSpillCostInfo platform cfg cmm
- = execState (countCmm cmm) zeroSpillCostInfo
- where
- countCmm CmmData{} = return ()
- countCmm (CmmProc info _ _ sccs)
- = mapM_ (countBlock info freqMap)
- $ flattenSCCs sccs
- where
- LiveInfo _ entries _ _ = info
- freqMap = (fst . mkGlobalWeights (head entries)) <$> cfg
-
- -- Lookup the regs that are live on entry to this block in
- -- the info table from the CmmProc.
- countBlock info freqMap (BasicBlock blockId instrs)
- | LiveInfo _ _ blockLive _ <- info
- , Just rsLiveEntry <- mapLookup blockId blockLive
- , rsLiveEntry_virt <- takeVirtuals rsLiveEntry
- = countLIs (ceiling $ blockFreq freqMap blockId) rsLiveEntry_virt instrs
-
- | otherwise
- = error "RegAlloc.SpillCost.slurpSpillCostInfo: bad block"
-
-
- countLIs :: Int -> UniqSet VirtualReg -> [LiveInstr instr] -> SpillCostState
- countLIs _ _ []
- = return ()
-
- -- Skip over comment and delta pseudo instrs.
- countLIs scale rsLive (LiveInstr instr Nothing : lis)
- | isMetaInstr instr
- = countLIs scale rsLive lis
-
- | otherwise
- = pprPanic "RegSpillCost.slurpSpillCostInfo"
- $ text "no liveness information on instruction " <> ppr instr
-
- countLIs scale rsLiveEntry (LiveInstr instr (Just live) : lis)
- = do
- -- Increment the lifetime counts for regs live on entry to this instr.
- mapM_ incLifetime $ nonDetEltsUniqSet rsLiveEntry
- -- This is non-deterministic but we do not
- -- currently support deterministic code-generation.
- -- See Note [Unique Determinism and code generation]
-
- -- Increment counts for what regs were read/written from.
- let (RU read written) = regUsageOfInstr platform instr
- mapM_ (incUses scale) $ catMaybes $ map takeVirtualReg $ nub read
- mapM_ (incDefs scale) $ catMaybes $ map takeVirtualReg $ nub written
-
- -- Compute liveness for entry to next instruction.
- let liveDieRead_virt = takeVirtuals (liveDieRead live)
- let liveDieWrite_virt = takeVirtuals (liveDieWrite live)
- let liveBorn_virt = takeVirtuals (liveBorn live)
-
- let rsLiveAcross
- = rsLiveEntry `minusUniqSet` liveDieRead_virt
-
- let rsLiveNext
- = (rsLiveAcross `unionUniqSets` liveBorn_virt)
- `minusUniqSet` liveDieWrite_virt
-
- countLIs scale rsLiveNext lis
-
- incDefs count reg = modify $ \s -> addToUFM_C plusSpillCostRecord s reg (reg, count, 0, 0)
- incUses count reg = modify $ \s -> addToUFM_C plusSpillCostRecord s reg (reg, 0, count, 0)
- incLifetime reg = modify $ \s -> addToUFM_C plusSpillCostRecord s reg (reg, 0, 0, 1)
-
- blockFreq :: Maybe (LabelMap Double) -> Label -> Double
- blockFreq freqs bid
- | Just freq <- join (mapLookup bid <$> freqs)
- = max 1.0 (10000 * freq)
- | otherwise
- = 1.0 -- Only if no cfg given
-
--- | Take all the virtual registers from this set.
-takeVirtuals :: UniqSet Reg -> UniqSet VirtualReg
-takeVirtuals set = mkUniqSet
- [ vr | RegVirtual vr <- nonDetEltsUniqSet set ]
- -- See Note [Unique Determinism and code generation]
-
-
--- | Choose a node to spill from this graph
-chooseSpill
- :: SpillCostInfo
- -> Graph VirtualReg RegClass RealReg
- -> VirtualReg
-
-chooseSpill info graph
- = let cost = spillCost_length info graph
- node = minimumBy (\n1 n2 -> compare (cost $ nodeId n1) (cost $ nodeId n2))
- $ nonDetEltsUFM $ graphMap graph
- -- See Note [Unique Determinism and code generation]
-
- in nodeId node
-
-
--------------------------------------------------------------------------------
--- | Chaitins spill cost function is:
---
--- cost = sum loadCost * freq (u) + sum storeCost * freq (d)
--- u <- uses (v) d <- defs (v)
---
--- There are no loops in our code at the moment, so we can set the freq's to 1.
---
--- If we don't have live range splitting then Chaitins function performs badly
--- if we have lots of nested live ranges and very few registers.
---
--- v1 v2 v3
--- def v1 .
--- use v1 .
--- def v2 . .
--- def v3 . . .
--- use v1 . . .
--- use v3 . . .
--- use v2 . .
--- use v1 .
---
--- defs uses degree cost
--- v1: 1 3 3 1.5
--- v2: 1 2 3 1.0
--- v3: 1 1 3 0.666
---
--- v3 has the lowest cost, but if we only have 2 hardregs and we insert
--- spill code for v3 then this isn't going to improve the colorability of
--- the graph.
---
--- When compiling SHA1, which as very long basic blocks and some vregs
--- with very long live ranges the allocator seems to try and spill from
--- the inside out and eventually run out of stack slots.
---
--- Without live range splitting, its's better to spill from the outside
--- in so set the cost of very long live ranges to zero
---
-
--- spillCost_chaitin
--- :: SpillCostInfo
--- -> Graph VirtualReg RegClass RealReg
--- -> VirtualReg
--- -> Float
-
--- spillCost_chaitin info graph reg
--- -- Spilling a live range that only lives for 1 instruction
--- -- isn't going to help us at all - and we definitely want to avoid
--- -- trying to re-spill previously inserted spill code.
--- | lifetime <= 1 = 1/0
-
--- -- It's unlikely that we'll find a reg for a live range this long
--- -- better to spill it straight up and not risk trying to keep it around
--- -- and have to go through the build/color cycle again.
-
--- -- To facility this we scale down the spill cost of long ranges.
--- -- This makes sure long ranges are still spilled first.
--- -- But this way spill cost remains relevant for long live
--- -- ranges.
--- | lifetime >= 128
--- = (spillCost / conflicts) / 10.0
-
-
--- -- Otherwise revert to chaitin's regular cost function.
--- | otherwise = (spillCost / conflicts)
--- where
--- !spillCost = fromIntegral (uses + defs) :: Float
--- conflicts = fromIntegral (nodeDegree classOfVirtualReg graph reg)
--- (_, defs, uses, lifetime)
--- = fromMaybe (reg, 0, 0, 0) $ lookupUFM info reg
-
-
--- Just spill the longest live range.
-spillCost_length
- :: SpillCostInfo
- -> Graph VirtualReg RegClass RealReg
- -> VirtualReg
- -> Float
-
-spillCost_length info _ reg
- | lifetime <= 1 = 1/0
- | otherwise = 1 / fromIntegral lifetime
- where (_, _, _, lifetime)
- = fromMaybe (reg, 0, 0, 0)
- $ lookupUFM info reg
-
-
--- | Extract a map of register lifetimes from a `SpillCostInfo`.
-lifeMapFromSpillCostInfo :: SpillCostInfo -> UniqFM (VirtualReg, Int)
-lifeMapFromSpillCostInfo info
- = listToUFM
- $ map (\(r, _, _, life) -> (r, (r, life)))
- $ nonDetEltsUFM info
- -- See Note [Unique Determinism and code generation]
-
-
--- | Determine the degree (number of neighbors) of this node which
--- have the same class.
-nodeDegree
- :: (VirtualReg -> RegClass)
- -> Graph VirtualReg RegClass RealReg
- -> VirtualReg
- -> Int
-
-nodeDegree classOfVirtualReg graph reg
- | Just node <- lookupUFM (graphMap graph) reg
-
- , virtConflicts
- <- length
- $ filter (\r -> classOfVirtualReg r == classOfVirtualReg reg)
- $ nonDetEltsUniqSet
- -- See Note [Unique Determinism and code generation]
- $ nodeConflicts node
-
- = virtConflicts + sizeUniqSet (nodeExclusions node)
-
- | otherwise
- = 0
-
-
--- | Show a spill cost record, including the degree from the graph
--- and final calculated spill cost.
-pprSpillCostRecord
- :: (VirtualReg -> RegClass)
- -> (Reg -> SDoc)
- -> Graph VirtualReg RegClass RealReg
- -> SpillCostRecord
- -> SDoc
-
-pprSpillCostRecord regClass pprReg graph (reg, uses, defs, life)
- = hsep
- [ pprReg (RegVirtual reg)
- , ppr uses
- , ppr defs
- , ppr life
- , ppr $ nodeDegree regClass graph reg
- , text $ show $ (fromIntegral (uses + defs)
- / fromIntegral (nodeDegree regClass graph reg) :: Float) ]
-
diff --git a/compiler/nativeGen/RegAlloc/Graph/Stats.hs b/compiler/nativeGen/RegAlloc/Graph/Stats.hs
deleted file mode 100644
index 2159548437..0000000000
--- a/compiler/nativeGen/RegAlloc/Graph/Stats.hs
+++ /dev/null
@@ -1,346 +0,0 @@
-{-# LANGUAGE BangPatterns, CPP #-}
-
-{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
-
--- | Carries interesting info for debugging / profiling of the
--- graph coloring register allocator.
-module RegAlloc.Graph.Stats (
- RegAllocStats (..),
-
- pprStats,
- pprStatsSpills,
- pprStatsLifetimes,
- pprStatsConflict,
- pprStatsLifeConflict,
-
- countSRMs, addSRM
-) where
-
-import GhcPrelude
-
-import qualified GraphColor as Color
-import RegAlloc.Liveness
-import RegAlloc.Graph.Spill
-import RegAlloc.Graph.SpillCost
-import RegAlloc.Graph.TrivColorable
-import Instruction
-import RegClass
-import Reg
-import TargetReg
-
-import Outputable
-import UniqFM
-import UniqSet
-import State
-
--- | Holds interesting statistics from the register allocator.
-data RegAllocStats statics instr
-
- -- Information about the initial conflict graph.
- = RegAllocStatsStart
- { -- | Initial code, with liveness.
- raLiveCmm :: [LiveCmmDecl statics instr]
-
- -- | The initial, uncolored graph.
- , raGraph :: Color.Graph VirtualReg RegClass RealReg
-
- -- | Information to help choose which regs to spill.
- , raSpillCosts :: SpillCostInfo }
-
-
- -- Information about an intermediate graph.
- -- This is one that we couldn't color, so had to insert spill code
- -- instruction stream.
- | RegAllocStatsSpill
- { -- | Code we tried to allocate registers for.
- raCode :: [LiveCmmDecl statics instr]
-
- -- | Partially colored graph.
- , raGraph :: Color.Graph VirtualReg RegClass RealReg
-
- -- | The regs that were coalesced.
- , raCoalesced :: UniqFM VirtualReg
-
- -- | Spiller stats.
- , raSpillStats :: SpillStats
-
- -- | Number of instructions each reg lives for.
- , raSpillCosts :: SpillCostInfo
-
- -- | Code with spill instructions added.
- , raSpilled :: [LiveCmmDecl statics instr] }
-
-
- -- a successful coloring
- | RegAllocStatsColored
- { -- | Code we tried to allocate registers for.
- raCode :: [LiveCmmDecl statics instr]
-
- -- | Uncolored graph.
- , raGraph :: Color.Graph VirtualReg RegClass RealReg
-
- -- | Coalesced and colored graph.
- , raGraphColored :: Color.Graph VirtualReg RegClass RealReg
-
- -- | Regs that were coalesced.
- , raCoalesced :: UniqFM VirtualReg
-
- -- | Code with coalescings applied.
- , raCodeCoalesced :: [LiveCmmDecl statics instr]
-
- -- | Code with vregs replaced by hregs.
- , raPatched :: [LiveCmmDecl statics instr]
-
- -- | Code with unneeded spill\/reloads cleaned out.
- , raSpillClean :: [LiveCmmDecl statics instr]
-
- -- | Final code.
- , raFinal :: [NatCmmDecl statics instr]
-
- -- | Spill\/reload\/reg-reg moves present in this code.
- , raSRMs :: (Int, Int, Int) }
-
-
-instance (Outputable statics, Outputable instr)
- => Outputable (RegAllocStats statics instr) where
-
- ppr (s@RegAllocStatsStart{}) = sdocWithPlatform $ \platform ->
- text "# Start"
- $$ text "# Native code with liveness information."
- $$ ppr (raLiveCmm s)
- $$ text ""
- $$ text "# Initial register conflict graph."
- $$ Color.dotGraph
- (targetRegDotColor platform)
- (trivColorable platform
- (targetVirtualRegSqueeze platform)
- (targetRealRegSqueeze platform))
- (raGraph s)
-
-
- ppr (s@RegAllocStatsSpill{}) =
- text "# Spill"
-
- $$ text "# Code with liveness information."
- $$ ppr (raCode s)
- $$ text ""
-
- $$ (if (not $ isNullUFM $ raCoalesced s)
- then text "# Registers coalesced."
- $$ pprUFMWithKeys (raCoalesced s) (vcat . map ppr)
- $$ text ""
- else empty)
-
- $$ text "# Spills inserted."
- $$ ppr (raSpillStats s)
- $$ text ""
-
- $$ text "# Code with spills inserted."
- $$ ppr (raSpilled s)
-
-
- ppr (s@RegAllocStatsColored { raSRMs = (spills, reloads, moves) })
- = sdocWithPlatform $ \platform ->
- text "# Colored"
-
- $$ text "# Code with liveness information."
- $$ ppr (raCode s)
- $$ text ""
-
- $$ text "# Register conflict graph (colored)."
- $$ Color.dotGraph
- (targetRegDotColor platform)
- (trivColorable platform
- (targetVirtualRegSqueeze platform)
- (targetRealRegSqueeze platform))
- (raGraphColored s)
- $$ text ""
-
- $$ (if (not $ isNullUFM $ raCoalesced s)
- then text "# Registers coalesced."
- $$ pprUFMWithKeys (raCoalesced s) (vcat . map ppr)
- $$ text ""
- else empty)
-
- $$ text "# Native code after coalescings applied."
- $$ ppr (raCodeCoalesced s)
- $$ text ""
-
- $$ text "# Native code after register allocation."
- $$ ppr (raPatched s)
- $$ text ""
-
- $$ text "# Clean out unneeded spill/reloads."
- $$ ppr (raSpillClean s)
- $$ text ""
-
- $$ text "# Final code, after rewriting spill/rewrite pseudo instrs."
- $$ ppr (raFinal s)
- $$ text ""
- $$ text "# Score:"
- $$ (text "# spills inserted: " <> int spills)
- $$ (text "# reloads inserted: " <> int reloads)
- $$ (text "# reg-reg moves remaining: " <> int moves)
- $$ text ""
-
-
--- | Do all the different analysis on this list of RegAllocStats
-pprStats
- :: [RegAllocStats statics instr]
- -> Color.Graph VirtualReg RegClass RealReg
- -> SDoc
-
-pprStats stats graph
- = let outSpills = pprStatsSpills stats
- outLife = pprStatsLifetimes stats
- outConflict = pprStatsConflict stats
- outScatter = pprStatsLifeConflict stats graph
-
- in vcat [outSpills, outLife, outConflict, outScatter]
-
-
--- | Dump a table of how many spill loads \/ stores were inserted for each vreg.
-pprStatsSpills
- :: [RegAllocStats statics instr] -> SDoc
-
-pprStatsSpills stats
- = let
- finals = [ s | s@RegAllocStatsColored{} <- stats]
-
- -- sum up how many stores\/loads\/reg-reg-moves were left in the code
- total = foldl' addSRM (0, 0, 0)
- $ map raSRMs finals
-
- in ( text "-- spills-added-total"
- $$ text "-- (stores, loads, reg_reg_moves_remaining)"
- $$ ppr total
- $$ text "")
-
-
--- | Dump a table of how long vregs tend to live for in the initial code.
-pprStatsLifetimes
- :: [RegAllocStats statics instr] -> SDoc
-
-pprStatsLifetimes stats
- = let info = foldl' plusSpillCostInfo zeroSpillCostInfo
- [ raSpillCosts s
- | s@RegAllocStatsStart{} <- stats ]
-
- lifeBins = binLifetimeCount $ lifeMapFromSpillCostInfo info
-
- in ( text "-- vreg-population-lifetimes"
- $$ text "-- (instruction_count, number_of_vregs_that_lived_that_long)"
- $$ pprUFM lifeBins (vcat . map ppr)
- $$ text "\n")
-
-
-binLifetimeCount :: UniqFM (VirtualReg, Int) -> UniqFM (Int, Int)
-binLifetimeCount fm
- = let lifes = map (\l -> (l, (l, 1)))
- $ map snd
- $ nonDetEltsUFM fm
- -- See Note [Unique Determinism and code generation]
-
- in addListToUFM_C
- (\(l1, c1) (_, c2) -> (l1, c1 + c2))
- emptyUFM
- lifes
-
-
--- | Dump a table of how many conflicts vregs tend to have in the initial code.
-pprStatsConflict
- :: [RegAllocStats statics instr] -> SDoc
-
-pprStatsConflict stats
- = let confMap = foldl' (plusUFM_C (\(c1, n1) (_, n2) -> (c1, n1 + n2)))
- emptyUFM
- $ map Color.slurpNodeConflictCount
- [ raGraph s | s@RegAllocStatsStart{} <- stats ]
-
- in ( text "-- vreg-conflicts"
- $$ text "-- (conflict_count, number_of_vregs_that_had_that_many_conflicts)"
- $$ pprUFM confMap (vcat . map ppr)
- $$ text "\n")
-
-
--- | For every vreg, dump how many conflicts it has, and its lifetime.
--- Good for making a scatter plot.
-pprStatsLifeConflict
- :: [RegAllocStats statics instr]
- -> Color.Graph VirtualReg RegClass RealReg -- ^ global register conflict graph
- -> SDoc
-
-pprStatsLifeConflict stats graph
- = let lifeMap = lifeMapFromSpillCostInfo
- $ foldl' plusSpillCostInfo zeroSpillCostInfo
- $ [ raSpillCosts s | s@RegAllocStatsStart{} <- stats ]
-
- scatter = map (\r -> let lifetime = case lookupUFM lifeMap r of
- Just (_, l) -> l
- Nothing -> 0
- Just node = Color.lookupNode graph r
- in parens $ hcat $ punctuate (text ", ")
- [ doubleQuotes $ ppr $ Color.nodeId node
- , ppr $ sizeUniqSet (Color.nodeConflicts node)
- , ppr $ lifetime ])
- $ map Color.nodeId
- $ nonDetEltsUFM
- -- See Note [Unique Determinism and code generation]
- $ Color.graphMap graph
-
- in ( text "-- vreg-conflict-lifetime"
- $$ text "-- (vreg, vreg_conflicts, vreg_lifetime)"
- $$ (vcat scatter)
- $$ text "\n")
-
-
--- | Count spill/reload/reg-reg moves.
--- Lets us see how well the register allocator has done.
-countSRMs
- :: Instruction instr
- => LiveCmmDecl statics instr -> (Int, Int, Int)
-
-countSRMs cmm
- = execState (mapBlockTopM countSRM_block cmm) (0, 0, 0)
-
-
-countSRM_block
- :: Instruction instr
- => GenBasicBlock (LiveInstr instr)
- -> State (Int, Int, Int) (GenBasicBlock (LiveInstr instr))
-
-countSRM_block (BasicBlock i instrs)
- = do instrs' <- mapM countSRM_instr instrs
- return $ BasicBlock i instrs'
-
-
-countSRM_instr
- :: Instruction instr
- => LiveInstr instr -> State (Int, Int, Int) (LiveInstr instr)
-
-countSRM_instr li
- | LiveInstr SPILL{} _ <- li
- = do modify $ \(s, r, m) -> (s + 1, r, m)
- return li
-
- | LiveInstr RELOAD{} _ <- li
- = do modify $ \(s, r, m) -> (s, r + 1, m)
- return li
-
- | LiveInstr instr _ <- li
- , Just _ <- takeRegRegMoveInstr instr
- = do modify $ \(s, r, m) -> (s, r, m + 1)
- return li
-
- | otherwise
- = return li
-
-
--- sigh..
-addSRM :: (Int, Int, Int) -> (Int, Int, Int) -> (Int, Int, Int)
-addSRM (s1, r1, m1) (s2, r2, m2)
- = let !s = s1 + s2
- !r = r1 + r2
- !m = m1 + m2
- in (s, r, m)
-
diff --git a/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs b/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs
deleted file mode 100644
index cc2ad7d594..0000000000
--- a/compiler/nativeGen/RegAlloc/Graph/TrivColorable.hs
+++ /dev/null
@@ -1,274 +0,0 @@
-{-# LANGUAGE CPP #-}
-
-module RegAlloc.Graph.TrivColorable (
- trivColorable,
-)
-
-where
-
-#include "HsVersions.h"
-
-import GhcPrelude
-
-import RegClass
-import Reg
-
-import GraphBase
-
-import UniqSet
-import GHC.Platform
-import Panic
-
--- trivColorable ---------------------------------------------------------------
-
--- trivColorable function for the graph coloring allocator
---
--- This gets hammered by scanGraph during register allocation,
--- so needs to be fairly efficient.
---
--- NOTE: This only works for architectures with just RcInteger and RcDouble
--- (which are disjoint) ie. x86, x86_64 and ppc
---
--- The number of allocatable regs is hard coded in here so we can do
--- a fast comparison in trivColorable.
---
--- It's ok if these numbers are _less_ than the actual number of free
--- regs, but they can't be more or the register conflict
--- graph won't color.
---
--- If the graph doesn't color then the allocator will panic, but it won't
--- generate bad object code or anything nasty like that.
---
--- There is an allocatableRegsInClass :: RegClass -> Int, but doing
--- the unboxing is too slow for us here.
--- TODO: Is that still true? Could we use allocatableRegsInClass
--- without losing performance now?
---
--- Look at includes/stg/MachRegs.h to get the numbers.
---
-
-
--- Disjoint registers ----------------------------------------------------------
---
--- The definition has been unfolded into individual cases for speed.
--- Each architecture has a different register setup, so we use a
--- different regSqueeze function for each.
---
-accSqueeze
- :: Int
- -> Int
- -> (reg -> Int)
- -> UniqSet reg
- -> Int
-
-accSqueeze count maxCount squeeze us = acc count (nonDetEltsUniqSet us)
- -- See Note [Unique Determinism and code generation]
- where acc count [] = count
- acc count _ | count >= maxCount = count
- acc count (r:rs) = acc (count + squeeze r) rs
-
-{- Note [accSqueeze]
-~~~~~~~~~~~~~~~~~~~~
-BL 2007/09
-Doing a nice fold over the UniqSet makes trivColorable use
-32% of total compile time and 42% of total alloc when compiling SHA1.hs from darcs.
-Therefore the UniqFM is made non-abstract and we use custom fold.
-
-MS 2010/04
-When converting UniqFM to use Data.IntMap, the fold cannot use UniqFM internal
-representation any more. But it is imperative that the accSqueeze stops
-the folding if the count gets greater or equal to maxCount. We thus convert
-UniqFM to a (lazy) list, do the fold and stops if necessary, which was
-the most efficient variant tried. Benchmark compiling 10-times SHA1.hs follows.
-(original = previous implementation, folding = fold of the whole UFM,
- lazyFold = the current implementation,
- hackFold = using internal representation of Data.IntMap)
-
- original folding hackFold lazyFold
- -O -fasm (used everywhere) 31.509s 30.387s 30.791s 30.603s
- 100.00% 96.44% 97.72% 97.12%
- -fregs-graph 67.938s 74.875s 62.673s 64.679s
- 100.00% 110.21% 92.25% 95.20%
- -fregs-iterative 89.761s 143.913s 81.075s 86.912s
- 100.00% 160.33% 90.32% 96.83%
- -fnew-codegen 38.225s 37.142s 37.551s 37.119s
- 100.00% 97.17% 98.24% 97.11%
- -fnew-codegen -fregs-graph 91.786s 91.51s 87.368s 86.88s
- 100.00% 99.70% 95.19% 94.65%
- -fnew-codegen -fregs-iterative 206.72s 343.632s 194.694s 208.677s
- 100.00% 166.23% 94.18% 100.95%
--}
-
-trivColorable
- :: Platform
- -> (RegClass -> VirtualReg -> Int)
- -> (RegClass -> RealReg -> Int)
- -> Triv VirtualReg RegClass RealReg
-
-trivColorable platform virtualRegSqueeze realRegSqueeze RcInteger conflicts exclusions
- | let cALLOCATABLE_REGS_INTEGER
- = (case platformArch platform of
- ArchX86 -> 3
- ArchX86_64 -> 5
- ArchPPC -> 16
- ArchSPARC -> 14
- ArchSPARC64 -> panic "trivColorable ArchSPARC64"
- ArchPPC_64 _ -> 15
- ArchARM _ _ _ -> panic "trivColorable ArchARM"
- ArchARM64 -> panic "trivColorable ArchARM64"
- ArchAlpha -> panic "trivColorable ArchAlpha"
- ArchMipseb -> panic "trivColorable ArchMipseb"
- ArchMipsel -> panic "trivColorable ArchMipsel"
- ArchS390X -> panic "trivColorable ArchS390X"
- ArchJavaScript-> panic "trivColorable ArchJavaScript"
- ArchUnknown -> panic "trivColorable ArchUnknown")
- , count2 <- accSqueeze 0 cALLOCATABLE_REGS_INTEGER
- (virtualRegSqueeze RcInteger)
- conflicts
-
- , count3 <- accSqueeze count2 cALLOCATABLE_REGS_INTEGER
- (realRegSqueeze RcInteger)
- exclusions
-
- = count3 < cALLOCATABLE_REGS_INTEGER
-
-trivColorable platform virtualRegSqueeze realRegSqueeze RcFloat conflicts exclusions
- | let cALLOCATABLE_REGS_FLOAT
- = (case platformArch platform of
- -- On x86_64 and x86, Float and RcDouble
- -- use the same registers,
- -- so we only use RcDouble to represent the
- -- register allocation problem on those types.
- ArchX86 -> 0
- ArchX86_64 -> 0
- ArchPPC -> 0
- ArchSPARC -> 22
- ArchSPARC64 -> panic "trivColorable ArchSPARC64"
- ArchPPC_64 _ -> 0
- ArchARM _ _ _ -> panic "trivColorable ArchARM"
- ArchARM64 -> panic "trivColorable ArchARM64"
- ArchAlpha -> panic "trivColorable ArchAlpha"
- ArchMipseb -> panic "trivColorable ArchMipseb"
- ArchMipsel -> panic "trivColorable ArchMipsel"
- ArchS390X -> panic "trivColorable ArchS390X"
- ArchJavaScript-> panic "trivColorable ArchJavaScript"
- ArchUnknown -> panic "trivColorable ArchUnknown")
- , count2 <- accSqueeze 0 cALLOCATABLE_REGS_FLOAT
- (virtualRegSqueeze RcFloat)
- conflicts
-
- , count3 <- accSqueeze count2 cALLOCATABLE_REGS_FLOAT
- (realRegSqueeze RcFloat)
- exclusions
-
- = count3 < cALLOCATABLE_REGS_FLOAT
-
-trivColorable platform virtualRegSqueeze realRegSqueeze RcDouble conflicts exclusions
- | let cALLOCATABLE_REGS_DOUBLE
- = (case platformArch platform of
- ArchX86 -> 8
- -- in x86 32bit mode sse2 there are only
- -- 8 XMM registers xmm0 ... xmm7
- ArchX86_64 -> 10
- -- in x86_64 there are 16 XMM registers
- -- xmm0 .. xmm15, here 10 is a
- -- "dont need to solve conflicts" count that
- -- was chosen at some point in the past.
- ArchPPC -> 26
- ArchSPARC -> 11
- ArchSPARC64 -> panic "trivColorable ArchSPARC64"
- ArchPPC_64 _ -> 20
- ArchARM _ _ _ -> panic "trivColorable ArchARM"
- ArchARM64 -> panic "trivColorable ArchARM64"
- ArchAlpha -> panic "trivColorable ArchAlpha"
- ArchMipseb -> panic "trivColorable ArchMipseb"
- ArchMipsel -> panic "trivColorable ArchMipsel"
- ArchS390X -> panic "trivColorable ArchS390X"
- ArchJavaScript-> panic "trivColorable ArchJavaScript"
- ArchUnknown -> panic "trivColorable ArchUnknown")
- , count2 <- accSqueeze 0 cALLOCATABLE_REGS_DOUBLE
- (virtualRegSqueeze RcDouble)
- conflicts
-
- , count3 <- accSqueeze count2 cALLOCATABLE_REGS_DOUBLE
- (realRegSqueeze RcDouble)
- exclusions
-
- = count3 < cALLOCATABLE_REGS_DOUBLE
-
-
-
-
--- Specification Code ----------------------------------------------------------
---
--- The trivColorable function for each particular architecture should
--- implement the following function, but faster.
---
-
-{-
-trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool
-trivColorable classN conflicts exclusions
- = let
-
- acc :: Reg -> (Int, Int) -> (Int, Int)
- acc r (cd, cf)
- = case regClass r of
- RcInteger -> (cd+1, cf)
- RcFloat -> (cd, cf+1)
- _ -> panic "Regs.trivColorable: reg class not handled"
-
- tmp = nonDetFoldUFM acc (0, 0) conflicts
- (countInt, countFloat) = nonDetFoldUFM acc tmp exclusions
-
- squeese = worst countInt classN RcInteger
- + worst countFloat classN RcFloat
-
- in squeese < allocatableRegsInClass classN
-
--- | Worst case displacement
--- node N of classN has n neighbors of class C.
---
--- We currently only have RcInteger and RcDouble, which don't conflict at all.
--- This is a bit boring compared to what's in RegArchX86.
---
-worst :: Int -> RegClass -> RegClass -> Int
-worst n classN classC
- = case classN of
- RcInteger
- -> case classC of
- RcInteger -> min n (allocatableRegsInClass RcInteger)
- RcFloat -> 0
-
- RcDouble
- -> case classC of
- RcFloat -> min n (allocatableRegsInClass RcFloat)
- RcInteger -> 0
-
--- allocatableRegs is allMachRegNos with the fixed-use regs removed.
--- i.e., these are the regs for which we are prepared to allow the
--- register allocator to attempt to map VRegs to.
-allocatableRegs :: [RegNo]
-allocatableRegs
- = let isFree i = freeReg i
- in filter isFree allMachRegNos
-
-
--- | The number of regs in each class.
--- We go via top level CAFs to ensure that we're not recomputing
--- the length of these lists each time the fn is called.
-allocatableRegsInClass :: RegClass -> Int
-allocatableRegsInClass cls
- = case cls of
- RcInteger -> allocatableRegsInteger
- RcFloat -> allocatableRegsDouble
-
-allocatableRegsInteger :: Int
-allocatableRegsInteger
- = length $ filter (\r -> regClass r == RcInteger)
- $ map RealReg allocatableRegs
-
-allocatableRegsFloat :: Int
-allocatableRegsFloat
- = length $ filter (\r -> regClass r == RcFloat
- $ map RealReg allocatableRegs
--}
diff --git a/compiler/nativeGen/RegAlloc/Linear/Base.hs b/compiler/nativeGen/RegAlloc/Linear/Base.hs
deleted file mode 100644
index 552f14929d..0000000000
--- a/compiler/nativeGen/RegAlloc/Linear/Base.hs
+++ /dev/null
@@ -1,141 +0,0 @@
-
--- | Put common type definitions here to break recursive module dependencies.
-
-module RegAlloc.Linear.Base (
- BlockAssignment,
-
- Loc(..),
- regsOfLoc,
-
- -- for stats
- SpillReason(..),
- RegAllocStats(..),
-
- -- the allocator monad
- RA_State(..),
-)
-
-where
-
-import GhcPrelude
-
-import RegAlloc.Linear.StackMap
-import RegAlloc.Liveness
-import Reg
-
-import GHC.Driver.Session
-import Outputable
-import Unique
-import UniqFM
-import UniqSupply
-import GHC.Cmm.BlockId
-
-
--- | Used to store the register assignment on entry to a basic block.
--- We use this to handle join points, where multiple branch instructions
--- target a particular label. We have to insert fixup code to make
--- the register assignments from the different sources match up.
---
-type BlockAssignment freeRegs
- = BlockMap (freeRegs, RegMap Loc)
-
-
--- | Where a vreg is currently stored
--- A temporary can be marked as living in both a register and memory
--- (InBoth), for example if it was recently loaded from a spill location.
--- This makes it cheap to spill (no save instruction required), but we
--- have to be careful to turn this into InReg if the value in the
--- register is changed.
-
--- This is also useful when a temporary is about to be clobbered. We
--- save it in a spill location, but mark it as InBoth because the current
--- instruction might still want to read it.
---
-data Loc
- -- | vreg is in a register
- = InReg !RealReg
-
- -- | vreg is held in a stack slot
- | InMem {-# UNPACK #-} !StackSlot
-
-
- -- | vreg is held in both a register and a stack slot
- | InBoth !RealReg
- {-# UNPACK #-} !StackSlot
- deriving (Eq, Show, Ord)
-
-instance Outputable Loc where
- ppr l = text (show l)
-
-
--- | Get the reg numbers stored in this Loc.
-regsOfLoc :: Loc -> [RealReg]
-regsOfLoc (InReg r) = [r]
-regsOfLoc (InBoth r _) = [r]
-regsOfLoc (InMem _) = []
-
-
--- | Reasons why instructions might be inserted by the spiller.
--- Used when generating stats for -ddrop-asm-stats.
---
-data SpillReason
- -- | vreg was spilled to a slot so we could use its
- -- current hreg for another vreg
- = SpillAlloc !Unique
-
- -- | vreg was moved because its hreg was clobbered
- | SpillClobber !Unique
-
- -- | vreg was loaded from a spill slot
- | SpillLoad !Unique
-
- -- | reg-reg move inserted during join to targets
- | SpillJoinRR !Unique
-
- -- | reg-mem move inserted during join to targets
- | SpillJoinRM !Unique
-
-
--- | Used to carry interesting stats out of the register allocator.
-data RegAllocStats
- = RegAllocStats
- { ra_spillInstrs :: UniqFM [Int]
- , ra_fixupList :: [(BlockId,BlockId,BlockId)]
- -- ^ (from,fixup,to) : We inserted fixup code between from and to
- }
-
-
--- | The register allocator state
-data RA_State freeRegs
- = RA_State
-
- {
- -- | the current mapping from basic blocks to
- -- the register assignments at the beginning of that block.
- ra_blockassig :: BlockAssignment freeRegs
-
- -- | free machine registers
- , ra_freeregs :: !freeRegs
-
- -- | assignment of temps to locations
- , ra_assig :: RegMap Loc
-
- -- | current stack delta
- , ra_delta :: Int
-
- -- | free stack slots for spilling
- , ra_stack :: StackMap
-
- -- | unique supply for generating names for join point fixup blocks.
- , ra_us :: UniqSupply
-
- -- | Record why things were spilled, for -ddrop-asm-stats.
- -- Just keep a list here instead of a map of regs -> reasons.
- -- We don't want to slow down the allocator if we're not going to emit the stats.
- , ra_spills :: [SpillReason]
- , ra_DynFlags :: DynFlags
-
- -- | (from,fixup,to) : We inserted fixup code between from and to
- , ra_fixups :: [(BlockId,BlockId,BlockId)] }
-
-
diff --git a/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs
deleted file mode 100644
index b2b9cff5bb..0000000000
--- a/compiler/nativeGen/RegAlloc/Linear/FreeRegs.hs
+++ /dev/null
@@ -1,89 +0,0 @@
-{-# LANGUAGE CPP #-}
-
-module RegAlloc.Linear.FreeRegs (
- FR(..),
- maxSpillSlots
-)
-
-#include "HsVersions.h"
-
-where
-
-import GhcPrelude
-
-import Reg
-import RegClass
-
-import GHC.Driver.Session
-import Panic
-import GHC.Platform
-
--- -----------------------------------------------------------------------------
--- The free register set
--- This needs to be *efficient*
--- Here's an inefficient 'executable specification' of the FreeRegs data type:
---
--- type FreeRegs = [RegNo]
--- noFreeRegs = 0
--- releaseReg n f = if n `elem` f then f else (n : f)
--- initFreeRegs = allocatableRegs
--- getFreeRegs cls f = filter ( (==cls) . regClass . RealReg ) f
--- allocateReg f r = filter (/= r) f
-
-import qualified RegAlloc.Linear.PPC.FreeRegs as PPC
-import qualified RegAlloc.Linear.SPARC.FreeRegs as SPARC
-import qualified RegAlloc.Linear.X86.FreeRegs as X86
-import qualified RegAlloc.Linear.X86_64.FreeRegs as X86_64
-
-import qualified PPC.Instr
-import qualified SPARC.Instr
-import qualified X86.Instr
-
-class Show freeRegs => FR freeRegs where
- frAllocateReg :: Platform -> RealReg -> freeRegs -> freeRegs
- frGetFreeRegs :: Platform -> RegClass -> freeRegs -> [RealReg]
- frInitFreeRegs :: Platform -> freeRegs
- frReleaseReg :: Platform -> RealReg -> freeRegs -> freeRegs
-
-instance FR X86.FreeRegs where
- frAllocateReg = \_ -> X86.allocateReg
- frGetFreeRegs = X86.getFreeRegs
- frInitFreeRegs = X86.initFreeRegs
- frReleaseReg = \_ -> X86.releaseReg
-
-instance FR X86_64.FreeRegs where
- frAllocateReg = \_ -> X86_64.allocateReg
- frGetFreeRegs = X86_64.getFreeRegs
- frInitFreeRegs = X86_64.initFreeRegs
- frReleaseReg = \_ -> X86_64.releaseReg
-
-instance FR PPC.FreeRegs where
- frAllocateReg = \_ -> PPC.allocateReg
- frGetFreeRegs = \_ -> PPC.getFreeRegs
- frInitFreeRegs = PPC.initFreeRegs
- frReleaseReg = \_ -> PPC.releaseReg
-
-instance FR SPARC.FreeRegs where
- frAllocateReg = SPARC.allocateReg
- frGetFreeRegs = \_ -> SPARC.getFreeRegs
- frInitFreeRegs = SPARC.initFreeRegs
- frReleaseReg = SPARC.releaseReg
-
-maxSpillSlots :: DynFlags -> Int
-maxSpillSlots dflags
- = case platformArch (targetPlatform dflags) of
- ArchX86 -> X86.Instr.maxSpillSlots dflags
- ArchX86_64 -> X86.Instr.maxSpillSlots dflags
- ArchPPC -> PPC.Instr.maxSpillSlots dflags
- ArchS390X -> panic "maxSpillSlots ArchS390X"
- ArchSPARC -> SPARC.Instr.maxSpillSlots dflags
- ArchSPARC64 -> panic "maxSpillSlots ArchSPARC64"
- ArchARM _ _ _ -> panic "maxSpillSlots ArchARM"
- ArchARM64 -> panic "maxSpillSlots ArchARM64"
- ArchPPC_64 _ -> PPC.Instr.maxSpillSlots dflags
- ArchAlpha -> panic "maxSpillSlots ArchAlpha"
- ArchMipseb -> panic "maxSpillSlots ArchMipseb"
- ArchMipsel -> panic "maxSpillSlots ArchMipsel"
- ArchJavaScript-> panic "maxSpillSlots ArchJavaScript"
- ArchUnknown -> panic "maxSpillSlots ArchUnknown"
-
diff --git a/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs b/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs
deleted file mode 100644
index 4362ca8a17..0000000000
--- a/compiler/nativeGen/RegAlloc/Linear/JoinToTargets.hs
+++ /dev/null
@@ -1,378 +0,0 @@
-{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
-
--- | Handles joining of a jump instruction to its targets.
-
--- The first time we encounter a jump to a particular basic block, we
--- record the assignment of temporaries. The next time we encounter a
--- jump to the same block, we compare our current assignment to the
--- stored one. They might be different if spilling has occurred in one
--- branch; so some fixup code will be required to match up the assignments.
---
-module RegAlloc.Linear.JoinToTargets (joinToTargets) where
-
-import GhcPrelude
-
-import RegAlloc.Linear.State
-import RegAlloc.Linear.Base
-import RegAlloc.Linear.FreeRegs
-import RegAlloc.Liveness
-import Instruction
-import Reg
-
-import GHC.Cmm.BlockId
-import GHC.Cmm.Dataflow.Collections
-import Digraph
-import GHC.Driver.Session
-import Outputable
-import Unique
-import UniqFM
-import UniqSet
-
--- | For a jump instruction at the end of a block, generate fixup code so its
--- vregs are in the correct regs for its destination.
---
-joinToTargets
- :: (FR freeRegs, Instruction instr, Outputable instr)
- => BlockMap RegSet -- ^ maps the unique of the blockid to the set of vregs
- -- that are known to be live on the entry to each block.
-
- -> BlockId -- ^ id of the current block
- -> instr -- ^ branch instr on the end of the source block.
-
- -> RegM freeRegs ([NatBasicBlock instr] -- fresh blocks of fixup code.
- , instr) -- the original branch
- -- instruction, but maybe
- -- patched to jump
- -- to a fixup block first.
-
-joinToTargets block_live id instr
-
- -- we only need to worry about jump instructions.
- | not $ isJumpishInstr instr
- = return ([], instr)
-
- | otherwise
- = joinToTargets' block_live [] id instr (jumpDestsOfInstr instr)
-
------
-joinToTargets'
- :: (FR freeRegs, Instruction instr, Outputable instr)
- => BlockMap RegSet -- ^ maps the unique of the blockid to the set of vregs
- -- that are known to be live on the entry to each block.
-
- -> [NatBasicBlock instr] -- ^ acc blocks of fixup code.
-
- -> BlockId -- ^ id of the current block
- -> instr -- ^ branch instr on the end of the source block.
-
- -> [BlockId] -- ^ branch destinations still to consider.
-
- -> RegM freeRegs ([NatBasicBlock instr], instr)
-
--- no more targets to consider. all done.
-joinToTargets' _ new_blocks _ instr []
- = return (new_blocks, instr)
-
--- handle a branch target.
-joinToTargets' block_live new_blocks block_id instr (dest:dests)
- = do
- -- get the map of where the vregs are stored on entry to each basic block.
- block_assig <- getBlockAssigR
-
- -- get the assignment on entry to the branch instruction.
- assig <- getAssigR
-
- -- adjust the current assignment to remove any vregs that are not live
- -- on entry to the destination block.
- let Just live_set = mapLookup dest block_live
- let still_live uniq _ = uniq `elemUniqSet_Directly` live_set
- let adjusted_assig = filterUFM_Directly still_live assig
-
- -- and free up those registers which are now free.
- let to_free =
- [ r | (reg, loc) <- nonDetUFMToList assig
- -- This is non-deterministic but we do not
- -- currently support deterministic code-generation.
- -- See Note [Unique Determinism and code generation]
- , not (elemUniqSet_Directly reg live_set)
- , r <- regsOfLoc loc ]
-
- case mapLookup dest block_assig of
- Nothing
- -> joinToTargets_first
- block_live new_blocks block_id instr dest dests
- block_assig adjusted_assig to_free
-
- Just (_, dest_assig)
- -> joinToTargets_again
- block_live new_blocks block_id instr dest dests
- adjusted_assig dest_assig
-
-
--- this is the first time we jumped to this block.
-joinToTargets_first :: (FR freeRegs, Instruction instr, Outputable instr)
- => BlockMap RegSet
- -> [NatBasicBlock instr]
- -> BlockId
- -> instr
- -> BlockId
- -> [BlockId]
- -> BlockAssignment freeRegs
- -> RegMap Loc
- -> [RealReg]
- -> RegM freeRegs ([NatBasicBlock instr], instr)
-joinToTargets_first block_live new_blocks block_id instr dest dests
- block_assig src_assig
- to_free
-
- = do dflags <- getDynFlags
- let platform = targetPlatform dflags
-
- -- free up the regs that are not live on entry to this block.
- freeregs <- getFreeRegsR
- let freeregs' = foldl' (flip $ frReleaseReg platform) freeregs to_free
-
- -- remember the current assignment on entry to this block.
- setBlockAssigR (mapInsert dest (freeregs', src_assig) block_assig)
-
- joinToTargets' block_live new_blocks block_id instr dests
-
-
--- we've jumped to this block before
-joinToTargets_again :: (Instruction instr, FR freeRegs, Outputable instr)
- => BlockMap RegSet
- -> [NatBasicBlock instr]
- -> BlockId
- -> instr
- -> BlockId
- -> [BlockId]
- -> UniqFM Loc
- -> UniqFM Loc
- -> RegM freeRegs ([NatBasicBlock instr], instr)
-joinToTargets_again
- block_live new_blocks block_id instr dest dests
- src_assig dest_assig
-
- -- the assignments already match, no problem.
- | nonDetUFMToList dest_assig == nonDetUFMToList src_assig
- -- This is non-deterministic but we do not
- -- currently support deterministic code-generation.
- -- See Note [Unique Determinism and code generation]
- = joinToTargets' block_live new_blocks block_id instr dests
-
- -- assignments don't match, need fixup code
- | otherwise
- = do
-
- -- make a graph of what things need to be moved where.
- let graph = makeRegMovementGraph src_assig dest_assig
-
- -- look for cycles in the graph. This can happen if regs need to be swapped.
- -- Note that we depend on the fact that this function does a
- -- bottom up traversal of the tree-like portions of the graph.
- --
- -- eg, if we have
- -- R1 -> R2 -> R3
- --
- -- ie move value in R1 to R2 and value in R2 to R3.
- --
- -- We need to do the R2 -> R3 move before R1 -> R2.
- --
- let sccs = stronglyConnCompFromEdgedVerticesOrdR graph
-
- -- debugging
- {-
- pprTrace
- ("joinToTargets: making fixup code")
- (vcat [ text " in block: " <> ppr block_id
- , text " jmp instruction: " <> ppr instr
- , text " src assignment: " <> ppr src_assig
- , text " dest assignment: " <> ppr dest_assig
- , text " movement graph: " <> ppr graph
- , text " sccs of graph: " <> ppr sccs
- , text ""])
- (return ())
- -}
- delta <- getDeltaR
- fixUpInstrs_ <- mapM (handleComponent delta instr) sccs
- let fixUpInstrs = concat fixUpInstrs_
-
- -- make a new basic block containing the fixup code.
- -- A the end of the current block we will jump to the fixup one,
- -- then that will jump to our original destination.
- fixup_block_id <- mkBlockId <$> getUniqueR
- let block = BasicBlock fixup_block_id
- $ fixUpInstrs ++ mkJumpInstr dest
-
- -- if we didn't need any fixups, then don't include the block
- case fixUpInstrs of
- [] -> joinToTargets' block_live new_blocks block_id instr dests
-
- -- patch the original branch instruction so it goes to our
- -- fixup block instead.
- _ -> let instr' = patchJumpInstr instr
- (\bid -> if bid == dest
- then fixup_block_id
- else bid) -- no change!
-
- in do
- {- --debugging
- pprTrace "FixUpEdge info:"
- (
- text "inBlock:" <> ppr block_id $$
- text "instr:" <> ppr instr $$
- text "instr':" <> ppr instr' $$
- text "fixup_block_id':" <>
- ppr fixup_block_id $$
- text "dest:" <> ppr dest
- ) (return ())
- -}
- recordFixupBlock block_id fixup_block_id dest
- joinToTargets' block_live (block : new_blocks)
- block_id instr' dests
-
-
--- | Construct a graph of register\/spill movements.
---
--- Cyclic components seem to occur only very rarely.
---
--- We cut some corners by not handling memory-to-memory moves.
--- This shouldn't happen because every temporary gets its own stack slot.
---
-makeRegMovementGraph :: RegMap Loc -> RegMap Loc -> [Node Loc Unique]
-makeRegMovementGraph adjusted_assig dest_assig
- = [ node | (vreg, src) <- nonDetUFMToList adjusted_assig
- -- This is non-deterministic but we do not
- -- currently support deterministic code-generation.
- -- See Note [Unique Determinism and code generation]
- -- source reg might not be needed at the dest:
- , Just loc <- [lookupUFM_Directly dest_assig vreg]
- , node <- expandNode vreg src loc ]
-
-
--- | Expand out the destination, so InBoth destinations turn into
--- a combination of InReg and InMem.
-
--- The InBoth handling is a little tricky here. If the destination is
--- InBoth, then we must ensure that the value ends up in both locations.
--- An InBoth destination must conflict with an InReg or InMem source, so
--- we expand an InBoth destination as necessary.
---
--- An InBoth source is slightly different: we only care about the register
--- that the source value is in, so that we can move it to the destinations.
---
-expandNode
- :: a
- -> Loc -- ^ source of move
- -> Loc -- ^ destination of move
- -> [Node Loc a ]
-
-expandNode vreg loc@(InReg src) (InBoth dst mem)
- | src == dst = [DigraphNode vreg loc [InMem mem]]
- | otherwise = [DigraphNode vreg loc [InReg dst, InMem mem]]
-
-expandNode vreg loc@(InMem src) (InBoth dst mem)
- | src == mem = [DigraphNode vreg loc [InReg dst]]
- | otherwise = [DigraphNode vreg loc [InReg dst, InMem mem]]
-
-expandNode _ (InBoth _ src) (InMem dst)
- | src == dst = [] -- guaranteed to be true
-
-expandNode _ (InBoth src _) (InReg dst)
- | src == dst = []
-
-expandNode vreg (InBoth src _) dst
- = expandNode vreg (InReg src) dst
-
-expandNode vreg src dst
- | src == dst = []
- | otherwise = [DigraphNode vreg src [dst]]
-
-
--- | Generate fixup code for a particular component in the move graph
--- This component tells us what values need to be moved to what
--- destinations. We have eliminated any possibility of single-node
--- cycles in expandNode above.
---
-handleComponent
- :: Instruction instr
- => Int -> instr -> SCC (Node Loc Unique)
- -> RegM freeRegs [instr]
-
--- If the graph is acyclic then we won't get the swapping problem below.
--- In this case we can just do the moves directly, and avoid having to
--- go via a spill slot.
---
-handleComponent delta _ (AcyclicSCC (DigraphNode vreg src dsts))
- = mapM (makeMove delta vreg src) dsts
-
-
--- Handle some cyclic moves.
--- This can happen if we have two regs that need to be swapped.
--- eg:
--- vreg source loc dest loc
--- (vreg1, InReg r1, [InReg r2])
--- (vreg2, InReg r2, [InReg r1])
---
--- To avoid needing temp register, we just spill all the source regs, then
--- reaload them into their destination regs.
---
--- Note that we can not have cycles that involve memory locations as
--- sources as single destination because memory locations (stack slots)
--- are allocated exclusively for a virtual register and therefore can not
--- require a fixup.
---
-handleComponent delta instr
- (CyclicSCC ((DigraphNode vreg (InReg sreg) ((InReg dreg: _))) : rest))
- -- dest list may have more than one element, if the reg is also InMem.
- = do
- -- spill the source into its slot
- (instrSpill, slot)
- <- spillR (RegReal sreg) vreg
-
- -- reload into destination reg
- instrLoad <- loadR (RegReal dreg) slot
-
- remainingFixUps <- mapM (handleComponent delta instr)
- (stronglyConnCompFromEdgedVerticesOrdR rest)
-
- -- make sure to do all the reloads after all the spills,
- -- so we don't end up clobbering the source values.
- return ([instrSpill] ++ concat remainingFixUps ++ [instrLoad])
-
-handleComponent _ _ (CyclicSCC _)
- = panic "Register Allocator: handleComponent cyclic"
-
-
--- | Move a vreg between these two locations.
---
-makeMove
- :: Instruction instr
- => Int -- ^ current C stack delta.
- -> Unique -- ^ unique of the vreg that we're moving.
- -> Loc -- ^ source location.
- -> Loc -- ^ destination location.
- -> RegM freeRegs instr -- ^ move instruction.
-
-makeMove delta vreg src dst
- = do dflags <- getDynFlags
- let platform = targetPlatform dflags
-
- case (src, dst) of
- (InReg s, InReg d) ->
- do recordSpill (SpillJoinRR vreg)
- return $ mkRegRegMoveInstr platform (RegReal s) (RegReal d)
- (InMem s, InReg d) ->
- do recordSpill (SpillJoinRM vreg)
- return $ mkLoadInstr dflags (RegReal d) delta s
- (InReg s, InMem d) ->
- do recordSpill (SpillJoinRM vreg)
- return $ mkSpillInstr dflags (RegReal s) delta d
- _ ->
- -- we don't handle memory to memory moves.
- -- they shouldn't happen because we don't share
- -- stack slots between vregs.
- panic ("makeMove " ++ show vreg ++ " (" ++ show src ++ ") ("
- ++ show dst ++ ")"
- ++ " we don't handle mem->mem moves.")
-
diff --git a/compiler/nativeGen/RegAlloc/Linear/Main.hs b/compiler/nativeGen/RegAlloc/Linear/Main.hs
deleted file mode 100644
index 076b63a4ed..0000000000
--- a/compiler/nativeGen/RegAlloc/Linear/Main.hs
+++ /dev/null
@@ -1,920 +0,0 @@
-{-# LANGUAGE BangPatterns, CPP, ScopedTypeVariables #-}
-
-{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
-
------------------------------------------------------------------------------
---
--- The register allocator
---
--- (c) The University of Glasgow 2004
---
------------------------------------------------------------------------------
-
-{-
-The algorithm is roughly:
-
- 1) Compute strongly connected components of the basic block list.
-
- 2) Compute liveness (mapping from pseudo register to
- point(s) of death?).
-
- 3) Walk instructions in each basic block. We keep track of
- (a) Free real registers (a bitmap?)
- (b) Current assignment of temporaries to machine registers and/or
- spill slots (call this the "assignment").
- (c) Partial mapping from basic block ids to a virt-to-loc mapping.
- When we first encounter a branch to a basic block,
- we fill in its entry in this table with the current mapping.
-
- For each instruction:
- (a) For each temporary *read* by the instruction:
- If the temporary does not have a real register allocation:
- - Allocate a real register from the free list. If
- the list is empty:
- - Find a temporary to spill. Pick one that is
- not used in this instruction (ToDo: not
- used for a while...)
- - generate a spill instruction
- - If the temporary was previously spilled,
- generate an instruction to read the temp from its spill loc.
- (optimisation: if we can see that a real register is going to
- be used soon, then don't use it for allocation).
-
- (b) For each real register clobbered by this instruction:
- If a temporary resides in it,
- If the temporary is live after this instruction,
- Move the temporary to another (non-clobbered & free) reg,
- or spill it to memory. Mark the temporary as residing
- in both memory and a register if it was spilled (it might
- need to be read by this instruction).
-
- (ToDo: this is wrong for jump instructions?)
-
- We do this after step (a), because if we start with
- movq v1, %rsi
- which is an instruction that clobbers %rsi, if v1 currently resides
- in %rsi we want to get
- movq %rsi, %freereg
- movq %rsi, %rsi -- will disappear
- instead of
- movq %rsi, %freereg
- movq %freereg, %rsi
-
- (c) Update the current assignment
-
- (d) If the instruction is a branch:
- if the destination block already has a register assignment,
- Generate a new block with fixup code and redirect the
- jump to the new block.
- else,
- Update the block id->assignment mapping with the current
- assignment.
-
- (e) Delete all register assignments for temps which are read
- (only) and die here. Update the free register list.
-
- (f) Mark all registers clobbered by this instruction as not free,
- and mark temporaries which have been spilled due to clobbering
- as in memory (step (a) marks then as in both mem & reg).
-
- (g) For each temporary *written* by this instruction:
- Allocate a real register as for (b), spilling something
- else if necessary.
- - except when updating the assignment, drop any memory
- locations that the temporary was previously in, since
- they will be no longer valid after this instruction.
-
- (h) Delete all register assignments for temps which are
- written and die here (there should rarely be any). Update
- the free register list.
-
- (i) Rewrite the instruction with the new mapping.
-
- (j) For each spilled reg known to be now dead, re-add its stack slot
- to the free list.
-
--}
-
-module RegAlloc.Linear.Main (
- regAlloc,
- module RegAlloc.Linear.Base,
- module RegAlloc.Linear.Stats
- ) where
-
-#include "HsVersions.h"
-
-
-import GhcPrelude
-
-import RegAlloc.Linear.State
-import RegAlloc.Linear.Base
-import RegAlloc.Linear.StackMap
-import RegAlloc.Linear.FreeRegs
-import RegAlloc.Linear.Stats
-import RegAlloc.Linear.JoinToTargets
-import qualified RegAlloc.Linear.PPC.FreeRegs as PPC
-import qualified RegAlloc.Linear.SPARC.FreeRegs as SPARC
-import qualified RegAlloc.Linear.X86.FreeRegs as X86
-import qualified RegAlloc.Linear.X86_64.FreeRegs as X86_64
-import TargetReg
-import RegAlloc.Liveness
-import Instruction
-import Reg
-
-import GHC.Cmm.BlockId
-import GHC.Cmm.Dataflow.Collections
-import GHC.Cmm hiding (RegSet)
-
-import Digraph
-import GHC.Driver.Session
-import Unique
-import UniqSet
-import UniqFM
-import UniqSupply
-import Outputable
-import GHC.Platform
-
-import Data.Maybe
-import Data.List
-import Control.Monad
-
--- -----------------------------------------------------------------------------
--- Top level of the register allocator
-
--- Allocate registers
-regAlloc
- :: (Outputable instr, Instruction instr)
- => DynFlags
- -> LiveCmmDecl statics instr
- -> UniqSM ( NatCmmDecl statics instr
- , Maybe Int -- number of extra stack slots required,
- -- beyond maxSpillSlots
- , Maybe RegAllocStats
- )
-
-regAlloc _ (CmmData sec d)
- = return
- ( CmmData sec d
- , Nothing
- , Nothing )
-
-regAlloc _ (CmmProc (LiveInfo info _ _ _) lbl live [])
- = return ( CmmProc info lbl live (ListGraph [])
- , Nothing
- , Nothing )
-
-regAlloc dflags (CmmProc static lbl live sccs)
- | LiveInfo info entry_ids@(first_id:_) block_live _ <- static
- = do
- -- do register allocation on each component.
- (final_blocks, stats, stack_use)
- <- linearRegAlloc dflags entry_ids block_live sccs
-
- -- make sure the block that was first in the input list
- -- stays at the front of the output
- let ((first':_), rest')
- = partition ((== first_id) . blockId) final_blocks
-
- let max_spill_slots = maxSpillSlots dflags
- extra_stack
- | stack_use > max_spill_slots
- = Just (stack_use - max_spill_slots)
- | otherwise
- = Nothing
-
- return ( CmmProc info lbl live (ListGraph (first' : rest'))
- , extra_stack
- , Just stats)
-
--- bogus. to make non-exhaustive match warning go away.
-regAlloc _ (CmmProc _ _ _ _)
- = panic "RegAllocLinear.regAlloc: no match"
-
-
--- -----------------------------------------------------------------------------
--- Linear sweep to allocate registers
-
-
--- | Do register allocation on some basic blocks.
--- But be careful to allocate a block in an SCC only if it has
--- an entry in the block map or it is the first block.
---
-linearRegAlloc
- :: (Outputable instr, Instruction instr)
- => DynFlags
- -> [BlockId] -- ^ entry points
- -> BlockMap RegSet
- -- ^ live regs on entry to each basic block
- -> [SCC (LiveBasicBlock instr)]
- -- ^ instructions annotated with "deaths"
- -> UniqSM ([NatBasicBlock instr], RegAllocStats, Int)
-
-linearRegAlloc dflags entry_ids block_live sccs
- = case platformArch platform of
- ArchX86 -> go $ (frInitFreeRegs platform :: X86.FreeRegs)
- ArchX86_64 -> go $ (frInitFreeRegs platform :: X86_64.FreeRegs)
- ArchS390X -> panic "linearRegAlloc ArchS390X"
- ArchSPARC -> go $ (frInitFreeRegs platform :: SPARC.FreeRegs)
- ArchSPARC64 -> panic "linearRegAlloc ArchSPARC64"
- ArchPPC -> go $ (frInitFreeRegs platform :: PPC.FreeRegs)
- ArchARM _ _ _ -> panic "linearRegAlloc ArchARM"
- ArchARM64 -> panic "linearRegAlloc ArchARM64"
- ArchPPC_64 _ -> go $ (frInitFreeRegs platform :: PPC.FreeRegs)
- ArchAlpha -> panic "linearRegAlloc ArchAlpha"
- ArchMipseb -> panic "linearRegAlloc ArchMipseb"
- ArchMipsel -> panic "linearRegAlloc ArchMipsel"
- ArchJavaScript -> panic "linearRegAlloc ArchJavaScript"
- ArchUnknown -> panic "linearRegAlloc ArchUnknown"
- where
- go f = linearRegAlloc' dflags f entry_ids block_live sccs
- platform = targetPlatform dflags
-
-linearRegAlloc'
- :: (FR freeRegs, Outputable instr, Instruction instr)
- => DynFlags
- -> freeRegs
- -> [BlockId] -- ^ entry points
- -> BlockMap RegSet -- ^ live regs on entry to each basic block
- -> [SCC (LiveBasicBlock instr)] -- ^ instructions annotated with "deaths"
- -> UniqSM ([NatBasicBlock instr], RegAllocStats, Int)
-
-linearRegAlloc' dflags initFreeRegs entry_ids block_live sccs
- = do us <- getUniqueSupplyM
- let (_, stack, stats, blocks) =
- runR dflags mapEmpty initFreeRegs emptyRegMap (emptyStackMap dflags) us
- $ linearRA_SCCs entry_ids block_live [] sccs
- return (blocks, stats, getStackUse stack)
-
-
-linearRA_SCCs :: (FR freeRegs, Instruction instr, Outputable instr)
- => [BlockId]
- -> BlockMap RegSet
- -> [NatBasicBlock instr]
- -> [SCC (LiveBasicBlock instr)]
- -> RegM freeRegs [NatBasicBlock instr]
-
-linearRA_SCCs _ _ blocksAcc []
- = return $ reverse blocksAcc
-
-linearRA_SCCs entry_ids block_live blocksAcc (AcyclicSCC block : sccs)
- = do blocks' <- processBlock block_live block
- linearRA_SCCs entry_ids block_live
- ((reverse blocks') ++ blocksAcc)
- sccs
-
-linearRA_SCCs entry_ids block_live blocksAcc (CyclicSCC blocks : sccs)
- = do
- blockss' <- process entry_ids block_live blocks [] (return []) False
- linearRA_SCCs entry_ids block_live
- (reverse (concat blockss') ++ blocksAcc)
- sccs
-
-{- from John Dias's patch 2008/10/16:
- The linear-scan allocator sometimes allocates a block
- before allocating one of its predecessors, which could lead to
- inconsistent allocations. Make it so a block is only allocated
- if a predecessor has set the "incoming" assignments for the block, or
- if it's the procedure's entry block.
-
- BL 2009/02: Careful. If the assignment for a block doesn't get set for
- some reason then this function will loop. We should probably do some
- more sanity checking to guard against this eventuality.
--}
-
-process :: (FR freeRegs, Instruction instr, Outputable instr)
- => [BlockId]
- -> BlockMap RegSet
- -> [GenBasicBlock (LiveInstr instr)]
- -> [GenBasicBlock (LiveInstr instr)]
- -> [[NatBasicBlock instr]]
- -> Bool
- -> RegM freeRegs [[NatBasicBlock instr]]
-
-process _ _ [] [] accum _
- = return $ reverse accum
-
-process entry_ids block_live [] next_round accum madeProgress
- | not madeProgress
-
- {- BUGS: There are so many unreachable blocks in the code the warnings are overwhelming.
- pprTrace "RegAlloc.Linear.Main.process: no progress made, bailing out."
- ( text "Unreachable blocks:"
- $$ vcat (map ppr next_round)) -}
- = return $ reverse accum
-
- | otherwise
- = process entry_ids block_live
- next_round [] accum False
-
-process entry_ids block_live (b@(BasicBlock id _) : blocks)
- next_round accum madeProgress
- = do
- block_assig <- getBlockAssigR
-
- if isJust (mapLookup id block_assig)
- || id `elem` entry_ids
- then do
- b' <- processBlock block_live b
- process entry_ids block_live blocks
- next_round (b' : accum) True
-
- else process entry_ids block_live blocks
- (b : next_round) accum madeProgress
-
-
--- | Do register allocation on this basic block
---
-processBlock
- :: (FR freeRegs, Outputable instr, Instruction instr)
- => BlockMap RegSet -- ^ live regs on entry to each basic block
- -> LiveBasicBlock instr -- ^ block to do register allocation on
- -> RegM freeRegs [NatBasicBlock instr] -- ^ block with registers allocated
-
-processBlock block_live (BasicBlock id instrs)
- = do initBlock id block_live
- (instrs', fixups)
- <- linearRA block_live [] [] id instrs
- return $ BasicBlock id instrs' : fixups
-
-
--- | Load the freeregs and current reg assignment into the RegM state
--- for the basic block with this BlockId.
-initBlock :: FR freeRegs
- => BlockId -> BlockMap RegSet -> RegM freeRegs ()
-initBlock id block_live
- = do dflags <- getDynFlags
- let platform = targetPlatform dflags
- block_assig <- getBlockAssigR
- case mapLookup id block_assig of
- -- no prior info about this block: we must consider
- -- any fixed regs to be allocated, but we can ignore
- -- virtual regs (presumably this is part of a loop,
- -- and we'll iterate again). The assignment begins
- -- empty.
- Nothing
- -> do -- pprTrace "initFreeRegs" (text $ show initFreeRegs) (return ())
- case mapLookup id block_live of
- Nothing ->
- setFreeRegsR (frInitFreeRegs platform)
- Just live ->
- setFreeRegsR $ foldl' (flip $ frAllocateReg platform) (frInitFreeRegs platform)
- [ r | RegReal r <- nonDetEltsUniqSet live ]
- -- See Note [Unique Determinism and code generation]
- setAssigR emptyRegMap
-
- -- load info about register assignments leading into this block.
- Just (freeregs, assig)
- -> do setFreeRegsR freeregs
- setAssigR assig
-
-
--- | Do allocation for a sequence of instructions.
-linearRA
- :: (FR freeRegs, Outputable instr, Instruction instr)
- => BlockMap RegSet -- ^ map of what vregs are live on entry to each block.
- -> [instr] -- ^ accumulator for instructions already processed.
- -> [NatBasicBlock instr] -- ^ accumulator for blocks of fixup code.
- -> BlockId -- ^ id of the current block, for debugging.
- -> [LiveInstr instr] -- ^ liveness annotated instructions in this block.
-
- -> RegM freeRegs
- ( [instr] -- instructions after register allocation
- , [NatBasicBlock instr]) -- fresh blocks of fixup code.
-
-
-linearRA _ accInstr accFixup _ []
- = return
- ( reverse accInstr -- instrs need to be returned in the correct order.
- , accFixup) -- it doesn't matter what order the fixup blocks are returned in.
-
-
-linearRA block_live accInstr accFixups id (instr:instrs)
- = do
- (accInstr', new_fixups) <- raInsn block_live accInstr id instr
-
- linearRA block_live accInstr' (new_fixups ++ accFixups) id instrs
-
-
--- | Do allocation for a single instruction.
-raInsn
- :: (FR freeRegs, Outputable instr, Instruction instr)
- => BlockMap RegSet -- ^ map of what vregs are love on entry to each block.
- -> [instr] -- ^ accumulator for instructions already processed.
- -> BlockId -- ^ the id of the current block, for debugging
- -> LiveInstr instr -- ^ the instr to have its regs allocated, with liveness info.
- -> RegM freeRegs
- ( [instr] -- new instructions
- , [NatBasicBlock instr]) -- extra fixup blocks
-
-raInsn _ new_instrs _ (LiveInstr ii Nothing)
- | Just n <- takeDeltaInstr ii
- = do setDeltaR n
- return (new_instrs, [])
-
-raInsn _ new_instrs _ (LiveInstr ii@(Instr i) Nothing)
- | isMetaInstr ii
- = return (i : new_instrs, [])
-
-
-raInsn block_live new_instrs id (LiveInstr (Instr instr) (Just live))
- = do
- assig <- getAssigR
-
- -- If we have a reg->reg move between virtual registers, where the
- -- src register is not live after this instruction, and the dst
- -- register does not already have an assignment,
- -- and the source register is assigned to a register, not to a spill slot,
- -- then we can eliminate the instruction.
- -- (we can't eliminate it if the source register is on the stack, because
- -- we do not want to use one spill slot for different virtual registers)
- case takeRegRegMoveInstr instr of
- Just (src,dst) | src `elementOfUniqSet` (liveDieRead live),
- isVirtualReg dst,
- not (dst `elemUFM` assig),
- isRealReg src || isInReg src assig -> do
- case src of
- (RegReal rr) -> setAssigR (addToUFM assig dst (InReg rr))
- -- if src is a fixed reg, then we just map dest to this
- -- reg in the assignment. src must be an allocatable reg,
- -- otherwise it wouldn't be in r_dying.
- _virt -> case lookupUFM assig src of
- Nothing -> panic "raInsn"
- Just loc ->
- setAssigR (addToUFM (delFromUFM assig src) dst loc)
-
- -- we have eliminated this instruction
- {-
- freeregs <- getFreeRegsR
- assig <- getAssigR
- pprTrace "raInsn" (text "ELIMINATED: " <> docToSDoc (pprInstr instr)
- $$ ppr r_dying <+> ppr w_dying $$ text (show freeregs) $$ ppr assig) $ do
- -}
- return (new_instrs, [])
-
- _ -> genRaInsn block_live new_instrs id instr
- (nonDetEltsUniqSet $ liveDieRead live)
- (nonDetEltsUniqSet $ liveDieWrite live)
- -- See Note [Unique Determinism and code generation]
-
-raInsn _ _ _ instr
- = pprPanic "raInsn" (text "no match for:" <> ppr instr)
-
--- ToDo: what can we do about
---
--- R1 = x
--- jump I64[x] // [R1]
---
--- where x is mapped to the same reg as R1. We want to coalesce x and
--- R1, but the register allocator doesn't know whether x will be
--- assigned to again later, in which case x and R1 should be in
--- different registers. Right now we assume the worst, and the
--- assignment to R1 will clobber x, so we'll spill x into another reg,
--- generating another reg->reg move.
-
-
-isInReg :: Reg -> RegMap Loc -> Bool
-isInReg src assig | Just (InReg _) <- lookupUFM assig src = True
- | otherwise = False
-
-
-genRaInsn :: (FR freeRegs, Instruction instr, Outputable instr)
- => BlockMap RegSet
- -> [instr]
- -> BlockId
- -> instr
- -> [Reg]
- -> [Reg]
- -> RegM freeRegs ([instr], [NatBasicBlock instr])
-
-genRaInsn block_live new_instrs block_id instr r_dying w_dying = do
- dflags <- getDynFlags
- let platform = targetPlatform dflags
- case regUsageOfInstr platform instr of { RU read written ->
- do
- let real_written = [ rr | (RegReal rr) <- written ]
- let virt_written = [ vr | (RegVirtual vr) <- written ]
-
- -- we don't need to do anything with real registers that are
- -- only read by this instr. (the list is typically ~2 elements,
- -- so using nub isn't a problem).
- let virt_read = nub [ vr | (RegVirtual vr) <- read ]
-
- -- debugging
-{- freeregs <- getFreeRegsR
- assig <- getAssigR
- pprDebugAndThen (defaultDynFlags Settings{ sTargetPlatform=platform } undefined) trace "genRaInsn"
- (ppr instr
- $$ text "r_dying = " <+> ppr r_dying
- $$ text "w_dying = " <+> ppr w_dying
- $$ text "virt_read = " <+> ppr virt_read
- $$ text "virt_written = " <+> ppr virt_written
- $$ text "freeregs = " <+> text (show freeregs)
- $$ text "assig = " <+> ppr assig)
- $ do
--}
-
- -- (a), (b) allocate real regs for all regs read by this instruction.
- (r_spills, r_allocd) <-
- allocateRegsAndSpill True{-reading-} virt_read [] [] virt_read
-
- -- (c) save any temporaries which will be clobbered by this instruction
- clobber_saves <- saveClobberedTemps real_written r_dying
-
- -- (d) Update block map for new destinations
- -- NB. do this before removing dead regs from the assignment, because
- -- these dead regs might in fact be live in the jump targets (they're
- -- only dead in the code that follows in the current basic block).
- (fixup_blocks, adjusted_instr)
- <- joinToTargets block_live block_id instr
-
- -- Debugging - show places where the reg alloc inserted
- -- assignment fixup blocks.
- -- when (not $ null fixup_blocks) $
- -- pprTrace "fixup_blocks" (ppr fixup_blocks) (return ())
-
- -- (e) Delete all register assignments for temps which are read
- -- (only) and die here. Update the free register list.
- releaseRegs r_dying
-
- -- (f) Mark regs which are clobbered as unallocatable
- clobberRegs real_written
-
- -- (g) Allocate registers for temporaries *written* (only)
- (w_spills, w_allocd) <-
- allocateRegsAndSpill False{-writing-} virt_written [] [] virt_written
-
- -- (h) Release registers for temps which are written here and not
- -- used again.
- releaseRegs w_dying
-
- let
- -- (i) Patch the instruction
- patch_map
- = listToUFM
- [ (t, RegReal r)
- | (t, r) <- zip virt_read r_allocd
- ++ zip virt_written w_allocd ]
-
- patched_instr
- = patchRegsOfInstr adjusted_instr patchLookup
-
- patchLookup x
- = case lookupUFM patch_map x of
- Nothing -> x
- Just y -> y
-
-
- -- (j) free up stack slots for dead spilled regs
- -- TODO (can't be bothered right now)
-
- -- erase reg->reg moves where the source and destination are the same.
- -- If the src temp didn't die in this instr but happened to be allocated
- -- to the same real reg as the destination, then we can erase the move anyway.
- let squashed_instr = case takeRegRegMoveInstr patched_instr of
- Just (src, dst)
- | src == dst -> []
- _ -> [patched_instr]
-
- let code = squashed_instr ++ w_spills ++ reverse r_spills
- ++ clobber_saves ++ new_instrs
-
--- pprTrace "patched-code" ((vcat $ map (docToSDoc . pprInstr) code)) $ do
--- pprTrace "pached-fixup" ((ppr fixup_blocks)) $ do
-
- return (code, fixup_blocks)
-
- }
-
--- -----------------------------------------------------------------------------
--- releaseRegs
-
-releaseRegs :: FR freeRegs => [Reg] -> RegM freeRegs ()
-releaseRegs regs = do
- dflags <- getDynFlags
- let platform = targetPlatform dflags
- assig <- getAssigR
- free <- getFreeRegsR
- let loop assig !free [] = do setAssigR assig; setFreeRegsR free; return ()
- loop assig !free (RegReal rr : rs) = loop assig (frReleaseReg platform rr free) rs
- loop assig !free (r:rs) =
- case lookupUFM assig r of
- Just (InBoth real _) -> loop (delFromUFM assig r)
- (frReleaseReg platform real free) rs
- Just (InReg real) -> loop (delFromUFM assig r)
- (frReleaseReg platform real free) rs
- _ -> loop (delFromUFM assig r) free rs
- loop assig free regs
-
-
--- -----------------------------------------------------------------------------
--- Clobber real registers
-
--- For each temp in a register that is going to be clobbered:
--- - if the temp dies after this instruction, do nothing
--- - otherwise, put it somewhere safe (another reg if possible,
--- otherwise spill and record InBoth in the assignment).
--- - for allocateRegs on the temps *read*,
--- - clobbered regs are allocatable.
---
--- for allocateRegs on the temps *written*,
--- - clobbered regs are not allocatable.
---
-
-saveClobberedTemps
- :: (Instruction instr, FR freeRegs)
- => [RealReg] -- real registers clobbered by this instruction
- -> [Reg] -- registers which are no longer live after this insn
- -> RegM freeRegs [instr] -- return: instructions to spill any temps that will
- -- be clobbered.
-
-saveClobberedTemps [] _
- = return []
-
-saveClobberedTemps clobbered dying
- = do
- assig <- getAssigR
- let to_spill
- = [ (temp,reg)
- | (temp, InReg reg) <- nonDetUFMToList assig
- -- This is non-deterministic but we do not
- -- currently support deterministic code-generation.
- -- See Note [Unique Determinism and code generation]
- , any (realRegsAlias reg) clobbered
- , temp `notElem` map getUnique dying ]
-
- (instrs,assig') <- clobber assig [] to_spill
- setAssigR assig'
- return instrs
-
- where
- clobber assig instrs []
- = return (instrs, assig)
-
- clobber assig instrs ((temp, reg) : rest)
- = do dflags <- getDynFlags
- let platform = targetPlatform dflags
-
- freeRegs <- getFreeRegsR
- let regclass = targetClassOfRealReg platform reg
- freeRegs_thisClass = frGetFreeRegs platform regclass freeRegs
-
- case filter (`notElem` clobbered) freeRegs_thisClass of
-
- -- (1) we have a free reg of the right class that isn't
- -- clobbered by this instruction; use it to save the
- -- clobbered value.
- (my_reg : _) -> do
- setFreeRegsR (frAllocateReg platform my_reg freeRegs)
-
- let new_assign = addToUFM assig temp (InReg my_reg)
- let instr = mkRegRegMoveInstr platform
- (RegReal reg) (RegReal my_reg)
-
- clobber new_assign (instr : instrs) rest
-
- -- (2) no free registers: spill the value
- [] -> do
- (spill, slot) <- spillR (RegReal reg) temp
-
- -- record why this reg was spilled for profiling
- recordSpill (SpillClobber temp)
-
- let new_assign = addToUFM assig temp (InBoth reg slot)
-
- clobber new_assign (spill : instrs) rest
-
-
-
--- | Mark all these real regs as allocated,
--- and kick out their vreg assignments.
---
-clobberRegs :: FR freeRegs => [RealReg] -> RegM freeRegs ()
-clobberRegs []
- = return ()
-
-clobberRegs clobbered
- = do dflags <- getDynFlags
- let platform = targetPlatform dflags
-
- freeregs <- getFreeRegsR
- setFreeRegsR $! foldl' (flip $ frAllocateReg platform) freeregs clobbered
-
- assig <- getAssigR
- setAssigR $! clobber assig (nonDetUFMToList assig)
- -- This is non-deterministic but we do not
- -- currently support deterministic code-generation.
- -- See Note [Unique Determinism and code generation]
-
- where
- -- if the temp was InReg and clobbered, then we will have
- -- saved it in saveClobberedTemps above. So the only case
- -- we have to worry about here is InBoth. Note that this
- -- also catches temps which were loaded up during allocation
- -- of read registers, not just those saved in saveClobberedTemps.
-
- clobber assig []
- = assig
-
- clobber assig ((temp, InBoth reg slot) : rest)
- | any (realRegsAlias reg) clobbered
- = clobber (addToUFM assig temp (InMem slot)) rest
-
- clobber assig (_:rest)
- = clobber assig rest
-
--- -----------------------------------------------------------------------------
--- allocateRegsAndSpill
-
--- Why are we performing a spill?
-data SpillLoc = ReadMem StackSlot -- reading from register only in memory
- | WriteNew -- writing to a new variable
- | WriteMem -- writing to register only in memory
--- Note that ReadNew is not valid, since you don't want to be reading
--- from an uninitialized register. We also don't need the location of
--- the register in memory, since that will be invalidated by the write.
--- Technically, we could coalesce WriteNew and WriteMem into a single
--- entry as well. -- EZY
-
--- This function does several things:
--- For each temporary referred to by this instruction,
--- we allocate a real register (spilling another temporary if necessary).
--- We load the temporary up from memory if necessary.
--- We also update the register assignment in the process, and
--- the list of free registers and free stack slots.
-
-allocateRegsAndSpill
- :: (FR freeRegs, Outputable instr, Instruction instr)
- => Bool -- True <=> reading (load up spilled regs)
- -> [VirtualReg] -- don't push these out
- -> [instr] -- spill insns
- -> [RealReg] -- real registers allocated (accum.)
- -> [VirtualReg] -- temps to allocate
- -> RegM freeRegs ( [instr] , [RealReg])
-
-allocateRegsAndSpill _ _ spills alloc []
- = return (spills, reverse alloc)
-
-allocateRegsAndSpill reading keep spills alloc (r:rs)
- = do assig <- getAssigR
- let doSpill = allocRegsAndSpill_spill reading keep spills alloc r rs assig
- case lookupUFM assig r of
- -- case (1a): already in a register
- Just (InReg my_reg) ->
- allocateRegsAndSpill reading keep spills (my_reg:alloc) rs
-
- -- case (1b): already in a register (and memory)
- -- NB1. if we're writing this register, update its assignment to be
- -- InReg, because the memory value is no longer valid.
- -- NB2. This is why we must process written registers here, even if they
- -- are also read by the same instruction.
- Just (InBoth my_reg _)
- -> do when (not reading) (setAssigR (addToUFM assig r (InReg my_reg)))
- allocateRegsAndSpill reading keep spills (my_reg:alloc) rs
-
- -- Not already in a register, so we need to find a free one...
- Just (InMem slot) | reading -> doSpill (ReadMem slot)
- | otherwise -> doSpill WriteMem
- Nothing | reading ->
- pprPanic "allocateRegsAndSpill: Cannot read from uninitialized register" (ppr r)
- -- NOTE: if the input to the NCG contains some
- -- unreachable blocks with junk code, this panic
- -- might be triggered. Make sure you only feed
- -- sensible code into the NCG. In GHC.Cmm.Pipeline we
- -- call removeUnreachableBlocks at the end for this
- -- reason.
-
- | otherwise -> doSpill WriteNew
-
-
--- reading is redundant with reason, but we keep it around because it's
--- convenient and it maintains the recursive structure of the allocator. -- EZY
-allocRegsAndSpill_spill :: (FR freeRegs, Instruction instr, Outputable instr)
- => Bool
- -> [VirtualReg]
- -> [instr]
- -> [RealReg]
- -> VirtualReg
- -> [VirtualReg]
- -> UniqFM Loc
- -> SpillLoc
- -> RegM freeRegs ([instr], [RealReg])
-allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc
- = do dflags <- getDynFlags
- let platform = targetPlatform dflags
- freeRegs <- getFreeRegsR
- let freeRegs_thisClass = frGetFreeRegs platform (classOfVirtualReg r) freeRegs
-
- case freeRegs_thisClass of
-
- -- case (2): we have a free register
- (my_reg : _) ->
- do spills' <- loadTemp r spill_loc my_reg spills
-
- setAssigR (addToUFM assig r $! newLocation spill_loc my_reg)
- setFreeRegsR $ frAllocateReg platform my_reg freeRegs
-
- allocateRegsAndSpill reading keep spills' (my_reg : alloc) rs
-
-
- -- case (3): we need to push something out to free up a register
- [] ->
- do let inRegOrBoth (InReg _) = True
- inRegOrBoth (InBoth _ _) = True
- inRegOrBoth _ = False
- let candidates' =
- flip delListFromUFM keep $
- filterUFM inRegOrBoth $
- assig
- -- This is non-deterministic but we do not
- -- currently support deterministic code-generation.
- -- See Note [Unique Determinism and code generation]
- let candidates = nonDetUFMToList candidates'
-
- -- the vregs we could kick out that are already in a slot
- let candidates_inBoth
- = [ (temp, reg, mem)
- | (temp, InBoth reg mem) <- candidates
- , targetClassOfRealReg platform reg == classOfVirtualReg r ]
-
- -- the vregs we could kick out that are only in a reg
- -- this would require writing the reg to a new slot before using it.
- let candidates_inReg
- = [ (temp, reg)
- | (temp, InReg reg) <- candidates
- , targetClassOfRealReg platform reg == classOfVirtualReg r ]
-
- let result
-
- -- we have a temporary that is in both register and mem,
- -- just free up its register for use.
- | (temp, my_reg, slot) : _ <- candidates_inBoth
- = do spills' <- loadTemp r spill_loc my_reg spills
- let assig1 = addToUFM assig temp (InMem slot)
- let assig2 = addToUFM assig1 r $! newLocation spill_loc my_reg
-
- setAssigR assig2
- allocateRegsAndSpill reading keep spills' (my_reg:alloc) rs
-
- -- otherwise, we need to spill a temporary that currently
- -- resides in a register.
- | (temp_to_push_out, (my_reg :: RealReg)) : _
- <- candidates_inReg
- = do
- (spill_insn, slot) <- spillR (RegReal my_reg) temp_to_push_out
- let spill_store = (if reading then id else reverse)
- [ -- COMMENT (fsLit "spill alloc")
- spill_insn ]
-
- -- record that this temp was spilled
- recordSpill (SpillAlloc temp_to_push_out)
-
- -- update the register assignment
- let assig1 = addToUFM assig temp_to_push_out (InMem slot)
- let assig2 = addToUFM assig1 r $! newLocation spill_loc my_reg
- setAssigR assig2
-
- -- if need be, load up a spilled temp into the reg we've just freed up.
- spills' <- loadTemp r spill_loc my_reg spills
-
- allocateRegsAndSpill reading keep
- (spill_store ++ spills')
- (my_reg:alloc) rs
-
-
- -- there wasn't anything to spill, so we're screwed.
- | otherwise
- = pprPanic ("RegAllocLinear.allocRegsAndSpill: no spill candidates\n")
- $ vcat
- [ text "allocating vreg: " <> text (show r)
- , text "assignment: " <> ppr assig
- , text "freeRegs: " <> text (show freeRegs)
- , text "initFreeRegs: " <> text (show (frInitFreeRegs platform `asTypeOf` freeRegs)) ]
-
- result
-
-
--- | Calculate a new location after a register has been loaded.
-newLocation :: SpillLoc -> RealReg -> Loc
--- if the tmp was read from a slot, then now its in a reg as well
-newLocation (ReadMem slot) my_reg = InBoth my_reg slot
--- writes will always result in only the register being available
-newLocation _ my_reg = InReg my_reg
-
--- | Load up a spilled temporary if we need to (read from memory).
-loadTemp
- :: (Instruction instr)
- => VirtualReg -- the temp being loaded
- -> SpillLoc -- the current location of this temp
- -> RealReg -- the hreg to load the temp into
- -> [instr]
- -> RegM freeRegs [instr]
-
-loadTemp vreg (ReadMem slot) hreg spills
- = do
- insn <- loadR (RegReal hreg) slot
- recordSpill (SpillLoad $ getUnique vreg)
- return $ {- COMMENT (fsLit "spill load") : -} insn : spills
-
-loadTemp _ _ _ spills =
- return spills
-
diff --git a/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs
deleted file mode 100644
index 1239380ba2..0000000000
--- a/compiler/nativeGen/RegAlloc/Linear/PPC/FreeRegs.hs
+++ /dev/null
@@ -1,61 +0,0 @@
--- | Free regs map for PowerPC
-module RegAlloc.Linear.PPC.FreeRegs
-where
-
-import GhcPrelude
-
-import PPC.Regs
-import RegClass
-import Reg
-
-import Outputable
-import GHC.Platform
-
-import Data.Word
-import Data.Bits
-
--- The PowerPC has 32 integer and 32 floating point registers.
--- This is 32bit PowerPC, so Word64 is inefficient - two Word32s are much
--- better.
--- Note that when getFreeRegs scans for free registers, it starts at register
--- 31 and counts down. This is a hack for the PowerPC - the higher-numbered
--- registers are callee-saves, while the lower regs are caller-saves, so it
--- makes sense to start at the high end.
--- Apart from that, the code does nothing PowerPC-specific, so feel free to
--- add your favourite platform to the #if (if you have 64 registers but only
--- 32-bit words).
-
-data FreeRegs = FreeRegs !Word32 !Word32
- deriving( Show ) -- The Show is used in an ASSERT
-
-noFreeRegs :: FreeRegs
-noFreeRegs = FreeRegs 0 0
-
-releaseReg :: RealReg -> FreeRegs -> FreeRegs
-releaseReg (RealRegSingle r) (FreeRegs g f)
- | r > 31 = FreeRegs g (f .|. (1 `shiftL` (r - 32)))
- | otherwise = FreeRegs (g .|. (1 `shiftL` r)) f
-
-releaseReg _ _
- = panic "RegAlloc.Linear.PPC.releaseReg: bad reg"
-
-initFreeRegs :: Platform -> FreeRegs
-initFreeRegs platform = foldl' (flip releaseReg) noFreeRegs (allocatableRegs platform)
-
-getFreeRegs :: RegClass -> FreeRegs -> [RealReg] -- lazily
-getFreeRegs cls (FreeRegs g f)
- | RcDouble <- cls = go f (0x80000000) 63
- | RcInteger <- cls = go g (0x80000000) 31
- | otherwise = pprPanic "RegAllocLinear.getFreeRegs: Bad register class" (ppr cls)
- where
- go _ 0 _ = []
- go x m i | x .&. m /= 0 = RealRegSingle i : (go x (m `shiftR` 1) $! i-1)
- | otherwise = go x (m `shiftR` 1) $! i-1
-
-allocateReg :: RealReg -> FreeRegs -> FreeRegs
-allocateReg (RealRegSingle r) (FreeRegs g f)
- | r > 31 = FreeRegs g (f .&. complement (1 `shiftL` (r - 32)))
- | otherwise = FreeRegs (g .&. complement (1 `shiftL` r)) f
-
-allocateReg _ _
- = panic "RegAlloc.Linear.PPC.allocateReg: bad reg"
diff --git a/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs
deleted file mode 100644
index fc67159f0f..0000000000
--- a/compiler/nativeGen/RegAlloc/Linear/SPARC/FreeRegs.hs
+++ /dev/null
@@ -1,189 +0,0 @@
-{-# LANGUAGE CPP #-}
-
--- | Free regs map for SPARC
-module RegAlloc.Linear.SPARC.FreeRegs
-where
-
-import GhcPrelude
-
-import SPARC.Regs
-import RegClass
-import Reg
-
-import GHC.Platform.Regs
-import Outputable
-import GHC.Platform
-
-import Data.Word
-import Data.Bits
-
-
---------------------------------------------------------------------------------
--- SPARC is like PPC, except for twinning of floating point regs.
--- When we allocate a double reg we must take an even numbered
--- float reg, as well as the one after it.
-
-
--- Holds bitmaps showing what registers are currently allocated.
--- The float and double reg bitmaps overlap, but we only alloc
--- float regs into the float map, and double regs into the double map.
---
--- Free regs have a bit set in the corresponding bitmap.
---
-data FreeRegs
- = FreeRegs
- !Word32 -- int reg bitmap regs 0..31
- !Word32 -- float reg bitmap regs 32..63
- !Word32 -- double reg bitmap regs 32..63
-
-instance Show FreeRegs where
- show = showFreeRegs
-
--- | A reg map where no regs are free to be allocated.
-noFreeRegs :: FreeRegs
-noFreeRegs = FreeRegs 0 0 0
-
-
--- | The initial set of free regs.
-initFreeRegs :: Platform -> FreeRegs
-initFreeRegs platform
- = foldl' (flip $ releaseReg platform) noFreeRegs allocatableRegs
-
-
--- | Get all the free registers of this class.
-getFreeRegs :: RegClass -> FreeRegs -> [RealReg] -- lazily
-getFreeRegs cls (FreeRegs g f d)
- | RcInteger <- cls = map RealRegSingle $ go 1 g 1 0
- | RcFloat <- cls = map RealRegSingle $ go 1 f 1 32
- | RcDouble <- cls = map (\i -> RealRegPair i (i+1)) $ go 2 d 1 32
-#if __GLASGOW_HASKELL__ <= 810
- | otherwise = pprPanic "RegAllocLinear.getFreeRegs: Bad register class " (ppr cls)
-#endif
- where
- go _ _ 0 _
- = []
-
- go step bitmap mask ix
- | bitmap .&. mask /= 0
- = ix : (go step bitmap (mask `shiftL` step) $! ix + step)
-
- | otherwise
- = go step bitmap (mask `shiftL` step) $! ix + step
-
-
--- | Grab a register.
-allocateReg :: Platform -> RealReg -> FreeRegs -> FreeRegs
-allocateReg platform
- reg@(RealRegSingle r)
- (FreeRegs g f d)
-
- -- can't allocate free regs
- | not $ freeReg platform r
- = pprPanic "SPARC.FreeRegs.allocateReg: not allocating pinned reg" (ppr reg)
-
- -- a general purpose reg
- | r <= 31
- = let mask = complement (bitMask r)
- in FreeRegs
- (g .&. mask)
- f
- d
-
- -- a float reg
- | r >= 32, r <= 63
- = let mask = complement (bitMask (r - 32))
-
- -- the mask of the double this FP reg aliases
- maskLow = if r `mod` 2 == 0
- then complement (bitMask (r - 32))
- else complement (bitMask (r - 32 - 1))
- in FreeRegs
- g
- (f .&. mask)
- (d .&. maskLow)
-
- | otherwise
- = pprPanic "SPARC.FreeRegs.releaseReg: not allocating bad reg" (ppr reg)
-
-allocateReg _
- reg@(RealRegPair r1 r2)
- (FreeRegs g f d)
-
- | r1 >= 32, r1 <= 63, r1 `mod` 2 == 0
- , r2 >= 32, r2 <= 63
- = let mask1 = complement (bitMask (r1 - 32))
- mask2 = complement (bitMask (r2 - 32))
- in
- FreeRegs
- g
- ((f .&. mask1) .&. mask2)
- (d .&. mask1)
-
- | otherwise
- = pprPanic "SPARC.FreeRegs.releaseReg: not allocating bad reg" (ppr reg)
-
-
-
--- | Release a register from allocation.
--- The register liveness information says that most regs die after a C call,
--- but we still don't want to allocate to some of them.
---
-releaseReg :: Platform -> RealReg -> FreeRegs -> FreeRegs
-releaseReg platform
- reg@(RealRegSingle r)
- regs@(FreeRegs g f d)
-
- -- don't release pinned reg
- | not $ freeReg platform r
- = regs
-
- -- a general purpose reg
- | r <= 31
- = let mask = bitMask r
- in FreeRegs (g .|. mask) f d
-
- -- a float reg
- | r >= 32, r <= 63
- = let mask = bitMask (r - 32)
-
- -- the mask of the double this FP reg aliases
- maskLow = if r `mod` 2 == 0
- then bitMask (r - 32)
- else bitMask (r - 32 - 1)
- in FreeRegs
- g
- (f .|. mask)
- (d .|. maskLow)
-
- | otherwise
- = pprPanic "SPARC.FreeRegs.releaseReg: not releasing bad reg" (ppr reg)
-
-releaseReg _
- reg@(RealRegPair r1 r2)
- (FreeRegs g f d)
-
- | r1 >= 32, r1 <= 63, r1 `mod` 2 == 0
- , r2 >= 32, r2 <= 63
- = let mask1 = bitMask (r1 - 32)
- mask2 = bitMask (r2 - 32)
- in
- FreeRegs
- g
- ((f .|. mask1) .|. mask2)
- (d .|. mask1)
-
- | otherwise
- = pprPanic "SPARC.FreeRegs.releaseReg: not releasing bad reg" (ppr reg)
-
-
-
-bitMask :: Int -> Word32
-bitMask n = 1 `shiftL` n
-
-
-showFreeRegs :: FreeRegs -> String
-showFreeRegs regs
- = "FreeRegs\n"
- ++ " integer: " ++ (show $ getFreeRegs RcInteger regs) ++ "\n"
- ++ " float: " ++ (show $ getFreeRegs RcFloat regs) ++ "\n"
- ++ " double: " ++ (show $ getFreeRegs RcDouble regs) ++ "\n"
diff --git a/compiler/nativeGen/RegAlloc/Linear/StackMap.hs b/compiler/nativeGen/RegAlloc/Linear/StackMap.hs
deleted file mode 100644
index 79496c6e43..0000000000
--- a/compiler/nativeGen/RegAlloc/Linear/StackMap.hs
+++ /dev/null
@@ -1,61 +0,0 @@
-
--- | The assignment of virtual registers to stack slots
-
--- We have lots of stack slots. Memory-to-memory moves are a pain on most
--- architectures. Therefore, we avoid having to generate memory-to-memory moves
--- by simply giving every virtual register its own stack slot.
-
--- The StackMap stack map keeps track of virtual register - stack slot
--- associations and of which stack slots are still free. Once it has been
--- associated, a stack slot is never "freed" or removed from the StackMap again,
--- it remains associated until we are done with the current CmmProc.
---
-module RegAlloc.Linear.StackMap (
- StackSlot,
- StackMap(..),
- emptyStackMap,
- getStackSlotFor,
- getStackUse
-)
-
-where
-
-import GhcPrelude
-
-import GHC.Driver.Session
-import UniqFM
-import Unique
-
-
--- | Identifier for a stack slot.
-type StackSlot = Int
-
-data StackMap
- = StackMap
- { -- | The slots that are still available to be allocated.
- stackMapNextFreeSlot :: !Int
-
- -- | Assignment of vregs to stack slots.
- , stackMapAssignment :: UniqFM StackSlot }
-
-
--- | An empty stack map, with all slots available.
-emptyStackMap :: DynFlags -> StackMap
-emptyStackMap _ = StackMap 0 emptyUFM
-
-
--- | If this vreg unique already has a stack assignment then return the slot number,
--- otherwise allocate a new slot, and update the map.
---
-getStackSlotFor :: StackMap -> Unique -> (StackMap, Int)
-
-getStackSlotFor fs@(StackMap _ reserved) reg
- | Just slot <- lookupUFM reserved reg = (fs, slot)
-
-getStackSlotFor (StackMap freeSlot reserved) reg =
- (StackMap (freeSlot+1) (addToUFM reserved reg freeSlot), freeSlot)
-
--- | Return the number of stack slots that were allocated
-getStackUse :: StackMap -> Int
-getStackUse (StackMap freeSlot _) = freeSlot
-
diff --git a/compiler/nativeGen/RegAlloc/Linear/State.hs b/compiler/nativeGen/RegAlloc/Linear/State.hs
deleted file mode 100644
index 9e5efa5f7f..0000000000
--- a/compiler/nativeGen/RegAlloc/Linear/State.hs
+++ /dev/null
@@ -1,184 +0,0 @@
-{-# LANGUAGE CPP, PatternSynonyms, DeriveFunctor #-}
-
-#if !defined(GHC_LOADED_INTO_GHCI)
-{-# LANGUAGE UnboxedTuples #-}
-#endif
-
--- | State monad for the linear register allocator.
-
--- Here we keep all the state that the register allocator keeps track
--- of as it walks the instructions in a basic block.
-
-module RegAlloc.Linear.State (
- RA_State(..),
- RegM,
- runR,
-
- spillR,
- loadR,
-
- getFreeRegsR,
- setFreeRegsR,
-
- getAssigR,
- setAssigR,
-
- getBlockAssigR,
- setBlockAssigR,
-
- setDeltaR,
- getDeltaR,
-
- getUniqueR,
-
- recordSpill,
- recordFixupBlock
-)
-where
-
-import GhcPrelude
-
-import RegAlloc.Linear.Stats
-import RegAlloc.Linear.StackMap
-import RegAlloc.Linear.Base
-import RegAlloc.Liveness
-import Instruction
-import Reg
-import GHC.Cmm.BlockId
-
-import GHC.Driver.Session
-import Unique
-import UniqSupply
-
-import Control.Monad (ap)
-
--- Avoids using unboxed tuples when loading into GHCi
-#if !defined(GHC_LOADED_INTO_GHCI)
-
-type RA_Result freeRegs a = (# RA_State freeRegs, a #)
-
-pattern RA_Result :: a -> b -> (# a, b #)
-pattern RA_Result a b = (# a, b #)
-{-# COMPLETE RA_Result #-}
-#else
-
-data RA_Result freeRegs a = RA_Result {-# UNPACK #-} !(RA_State freeRegs) !a
- deriving (Functor)
-
-#endif
-
--- | The register allocator monad type.
-newtype RegM freeRegs a
- = RegM { unReg :: RA_State freeRegs -> RA_Result freeRegs a }
- deriving (Functor)
-
-instance Applicative (RegM freeRegs) where
- pure a = RegM $ \s -> RA_Result s a
- (<*>) = ap
-
-instance Monad (RegM freeRegs) where
- m >>= k = RegM $ \s -> case unReg m s of { RA_Result s a -> unReg (k a) s }
-
-instance HasDynFlags (RegM a) where
- getDynFlags = RegM $ \s -> RA_Result s (ra_DynFlags s)
-
-
--- | Run a computation in the RegM register allocator monad.
-runR :: DynFlags
- -> BlockAssignment freeRegs
- -> freeRegs
- -> RegMap Loc
- -> StackMap
- -> UniqSupply
- -> RegM freeRegs a
- -> (BlockAssignment freeRegs, StackMap, RegAllocStats, a)
-
-runR dflags block_assig freeregs assig stack us thing =
- case unReg thing
- (RA_State
- { ra_blockassig = block_assig
- , ra_freeregs = freeregs
- , ra_assig = assig
- , ra_delta = 0{-???-}
- , ra_stack = stack
- , ra_us = us
- , ra_spills = []
- , ra_DynFlags = dflags
- , ra_fixups = [] })
- of
- RA_Result state returned_thing
- -> (ra_blockassig state, ra_stack state, makeRAStats state, returned_thing)
-
-
--- | Make register allocator stats from its final state.
-makeRAStats :: RA_State freeRegs -> RegAllocStats
-makeRAStats state
- = RegAllocStats
- { ra_spillInstrs = binSpillReasons (ra_spills state)
- , ra_fixupList = ra_fixups state }
-
-
-spillR :: Instruction instr
- => Reg -> Unique -> RegM freeRegs (instr, Int)
-
-spillR reg temp = RegM $ \ s@RA_State{ra_delta=delta, ra_stack=stack0} ->
- let dflags = ra_DynFlags s
- (stack1,slot) = getStackSlotFor stack0 temp
- instr = mkSpillInstr dflags reg delta slot
- in
- RA_Result s{ra_stack=stack1} (instr,slot)
-
-
-loadR :: Instruction instr
- => Reg -> Int -> RegM freeRegs instr
-
-loadR reg slot = RegM $ \ s@RA_State{ra_delta=delta} ->
- let dflags = ra_DynFlags s
- in RA_Result s (mkLoadInstr dflags reg delta slot)
-
-getFreeRegsR :: RegM freeRegs freeRegs
-getFreeRegsR = RegM $ \ s@RA_State{ra_freeregs = freeregs} ->
- RA_Result s freeregs
-
-setFreeRegsR :: freeRegs -> RegM freeRegs ()
-setFreeRegsR regs = RegM $ \ s ->
- RA_Result s{ra_freeregs = regs} ()
-
-getAssigR :: RegM freeRegs (RegMap Loc)
-getAssigR = RegM $ \ s@RA_State{ra_assig = assig} ->
- RA_Result s assig
-
-setAssigR :: RegMap Loc -> RegM freeRegs ()
-setAssigR assig = RegM $ \ s ->
- RA_Result s{ra_assig=assig} ()
-
-getBlockAssigR :: RegM freeRegs (BlockAssignment freeRegs)
-getBlockAssigR = RegM $ \ s@RA_State{ra_blockassig = assig} ->
- RA_Result s assig
-
-setBlockAssigR :: BlockAssignment freeRegs -> RegM freeRegs ()
-setBlockAssigR assig = RegM $ \ s ->
- RA_Result s{ra_blockassig = assig} ()
-
-setDeltaR :: Int -> RegM freeRegs ()
-setDeltaR n = RegM $ \ s ->
- RA_Result s{ra_delta = n} ()
-
-getDeltaR :: RegM freeRegs Int
-getDeltaR = RegM $ \s -> RA_Result s (ra_delta s)
-
-getUniqueR :: RegM freeRegs Unique
-getUniqueR = RegM $ \s ->
- case takeUniqFromSupply (ra_us s) of
- (uniq, us) -> RA_Result s{ra_us = us} uniq
-
-
--- | Record that a spill instruction was inserted, for profiling.
-recordSpill :: SpillReason -> RegM freeRegs ()
-recordSpill spill
- = RegM $ \s -> RA_Result (s { ra_spills = spill : ra_spills s }) ()
-
--- | Record a created fixup block
-recordFixupBlock :: BlockId -> BlockId -> BlockId -> RegM freeRegs ()
-recordFixupBlock from between to
- = RegM $ \s -> RA_Result (s { ra_fixups = (from,between,to) : ra_fixups s }) ()
diff --git a/compiler/nativeGen/RegAlloc/Linear/Stats.hs b/compiler/nativeGen/RegAlloc/Linear/Stats.hs
deleted file mode 100644
index 74f3c834d0..0000000000
--- a/compiler/nativeGen/RegAlloc/Linear/Stats.hs
+++ /dev/null
@@ -1,87 +0,0 @@
-module RegAlloc.Linear.Stats (
- binSpillReasons,
- countRegRegMovesNat,
- pprStats
-)
-
-where
-
-import GhcPrelude
-
-import RegAlloc.Linear.Base
-import RegAlloc.Liveness
-import Instruction
-
-import UniqFM
-import Outputable
-
-import State
-
--- | Build a map of how many times each reg was alloced, clobbered, loaded etc.
-binSpillReasons
- :: [SpillReason] -> UniqFM [Int]
-
-binSpillReasons reasons
- = addListToUFM_C
- (zipWith (+))
- emptyUFM
- (map (\reason -> case reason of
- SpillAlloc r -> (r, [1, 0, 0, 0, 0])
- SpillClobber r -> (r, [0, 1, 0, 0, 0])
- SpillLoad r -> (r, [0, 0, 1, 0, 0])
- SpillJoinRR r -> (r, [0, 0, 0, 1, 0])
- SpillJoinRM r -> (r, [0, 0, 0, 0, 1])) reasons)
-
-
--- | Count reg-reg moves remaining in this code.
-countRegRegMovesNat
- :: Instruction instr
- => NatCmmDecl statics instr -> Int
-
-countRegRegMovesNat cmm
- = execState (mapGenBlockTopM countBlock cmm) 0
- where
- countBlock b@(BasicBlock _ instrs)
- = do mapM_ countInstr instrs
- return b
-
- countInstr instr
- | Just _ <- takeRegRegMoveInstr instr
- = do modify (+ 1)
- return instr
-
- | otherwise
- = return instr
-
-
--- | Pretty print some RegAllocStats
-pprStats
- :: Instruction instr
- => [NatCmmDecl statics instr] -> [RegAllocStats] -> SDoc
-
-pprStats code statss
- = let -- sum up all the instrs inserted by the spiller
- spills = foldl' (plusUFM_C (zipWith (+)))
- emptyUFM
- $ map ra_spillInstrs statss
-
- spillTotals = foldl' (zipWith (+))
- [0, 0, 0, 0, 0]
- $ nonDetEltsUFM spills
- -- See Note [Unique Determinism and code generation]
-
- -- count how many reg-reg-moves remain in the code
- moves = sum $ map countRegRegMovesNat code
-
- pprSpill (reg, spills)
- = parens $ (hcat $ punctuate (text ", ") (doubleQuotes (ppr reg) : map ppr spills))
-
- in ( text "-- spills-added-total"
- $$ text "-- (allocs, clobbers, loads, joinRR, joinRM, reg_reg_moves_remaining)"
- $$ (parens $ (hcat $ punctuate (text ", ") (map ppr spillTotals ++ [ppr moves])))
- $$ text ""
- $$ text "-- spills-added"
- $$ text "-- (reg_name, allocs, clobbers, loads, joinRR, joinRM)"
- $$ (pprUFMWithKeys spills (vcat . map pprSpill))
- $$ text "")
-
diff --git a/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs
deleted file mode 100644
index e7f8cb4a63..0000000000
--- a/compiler/nativeGen/RegAlloc/Linear/X86/FreeRegs.hs
+++ /dev/null
@@ -1,53 +0,0 @@
-
--- | Free regs map for i386
-module RegAlloc.Linear.X86.FreeRegs
-where
-
-import GhcPrelude
-
-import X86.Regs
-import RegClass
-import Reg
-import Panic
-import GHC.Platform
-
-import Data.Word
-import Data.Bits
-
-newtype FreeRegs = FreeRegs Word32
- deriving Show
-
-noFreeRegs :: FreeRegs
-noFreeRegs = FreeRegs 0
-
-releaseReg :: RealReg -> FreeRegs -> FreeRegs
-releaseReg (RealRegSingle n) (FreeRegs f)
- = FreeRegs (f .|. (1 `shiftL` n))
-
-releaseReg _ _
- = panic "RegAlloc.Linear.X86.FreeRegs.releaseReg: no reg"
-
-initFreeRegs :: Platform -> FreeRegs
-initFreeRegs platform
- = foldl' (flip releaseReg) noFreeRegs (allocatableRegs platform)
-
-getFreeRegs :: Platform -> RegClass -> FreeRegs -> [RealReg] -- lazily
-getFreeRegs platform cls (FreeRegs f) = go f 0
-
- where go 0 _ = []
- go n m
- | n .&. 1 /= 0 && classOfRealReg platform (RealRegSingle m) == cls
- = RealRegSingle m : (go (n `shiftR` 1) $! (m+1))
-
- | otherwise
- = go (n `shiftR` 1) $! (m+1)
- -- ToDo: there's no point looking through all the integer registers
- -- in order to find a floating-point one.
-
-allocateReg :: RealReg -> FreeRegs -> FreeRegs
-allocateReg (RealRegSingle r) (FreeRegs f)
- = FreeRegs (f .&. complement (1 `shiftL` r))
-
-allocateReg _ _
- = panic "RegAlloc.Linear.X86.FreeRegs.allocateReg: no reg"
-
diff --git a/compiler/nativeGen/RegAlloc/Linear/X86_64/FreeRegs.hs b/compiler/nativeGen/RegAlloc/Linear/X86_64/FreeRegs.hs
deleted file mode 100644
index 44a3bbb306..0000000000
--- a/compiler/nativeGen/RegAlloc/Linear/X86_64/FreeRegs.hs
+++ /dev/null
@@ -1,54 +0,0 @@
-
--- | Free regs map for x86_64
-module RegAlloc.Linear.X86_64.FreeRegs
-where
-
-import GhcPrelude
-
-import X86.Regs
-import RegClass
-import Reg
-import Panic
-import GHC.Platform
-
-import Data.Word
-import Data.Bits
-
-newtype FreeRegs = FreeRegs Word64
- deriving Show
-
-noFreeRegs :: FreeRegs
-noFreeRegs = FreeRegs 0
-
-releaseReg :: RealReg -> FreeRegs -> FreeRegs
-releaseReg (RealRegSingle n) (FreeRegs f)
- = FreeRegs (f .|. (1 `shiftL` n))
-
-releaseReg _ _
- = panic "RegAlloc.Linear.X86_64.FreeRegs.releaseReg: no reg"
-
-initFreeRegs :: Platform -> FreeRegs
-initFreeRegs platform
- = foldl' (flip releaseReg) noFreeRegs (allocatableRegs platform)
-
-getFreeRegs :: Platform -> RegClass -> FreeRegs -> [RealReg] -- lazily
-getFreeRegs platform cls (FreeRegs f) = go f 0
-
- where go 0 _ = []
- go n m
- | n .&. 1 /= 0 && classOfRealReg platform (RealRegSingle m) == cls
- = RealRegSingle m : (go (n `shiftR` 1) $! (m+1))
-
- | otherwise
- = go (n `shiftR` 1) $! (m+1)
- -- ToDo: there's no point looking through all the integer registers
- -- in order to find a floating-point one.
-
-allocateReg :: RealReg -> FreeRegs -> FreeRegs
-allocateReg (RealRegSingle r) (FreeRegs f)
- = FreeRegs (f .&. complement (1 `shiftL` r))
-
-allocateReg _ _
- = panic "RegAlloc.Linear.X86_64.FreeRegs.allocateReg: no reg"
-
-
diff --git a/compiler/nativeGen/RegAlloc/Liveness.hs b/compiler/nativeGen/RegAlloc/Liveness.hs
deleted file mode 100644
index b6fd3b3937..0000000000
--- a/compiler/nativeGen/RegAlloc/Liveness.hs
+++ /dev/null
@@ -1,1025 +0,0 @@
-{-# LANGUAGE BangPatterns #-}
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE ScopedTypeVariables #-}
-{-# LANGUAGE TypeFamilies #-}
-
-{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
-
------------------------------------------------------------------------------
---
--- The register liveness determinator
---
--- (c) The University of Glasgow 2004-2013
---
------------------------------------------------------------------------------
-
-module RegAlloc.Liveness (
- RegSet,
- RegMap, emptyRegMap,
- BlockMap, mapEmpty,
- LiveCmmDecl,
- InstrSR (..),
- LiveInstr (..),
- Liveness (..),
- LiveInfo (..),
- LiveBasicBlock,
-
- mapBlockTop, mapBlockTopM, mapSCCM,
- mapGenBlockTop, mapGenBlockTopM,
- stripLive,
- stripLiveBlock,
- slurpConflicts,
- slurpReloadCoalesce,
- eraseDeltasLive,
- patchEraseLive,
- patchRegsLiveInstr,
- reverseBlocksInTops,
- regLiveness,
- cmmTopLiveness
- ) where
-import GhcPrelude
-
-import Reg
-import Instruction
-
-import GHC.Cmm.BlockId
-import CFG
-import GHC.Cmm.Dataflow.Collections
-import GHC.Cmm.Dataflow.Label
-import GHC.Cmm hiding (RegSet, emptyRegSet)
-
-import Digraph
-import GHC.Driver.Session
-import MonadUtils
-import Outputable
-import GHC.Platform
-import UniqSet
-import UniqFM
-import UniqSupply
-import Bag
-import State
-
-import Data.List
-import Data.Maybe
-import Data.IntSet (IntSet)
-
------------------------------------------------------------------------------
-type RegSet = UniqSet Reg
-
-type RegMap a = UniqFM a
-
-emptyRegMap :: UniqFM a
-emptyRegMap = emptyUFM
-
-emptyRegSet :: RegSet
-emptyRegSet = emptyUniqSet
-
-type BlockMap a = LabelMap a
-
-
--- | A top level thing which carries liveness information.
-type LiveCmmDecl statics instr
- = GenCmmDecl
- statics
- LiveInfo
- [SCC (LiveBasicBlock instr)]
-
-
--- | The register allocator also wants to use SPILL/RELOAD meta instructions,
--- so we'll keep those here.
-data InstrSR instr
- -- | A real machine instruction
- = Instr instr
-
- -- | spill this reg to a stack slot
- | SPILL Reg Int
-
- -- | reload this reg from a stack slot
- | RELOAD Int Reg
-
-instance Instruction instr => Instruction (InstrSR instr) where
- regUsageOfInstr platform i
- = case i of
- Instr instr -> regUsageOfInstr platform instr
- SPILL reg _ -> RU [reg] []
- RELOAD _ reg -> RU [] [reg]
-
- patchRegsOfInstr i f
- = case i of
- Instr instr -> Instr (patchRegsOfInstr instr f)
- SPILL reg slot -> SPILL (f reg) slot
- RELOAD slot reg -> RELOAD slot (f reg)
-
- isJumpishInstr i
- = case i of
- Instr instr -> isJumpishInstr instr
- _ -> False
-
- jumpDestsOfInstr i
- = case i of
- Instr instr -> jumpDestsOfInstr instr
- _ -> []
-
- patchJumpInstr i f
- = case i of
- Instr instr -> Instr (patchJumpInstr instr f)
- _ -> i
-
- mkSpillInstr = error "mkSpillInstr[InstrSR]: Not making SPILL meta-instr"
- mkLoadInstr = error "mkLoadInstr[InstrSR]: Not making LOAD meta-instr"
-
- takeDeltaInstr i
- = case i of
- Instr instr -> takeDeltaInstr instr
- _ -> Nothing
-
- isMetaInstr i
- = case i of
- Instr instr -> isMetaInstr instr
- _ -> False
-
- mkRegRegMoveInstr platform r1 r2
- = Instr (mkRegRegMoveInstr platform r1 r2)
-
- takeRegRegMoveInstr i
- = case i of
- Instr instr -> takeRegRegMoveInstr instr
- _ -> Nothing
-
- mkJumpInstr target = map Instr (mkJumpInstr target)
-
- mkStackAllocInstr platform amount =
- Instr <$> mkStackAllocInstr platform amount
-
- mkStackDeallocInstr platform amount =
- Instr <$> mkStackDeallocInstr platform amount
-
-
--- | An instruction with liveness information.
-data LiveInstr instr
- = LiveInstr (InstrSR instr) (Maybe Liveness)
-
--- | Liveness information.
--- The regs which die are ones which are no longer live in the *next* instruction
--- in this sequence.
--- (NB. if the instruction is a jump, these registers might still be live
--- at the jump target(s) - you have to check the liveness at the destination
--- block to find out).
-
-data Liveness
- = Liveness
- { liveBorn :: RegSet -- ^ registers born in this instruction (written to for first time).
- , liveDieRead :: RegSet -- ^ registers that died because they were read for the last time.
- , liveDieWrite :: RegSet } -- ^ registers that died because they were clobbered by something.
-
-
--- | Stash regs live on entry to each basic block in the info part of the cmm code.
-data LiveInfo
- = LiveInfo
- (LabelMap RawCmmStatics) -- cmm info table static stuff
- [BlockId] -- entry points (first one is the
- -- entry point for the proc).
- (BlockMap RegSet) -- argument locals live on entry to this block
- (BlockMap IntSet) -- stack slots live on entry to this block
-
-
--- | A basic block with liveness information.
-type LiveBasicBlock instr
- = GenBasicBlock (LiveInstr instr)
-
-
-instance Outputable instr
- => Outputable (InstrSR instr) where
-
- ppr (Instr realInstr)
- = ppr realInstr
-
- ppr (SPILL reg slot)
- = hcat [
- text "\tSPILL",
- char ' ',
- ppr reg,
- comma,
- text "SLOT" <> parens (int slot)]
-
- ppr (RELOAD slot reg)
- = hcat [
- text "\tRELOAD",
- char ' ',
- text "SLOT" <> parens (int slot),
- comma,
- ppr reg]
-
-instance Outputable instr
- => Outputable (LiveInstr instr) where
-
- ppr (LiveInstr instr Nothing)
- = ppr instr
-
- ppr (LiveInstr instr (Just live))
- = ppr instr
- $$ (nest 8
- $ vcat
- [ pprRegs (text "# born: ") (liveBorn live)
- , pprRegs (text "# r_dying: ") (liveDieRead live)
- , pprRegs (text "# w_dying: ") (liveDieWrite live) ]
- $+$ space)
-
- where pprRegs :: SDoc -> RegSet -> SDoc
- pprRegs name regs
- | isEmptyUniqSet regs = empty
- | otherwise = name <>
- (pprUFM (getUniqSet regs) (hcat . punctuate space . map ppr))
-
-instance Outputable LiveInfo where
- ppr (LiveInfo mb_static entryIds liveVRegsOnEntry liveSlotsOnEntry)
- = (ppr mb_static)
- $$ text "# entryIds = " <> ppr entryIds
- $$ text "# liveVRegsOnEntry = " <> ppr liveVRegsOnEntry
- $$ text "# liveSlotsOnEntry = " <> text (show liveSlotsOnEntry)
-
-
-
--- | map a function across all the basic blocks in this code
---
-mapBlockTop
- :: (LiveBasicBlock instr -> LiveBasicBlock instr)
- -> LiveCmmDecl statics instr -> LiveCmmDecl statics instr
-
-mapBlockTop f cmm
- = evalState (mapBlockTopM (\x -> return $ f x) cmm) ()
-
-
--- | map a function across all the basic blocks in this code (monadic version)
---
-mapBlockTopM
- :: Monad m
- => (LiveBasicBlock instr -> m (LiveBasicBlock instr))
- -> LiveCmmDecl statics instr -> m (LiveCmmDecl statics instr)
-
-mapBlockTopM _ cmm@(CmmData{})
- = return cmm
-
-mapBlockTopM f (CmmProc header label live sccs)
- = do sccs' <- mapM (mapSCCM f) sccs
- return $ CmmProc header label live sccs'
-
-mapSCCM :: Monad m => (a -> m b) -> SCC a -> m (SCC b)
-mapSCCM f (AcyclicSCC x)
- = do x' <- f x
- return $ AcyclicSCC x'
-
-mapSCCM f (CyclicSCC xs)
- = do xs' <- mapM f xs
- return $ CyclicSCC xs'
-
-
--- map a function across all the basic blocks in this code
-mapGenBlockTop
- :: (GenBasicBlock i -> GenBasicBlock i)
- -> (GenCmmDecl d h (ListGraph i) -> GenCmmDecl d h (ListGraph i))
-
-mapGenBlockTop f cmm
- = evalState (mapGenBlockTopM (\x -> return $ f x) cmm) ()
-
-
--- | map a function across all the basic blocks in this code (monadic version)
-mapGenBlockTopM
- :: Monad m
- => (GenBasicBlock i -> m (GenBasicBlock i))
- -> (GenCmmDecl d h (ListGraph i) -> m (GenCmmDecl d h (ListGraph i)))
-
-mapGenBlockTopM _ cmm@(CmmData{})
- = return cmm
-
-mapGenBlockTopM f (CmmProc header label live (ListGraph blocks))
- = do blocks' <- mapM f blocks
- return $ CmmProc header label live (ListGraph blocks')
-
-
--- | Slurp out the list of register conflicts and reg-reg moves from this top level thing.
--- Slurping of conflicts and moves is wrapped up together so we don't have
--- to make two passes over the same code when we want to build the graph.
---
-slurpConflicts
- :: Instruction instr
- => LiveCmmDecl statics instr
- -> (Bag (UniqSet Reg), Bag (Reg, Reg))
-
-slurpConflicts live
- = slurpCmm (emptyBag, emptyBag) live
-
- where slurpCmm rs CmmData{} = rs
- slurpCmm rs (CmmProc info _ _ sccs)
- = foldl' (slurpSCC info) rs sccs
-
- slurpSCC info rs (AcyclicSCC b)
- = slurpBlock info rs b
-
- slurpSCC info rs (CyclicSCC bs)
- = foldl' (slurpBlock info) rs bs
-
- slurpBlock info rs (BasicBlock blockId instrs)
- | LiveInfo _ _ blockLive _ <- info
- , Just rsLiveEntry <- mapLookup blockId blockLive
- , (conflicts, moves) <- slurpLIs rsLiveEntry rs instrs
- = (consBag rsLiveEntry conflicts, moves)
-
- | otherwise
- = panic "Liveness.slurpConflicts: bad block"
-
- slurpLIs rsLive (conflicts, moves) []
- = (consBag rsLive conflicts, moves)
-
- slurpLIs rsLive rs (LiveInstr _ Nothing : lis)
- = slurpLIs rsLive rs lis
-
- slurpLIs rsLiveEntry (conflicts, moves) (LiveInstr instr (Just live) : lis)
- = let
- -- regs that die because they are read for the last time at the start of an instruction
- -- are not live across it.
- rsLiveAcross = rsLiveEntry `minusUniqSet` (liveDieRead live)
-
- -- regs live on entry to the next instruction.
- -- be careful of orphans, make sure to delete dying regs _after_ unioning
- -- in the ones that are born here.
- rsLiveNext = (rsLiveAcross `unionUniqSets` (liveBorn live))
- `minusUniqSet` (liveDieWrite live)
-
- -- orphan vregs are the ones that die in the same instruction they are born in.
- -- these are likely to be results that are never used, but we still
- -- need to assign a hreg to them..
- rsOrphans = intersectUniqSets
- (liveBorn live)
- (unionUniqSets (liveDieWrite live) (liveDieRead live))
-
- --
- rsConflicts = unionUniqSets rsLiveNext rsOrphans
-
- in case takeRegRegMoveInstr instr of
- Just rr -> slurpLIs rsLiveNext
- ( consBag rsConflicts conflicts
- , consBag rr moves) lis
-
- Nothing -> slurpLIs rsLiveNext
- ( consBag rsConflicts conflicts
- , moves) lis
-
-
--- | For spill\/reloads
---
--- SPILL v1, slot1
--- ...
--- RELOAD slot1, v2
---
--- If we can arrange that v1 and v2 are allocated to the same hreg it's more likely
--- the spill\/reload instrs can be cleaned and replaced by a nop reg-reg move.
---
---
-slurpReloadCoalesce
- :: forall statics instr. Instruction instr
- => LiveCmmDecl statics instr
- -> Bag (Reg, Reg)
-
-slurpReloadCoalesce live
- = slurpCmm emptyBag live
-
- where
- slurpCmm :: Bag (Reg, Reg)
- -> GenCmmDecl t t1 [SCC (LiveBasicBlock instr)]
- -> Bag (Reg, Reg)
- slurpCmm cs CmmData{} = cs
- slurpCmm cs (CmmProc _ _ _ sccs)
- = slurpComp cs (flattenSCCs sccs)
-
- slurpComp :: Bag (Reg, Reg)
- -> [LiveBasicBlock instr]
- -> Bag (Reg, Reg)
- slurpComp cs blocks
- = let (moveBags, _) = runState (slurpCompM blocks) emptyUFM
- in unionManyBags (cs : moveBags)
-
- slurpCompM :: [LiveBasicBlock instr]
- -> State (UniqFM [UniqFM Reg]) [Bag (Reg, Reg)]
- slurpCompM blocks
- = do -- run the analysis once to record the mapping across jumps.
- mapM_ (slurpBlock False) blocks
-
- -- run it a second time while using the information from the last pass.
- -- We /could/ run this many more times to deal with graphical control
- -- flow and propagating info across multiple jumps, but it's probably
- -- not worth the trouble.
- mapM (slurpBlock True) blocks
-
- slurpBlock :: Bool -> LiveBasicBlock instr
- -> State (UniqFM [UniqFM Reg]) (Bag (Reg, Reg))
- slurpBlock propagate (BasicBlock blockId instrs)
- = do -- grab the slot map for entry to this block
- slotMap <- if propagate
- then getSlotMap blockId
- else return emptyUFM
-
- (_, mMoves) <- mapAccumLM slurpLI slotMap instrs
- return $ listToBag $ catMaybes mMoves
-
- slurpLI :: UniqFM Reg -- current slotMap
- -> LiveInstr instr
- -> State (UniqFM [UniqFM Reg]) -- blockId -> [slot -> reg]
- -- for tracking slotMaps across jumps
-
- ( UniqFM Reg -- new slotMap
- , Maybe (Reg, Reg)) -- maybe a new coalesce edge
-
- slurpLI slotMap li
-
- -- remember what reg was stored into the slot
- | LiveInstr (SPILL reg slot) _ <- li
- , slotMap' <- addToUFM slotMap slot reg
- = return (slotMap', Nothing)
-
- -- add an edge between the this reg and the last one stored into the slot
- | LiveInstr (RELOAD slot reg) _ <- li
- = case lookupUFM slotMap slot of
- Just reg2
- | reg /= reg2 -> return (slotMap, Just (reg, reg2))
- | otherwise -> return (slotMap, Nothing)
-
- Nothing -> return (slotMap, Nothing)
-
- -- if we hit a jump, remember the current slotMap
- | LiveInstr (Instr instr) _ <- li
- , targets <- jumpDestsOfInstr instr
- , not $ null targets
- = do mapM_ (accSlotMap slotMap) targets
- return (slotMap, Nothing)
-
- | otherwise
- = return (slotMap, Nothing)
-
- -- record a slotmap for an in edge to this block
- accSlotMap slotMap blockId
- = modify (\s -> addToUFM_C (++) s blockId [slotMap])
-
- -- work out the slot map on entry to this block
- -- if we have slot maps for multiple in-edges then we need to merge them.
- getSlotMap blockId
- = do map <- get
- let slotMaps = fromMaybe [] (lookupUFM map blockId)
- return $ foldr mergeSlotMaps emptyUFM slotMaps
-
- mergeSlotMaps :: UniqFM Reg -> UniqFM Reg -> UniqFM Reg
- mergeSlotMaps map1 map2
- = listToUFM
- $ [ (k, r1)
- | (k, r1) <- nonDetUFMToList map1
- -- This is non-deterministic but we do not
- -- currently support deterministic code-generation.
- -- See Note [Unique Determinism and code generation]
- , case lookupUFM map2 k of
- Nothing -> False
- Just r2 -> r1 == r2 ]
-
-
--- | Strip away liveness information, yielding NatCmmDecl
-stripLive
- :: (Outputable statics, Outputable instr, Instruction instr)
- => DynFlags
- -> LiveCmmDecl statics instr
- -> NatCmmDecl statics instr
-
-stripLive dflags live
- = stripCmm live
-
- where stripCmm :: (Outputable statics, Outputable instr, Instruction instr)
- => LiveCmmDecl statics instr -> NatCmmDecl statics instr
- stripCmm (CmmData sec ds) = CmmData sec ds
- stripCmm (CmmProc (LiveInfo info (first_id:_) _ _) label live sccs)
- = let final_blocks = flattenSCCs sccs
-
- -- make sure the block that was first in the input list
- -- stays at the front of the output. This is the entry point
- -- of the proc, and it needs to come first.
- ((first':_), rest')
- = partition ((== first_id) . blockId) final_blocks
-
- in CmmProc info label live
- (ListGraph $ map (stripLiveBlock dflags) $ first' : rest')
-
- -- If the proc has blocks but we don't know what the first one was, then we're dead.
- stripCmm proc
- = pprPanic "RegAlloc.Liveness.stripLive: no first_id on proc" (ppr proc)
-
--- | Strip away liveness information from a basic block,
--- and make real spill instructions out of SPILL, RELOAD pseudos along the way.
-
-stripLiveBlock
- :: Instruction instr
- => DynFlags
- -> LiveBasicBlock instr
- -> NatBasicBlock instr
-
-stripLiveBlock dflags (BasicBlock i lis)
- = BasicBlock i instrs'
-
- where (instrs', _)
- = runState (spillNat [] lis) 0
-
- spillNat acc []
- = return (reverse acc)
-
- spillNat acc (LiveInstr (SPILL reg slot) _ : instrs)
- = do delta <- get
- spillNat (mkSpillInstr dflags reg delta slot : acc) instrs
-
- spillNat acc (LiveInstr (RELOAD slot reg) _ : instrs)
- = do delta <- get
- spillNat (mkLoadInstr dflags reg delta slot : acc) instrs
-
- spillNat acc (LiveInstr (Instr instr) _ : instrs)
- | Just i <- takeDeltaInstr instr
- = do put i
- spillNat acc instrs
-
- spillNat acc (LiveInstr (Instr instr) _ : instrs)
- = spillNat (instr : acc) instrs
-
-
--- | Erase Delta instructions.
-
-eraseDeltasLive
- :: Instruction instr
- => LiveCmmDecl statics instr
- -> LiveCmmDecl statics instr
-
-eraseDeltasLive cmm
- = mapBlockTop eraseBlock cmm
- where
- eraseBlock (BasicBlock id lis)
- = BasicBlock id
- $ filter (\(LiveInstr i _) -> not $ isJust $ takeDeltaInstr i)
- $ lis
-
-
--- | Patch the registers in this code according to this register mapping.
--- also erase reg -> reg moves when the reg is the same.
--- also erase reg -> reg moves when the destination dies in this instr.
-patchEraseLive
- :: Instruction instr
- => (Reg -> Reg)
- -> LiveCmmDecl statics instr -> LiveCmmDecl statics instr
-
-patchEraseLive patchF cmm
- = patchCmm cmm
- where
- patchCmm cmm@CmmData{} = cmm
-
- patchCmm (CmmProc info label live sccs)
- | LiveInfo static id blockMap mLiveSlots <- info
- = let
- patchRegSet set = mkUniqSet $ map patchF $ nonDetEltsUFM set
- -- See Note [Unique Determinism and code generation]
- blockMap' = mapMap (patchRegSet . getUniqSet) blockMap
-
- info' = LiveInfo static id blockMap' mLiveSlots
- in CmmProc info' label live $ map patchSCC sccs
-
- patchSCC (AcyclicSCC b) = AcyclicSCC (patchBlock b)
- patchSCC (CyclicSCC bs) = CyclicSCC (map patchBlock bs)
-
- patchBlock (BasicBlock id lis)
- = BasicBlock id $ patchInstrs lis
-
- patchInstrs [] = []
- patchInstrs (li : lis)
-
- | LiveInstr i (Just live) <- li'
- , Just (r1, r2) <- takeRegRegMoveInstr i
- , eatMe r1 r2 live
- = patchInstrs lis
-
- | otherwise
- = li' : patchInstrs lis
-
- where li' = patchRegsLiveInstr patchF li
-
- eatMe r1 r2 live
- -- source and destination regs are the same
- | r1 == r2 = True
-
- -- destination reg is never used
- | elementOfUniqSet r2 (liveBorn live)
- , elementOfUniqSet r2 (liveDieRead live) || elementOfUniqSet r2 (liveDieWrite live)
- = True
-
- | otherwise = False
-
-
--- | Patch registers in this LiveInstr, including the liveness information.
---
-patchRegsLiveInstr
- :: Instruction instr
- => (Reg -> Reg)
- -> LiveInstr instr -> LiveInstr instr
-
-patchRegsLiveInstr patchF li
- = case li of
- LiveInstr instr Nothing
- -> LiveInstr (patchRegsOfInstr instr patchF) Nothing
-
- LiveInstr instr (Just live)
- -> LiveInstr
- (patchRegsOfInstr instr patchF)
- (Just live
- { -- WARNING: have to go via lists here because patchF changes the uniq in the Reg
- liveBorn = mapUniqSet patchF $ liveBorn live
- , liveDieRead = mapUniqSet patchF $ liveDieRead live
- , liveDieWrite = mapUniqSet patchF $ liveDieWrite live })
- -- See Note [Unique Determinism and code generation]
-
-
---------------------------------------------------------------------------------
--- | Convert a NatCmmDecl to a LiveCmmDecl, with liveness information
-
-cmmTopLiveness
- :: (Outputable instr, Instruction instr)
- => Maybe CFG -> Platform
- -> NatCmmDecl statics instr
- -> UniqSM (LiveCmmDecl statics instr)
-cmmTopLiveness cfg platform cmm
- = regLiveness platform $ natCmmTopToLive cfg cmm
-
-natCmmTopToLive
- :: (Instruction instr, Outputable instr)
- => Maybe CFG -> NatCmmDecl statics instr
- -> LiveCmmDecl statics instr
-
-natCmmTopToLive _ (CmmData i d)
- = CmmData i d
-
-natCmmTopToLive _ (CmmProc info lbl live (ListGraph []))
- = CmmProc (LiveInfo info [] mapEmpty mapEmpty) lbl live []
-
-natCmmTopToLive mCfg proc@(CmmProc info lbl live (ListGraph blocks@(first : _)))
- = CmmProc (LiveInfo info' (first_id : entry_ids) mapEmpty mapEmpty)
- lbl live sccsLive
- where
- first_id = blockId first
- all_entry_ids = entryBlocks proc
- sccs = sccBlocks blocks all_entry_ids mCfg
- sccsLive = map (fmap (\(BasicBlock l instrs) ->
- BasicBlock l (map (\i -> LiveInstr (Instr i) Nothing) instrs)))
- $ sccs
-
- entry_ids = filter (reachable_node) .
- filter (/= first_id) $ all_entry_ids
- info' = mapFilterWithKey (\node _ -> reachable_node node) info
- reachable_node
- | Just cfg <- mCfg
- = hasNode cfg
- | otherwise
- = const True
-
---
--- Compute the liveness graph of the set of basic blocks. Important:
--- we also discard any unreachable code here, starting from the entry
--- points (the first block in the list, and any blocks with info
--- tables). Unreachable code arises when code blocks are orphaned in
--- earlier optimisation passes, and may confuse the register allocator
--- by referring to registers that are not initialised. It's easy to
--- discard the unreachable code as part of the SCC pass, so that's
--- exactly what we do. (#7574)
---
-sccBlocks
- :: forall instr . Instruction instr
- => [NatBasicBlock instr]
- -> [BlockId]
- -> Maybe CFG
- -> [SCC (NatBasicBlock instr)]
-
-sccBlocks blocks entries mcfg = map (fmap node_payload) sccs
- where
- nodes :: [ Node BlockId (NatBasicBlock instr) ]
- nodes = [ DigraphNode block id (getOutEdges instrs)
- | block@(BasicBlock id instrs) <- blocks ]
-
- g1 = graphFromEdgedVerticesUniq nodes
-
- reachable :: LabelSet
- reachable
- | Just cfg <- mcfg
- -- Our CFG only contains reachable nodes by construction at this point.
- = setFromList $ getCfgNodes cfg
- | otherwise
- = setFromList $ [ node_key node | node <- reachablesG g1 roots ]
-
- g2 = graphFromEdgedVerticesUniq [ node | node <- nodes
- , node_key node
- `setMember` reachable ]
-
- sccs = stronglyConnCompG g2
-
- getOutEdges :: Instruction instr => [instr] -> [BlockId]
- getOutEdges instrs = concatMap jumpDestsOfInstr instrs
-
- -- This is truly ugly, but I don't see a good alternative.
- -- Digraph just has the wrong API. We want to identify nodes
- -- by their keys (BlockId), but Digraph requires the whole
- -- node: (NatBasicBlock, BlockId, [BlockId]). This takes
- -- advantage of the fact that Digraph only looks at the key,
- -- even though it asks for the whole triple.
- roots = [DigraphNode (panic "sccBlocks") b (panic "sccBlocks")
- | b <- entries ]
-
---------------------------------------------------------------------------------
--- Annotate code with register liveness information
---
-
-regLiveness
- :: (Outputable instr, Instruction instr)
- => Platform
- -> LiveCmmDecl statics instr
- -> UniqSM (LiveCmmDecl statics instr)
-
-regLiveness _ (CmmData i d)
- = return $ CmmData i d
-
-regLiveness _ (CmmProc info lbl live [])
- | LiveInfo static mFirst _ _ <- info
- = return $ CmmProc
- (LiveInfo static mFirst mapEmpty mapEmpty)
- lbl live []
-
-regLiveness platform (CmmProc info lbl live sccs)
- | LiveInfo static mFirst _ liveSlotsOnEntry <- info
- = let (ann_sccs, block_live) = computeLiveness platform sccs
-
- in return $ CmmProc (LiveInfo static mFirst block_live liveSlotsOnEntry)
- lbl live ann_sccs
-
-
--- -----------------------------------------------------------------------------
--- | Check ordering of Blocks
--- The computeLiveness function requires SCCs to be in reverse
--- dependent order. If they're not the liveness information will be
--- wrong, and we'll get a bad allocation. Better to check for this
--- precondition explicitly or some other poor sucker will waste a
--- day staring at bad assembly code..
---
-checkIsReverseDependent
- :: Instruction instr
- => [SCC (LiveBasicBlock instr)] -- ^ SCCs of blocks that we're about to run the liveness determinator on.
- -> Maybe BlockId -- ^ BlockIds that fail the test (if any)
-
-checkIsReverseDependent sccs'
- = go emptyUniqSet sccs'
-
- where go _ []
- = Nothing
-
- go blocksSeen (AcyclicSCC block : sccs)
- = let dests = slurpJumpDestsOfBlock block
- blocksSeen' = unionUniqSets blocksSeen $ mkUniqSet [blockId block]
- badDests = dests `minusUniqSet` blocksSeen'
- in case nonDetEltsUniqSet badDests of
- -- See Note [Unique Determinism and code generation]
- [] -> go blocksSeen' sccs
- bad : _ -> Just bad
-
- go blocksSeen (CyclicSCC blocks : sccs)
- = let dests = unionManyUniqSets $ map slurpJumpDestsOfBlock blocks
- blocksSeen' = unionUniqSets blocksSeen $ mkUniqSet $ map blockId blocks
- badDests = dests `minusUniqSet` blocksSeen'
- in case nonDetEltsUniqSet badDests of
- -- See Note [Unique Determinism and code generation]
- [] -> go blocksSeen' sccs
- bad : _ -> Just bad
-
- slurpJumpDestsOfBlock (BasicBlock _ instrs)
- = unionManyUniqSets
- $ map (mkUniqSet . jumpDestsOfInstr)
- [ i | LiveInstr i _ <- instrs]
-
-
--- | If we've compute liveness info for this code already we have to reverse
--- the SCCs in each top to get them back to the right order so we can do it again.
-reverseBlocksInTops :: LiveCmmDecl statics instr -> LiveCmmDecl statics instr
-reverseBlocksInTops top
- = case top of
- CmmData{} -> top
- CmmProc info lbl live sccs -> CmmProc info lbl live (reverse sccs)
-
-
--- | Computing liveness
---
--- On entry, the SCCs must be in "reverse" order: later blocks may transfer
--- control to earlier ones only, else `panic`.
---
--- The SCCs returned are in the *opposite* order, which is exactly what we
--- want for the next pass.
---
-computeLiveness
- :: (Outputable instr, Instruction instr)
- => Platform
- -> [SCC (LiveBasicBlock instr)]
- -> ([SCC (LiveBasicBlock instr)], -- instructions annotated with list of registers
- -- which are "dead after this instruction".
- BlockMap RegSet) -- blocks annotated with set of live registers
- -- on entry to the block.
-
-computeLiveness platform sccs
- = case checkIsReverseDependent sccs of
- Nothing -> livenessSCCs platform mapEmpty [] sccs
- Just bad -> pprPanic "RegAlloc.Liveness.computeLiveness"
- (vcat [ text "SCCs aren't in reverse dependent order"
- , text "bad blockId" <+> ppr bad
- , ppr sccs])
-
-livenessSCCs
- :: Instruction instr
- => Platform
- -> BlockMap RegSet
- -> [SCC (LiveBasicBlock instr)] -- accum
- -> [SCC (LiveBasicBlock instr)]
- -> ( [SCC (LiveBasicBlock instr)]
- , BlockMap RegSet)
-
-livenessSCCs _ blockmap done []
- = (done, blockmap)
-
-livenessSCCs platform blockmap done (AcyclicSCC block : sccs)
- = let (blockmap', block') = livenessBlock platform blockmap block
- in livenessSCCs platform blockmap' (AcyclicSCC block' : done) sccs
-
-livenessSCCs platform blockmap done
- (CyclicSCC blocks : sccs) =
- livenessSCCs platform blockmap' (CyclicSCC blocks':done) sccs
- where (blockmap', blocks')
- = iterateUntilUnchanged linearLiveness equalBlockMaps
- blockmap blocks
-
- iterateUntilUnchanged
- :: (a -> b -> (a,c)) -> (a -> a -> Bool)
- -> a -> b
- -> (a,c)
-
- iterateUntilUnchanged f eq a b
- = head $
- concatMap tail $
- groupBy (\(a1, _) (a2, _) -> eq a1 a2) $
- iterate (\(a, _) -> f a b) $
- (a, panic "RegLiveness.livenessSCCs")
-
-
- linearLiveness
- :: Instruction instr
- => BlockMap RegSet -> [LiveBasicBlock instr]
- -> (BlockMap RegSet, [LiveBasicBlock instr])
-
- linearLiveness = mapAccumL (livenessBlock platform)
-
- -- probably the least efficient way to compare two
- -- BlockMaps for equality.
- equalBlockMaps a b
- = a' == b'
- where a' = map f $ mapToList a
- b' = map f $ mapToList b
- f (key,elt) = (key, nonDetEltsUniqSet elt)
- -- See Note [Unique Determinism and code generation]
-
-
-
--- | Annotate a basic block with register liveness information.
---
-livenessBlock
- :: Instruction instr
- => Platform
- -> BlockMap RegSet
- -> LiveBasicBlock instr
- -> (BlockMap RegSet, LiveBasicBlock instr)
-
-livenessBlock platform blockmap (BasicBlock block_id instrs)
- = let
- (regsLiveOnEntry, instrs1)
- = livenessBack platform emptyUniqSet blockmap [] (reverse instrs)
- blockmap' = mapInsert block_id regsLiveOnEntry blockmap
-
- instrs2 = livenessForward platform regsLiveOnEntry instrs1
-
- output = BasicBlock block_id instrs2
-
- in ( blockmap', output)
-
--- | Calculate liveness going forwards,
--- filling in when regs are born
-
-livenessForward
- :: Instruction instr
- => Platform
- -> RegSet -- regs live on this instr
- -> [LiveInstr instr] -> [LiveInstr instr]
-
-livenessForward _ _ [] = []
-livenessForward platform rsLiveEntry (li@(LiveInstr instr mLive) : lis)
- | Just live <- mLive
- = let
- RU _ written = regUsageOfInstr platform instr
- -- Regs that are written to but weren't live on entry to this instruction
- -- are recorded as being born here.
- rsBorn = mkUniqSet
- $ filter (\r -> not $ elementOfUniqSet r rsLiveEntry) written
-
- rsLiveNext = (rsLiveEntry `unionUniqSets` rsBorn)
- `minusUniqSet` (liveDieRead live)
- `minusUniqSet` (liveDieWrite live)
-
- in LiveInstr instr (Just live { liveBorn = rsBorn })
- : livenessForward platform rsLiveNext lis
-
- | otherwise
- = li : livenessForward platform rsLiveEntry lis
-
-
--- | Calculate liveness going backwards,
--- filling in when regs die, and what regs are live across each instruction
-
-livenessBack
- :: Instruction instr
- => Platform
- -> RegSet -- regs live on this instr
- -> BlockMap RegSet -- regs live on entry to other BBs
- -> [LiveInstr instr] -- instructions (accum)
- -> [LiveInstr instr] -- instructions
- -> (RegSet, [LiveInstr instr])
-
-livenessBack _ liveregs _ done [] = (liveregs, done)
-
-livenessBack platform liveregs blockmap acc (instr : instrs)
- = let (liveregs', instr') = liveness1 platform liveregs blockmap instr
- in livenessBack platform liveregs' blockmap (instr' : acc) instrs
-
-
--- don't bother tagging comments or deltas with liveness
-liveness1
- :: Instruction instr
- => Platform
- -> RegSet
- -> BlockMap RegSet
- -> LiveInstr instr
- -> (RegSet, LiveInstr instr)
-
-liveness1 _ liveregs _ (LiveInstr instr _)
- | isMetaInstr instr
- = (liveregs, LiveInstr instr Nothing)
-
-liveness1 platform liveregs blockmap (LiveInstr instr _)
-
- | not_a_branch
- = (liveregs1, LiveInstr instr
- (Just $ Liveness
- { liveBorn = emptyUniqSet
- , liveDieRead = mkUniqSet r_dying
- , liveDieWrite = mkUniqSet w_dying }))
-
- | otherwise
- = (liveregs_br, LiveInstr instr
- (Just $ Liveness
- { liveBorn = emptyUniqSet
- , liveDieRead = mkUniqSet r_dying_br
- , liveDieWrite = mkUniqSet w_dying }))
-
- where
- !(RU read written) = regUsageOfInstr platform instr
-
- -- registers that were written here are dead going backwards.
- -- registers that were read here are live going backwards.
- liveregs1 = (liveregs `delListFromUniqSet` written)
- `addListToUniqSet` read
-
- -- registers that are not live beyond this point, are recorded
- -- as dying here.
- r_dying = [ reg | reg <- read, reg `notElem` written,
- not (elementOfUniqSet reg liveregs) ]
-
- w_dying = [ reg | reg <- written,
- not (elementOfUniqSet reg liveregs) ]
-
- -- union in the live regs from all the jump destinations of this
- -- instruction.
- targets = jumpDestsOfInstr instr -- where we go from here
- not_a_branch = null targets
-
- targetLiveRegs target
- = case mapLookup target blockmap of
- Just ra -> ra
- Nothing -> emptyRegSet
-
- live_from_branch = unionManyUniqSets (map targetLiveRegs targets)
-
- liveregs_br = liveregs1 `unionUniqSets` live_from_branch
-
- -- registers that are live only in the branch targets should
- -- be listed as dying here.
- live_branch_only = live_from_branch `minusUniqSet` liveregs
- r_dying_br = nonDetEltsUniqSet (mkUniqSet r_dying `unionUniqSets`
- live_branch_only)
- -- See Note [Unique Determinism and code generation]
diff --git a/compiler/nativeGen/RegClass.hs b/compiler/nativeGen/RegClass.hs
deleted file mode 100644
index f4170cca94..0000000000
--- a/compiler/nativeGen/RegClass.hs
+++ /dev/null
@@ -1,32 +0,0 @@
--- | An architecture independent description of a register's class.
-module RegClass
- ( RegClass (..) )
-
-where
-
-import GhcPrelude
-
-import Outputable
-import Unique
-
-
--- | The class of a register.
--- Used in the register allocator.
--- We treat all registers in a class as being interchangeable.
---
-data RegClass
- = RcInteger
- | RcFloat
- | RcDouble
- deriving Eq
-
-
-instance Uniquable RegClass where
- getUnique RcInteger = mkRegClassUnique 0
- getUnique RcFloat = mkRegClassUnique 1
- getUnique RcDouble = mkRegClassUnique 2
-
-instance Outputable RegClass where
- ppr RcInteger = Outputable.text "I"
- ppr RcFloat = Outputable.text "F"
- ppr RcDouble = Outputable.text "D"
diff --git a/compiler/nativeGen/SPARC/AddrMode.hs b/compiler/nativeGen/SPARC/AddrMode.hs
deleted file mode 100644
index ee40843351..0000000000
--- a/compiler/nativeGen/SPARC/AddrMode.hs
+++ /dev/null
@@ -1,44 +0,0 @@
-
-module SPARC.AddrMode (
- AddrMode(..),
- addrOffset
-)
-
-where
-
-import GhcPrelude
-
-import SPARC.Imm
-import SPARC.Base
-import Reg
-
--- addressing modes ------------------------------------------------------------
-
--- | Represents a memory address in an instruction.
--- Being a RISC machine, the SPARC addressing modes are very regular.
---
-data AddrMode
- = AddrRegReg Reg Reg -- addr = r1 + r2
- | AddrRegImm Reg Imm -- addr = r1 + imm
-
-
--- | Add an integer offset to the address in an AddrMode.
---
-addrOffset :: AddrMode -> Int -> Maybe AddrMode
-addrOffset addr off
- = case addr of
- AddrRegImm r (ImmInt n)
- | fits13Bits n2 -> Just (AddrRegImm r (ImmInt n2))
- | otherwise -> Nothing
- where n2 = n + off
-
- AddrRegImm r (ImmInteger n)
- | fits13Bits n2 -> Just (AddrRegImm r (ImmInt (fromInteger n2)))
- | otherwise -> Nothing
- where n2 = n + toInteger off
-
- AddrRegReg r (RegReal (RealRegSingle 0))
- | fits13Bits off -> Just (AddrRegImm r (ImmInt off))
- | otherwise -> Nothing
-
- _ -> Nothing
diff --git a/compiler/nativeGen/SPARC/Base.hs b/compiler/nativeGen/SPARC/Base.hs
deleted file mode 100644
index 4b3b7c984c..0000000000
--- a/compiler/nativeGen/SPARC/Base.hs
+++ /dev/null
@@ -1,77 +0,0 @@
-
--- | Bits and pieces on the bottom of the module dependency tree.
--- Also import the required constants, so we know what we're using.
---
--- In the interests of cross-compilation, we want to free ourselves
--- from the autoconf generated modules like main/Constants
-
-module SPARC.Base (
- wordLength,
- wordLengthInBits,
- spillAreaLength,
- spillSlotSize,
- extraStackArgsHere,
- fits13Bits,
- is32BitInteger,
- largeOffsetError
-)
-
-where
-
-import GhcPrelude
-
-import GHC.Driver.Session
-import Panic
-
-import Data.Int
-
-
--- On 32 bit SPARC, pointers are 32 bits.
-wordLength :: Int
-wordLength = 4
-
-wordLengthInBits :: Int
-wordLengthInBits
- = wordLength * 8
-
--- Size of the available spill area
-spillAreaLength :: DynFlags -> Int
-spillAreaLength
- = rESERVED_C_STACK_BYTES
-
--- | We need 8 bytes because our largest registers are 64 bit.
-spillSlotSize :: Int
-spillSlotSize = 8
-
-
--- | We (allegedly) put the first six C-call arguments in registers;
--- where do we start putting the rest of them?
-extraStackArgsHere :: Int
-extraStackArgsHere = 23
-
-
-{-# SPECIALIZE fits13Bits :: Int -> Bool, Integer -> Bool #-}
--- | Check whether an offset is representable with 13 bits.
-fits13Bits :: Integral a => a -> Bool
-fits13Bits x = x >= -4096 && x < 4096
-
--- | Check whether an integer will fit in 32 bits.
--- A CmmInt is intended to be truncated to the appropriate
--- number of bits, so here we truncate it to Int64. This is
--- important because e.g. -1 as a CmmInt might be either
--- -1 or 18446744073709551615.
---
-is32BitInteger :: Integer -> Bool
-is32BitInteger i
- = i64 <= 0x7fffffff && i64 >= -0x80000000
- where i64 = fromIntegral i :: Int64
-
-
--- | Sadness.
-largeOffsetError :: (Show a) => a -> b
-largeOffsetError i
- = panic ("ERROR: SPARC native-code generator cannot handle large offset ("
- ++ show i ++ ");\nprobably because of large constant data structures;" ++
- "\nworkaround: use -fllvm on this module.\n")
-
-
diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs
deleted file mode 100644
index b9f79a6b9f..0000000000
--- a/compiler/nativeGen/SPARC/CodeGen.hs
+++ /dev/null
@@ -1,700 +0,0 @@
-{-# LANGUAGE CPP #-}
-
------------------------------------------------------------------------------
---
--- Generating machine code (instruction selection)
---
--- (c) The University of Glasgow 1996-2013
---
------------------------------------------------------------------------------
-
-{-# LANGUAGE GADTs #-}
-module SPARC.CodeGen (
- cmmTopCodeGen,
- generateJumpTableForInstr,
- InstrBlock
-)
-
-where
-
-#include "HsVersions.h"
-
--- NCG stuff:
-import GhcPrelude
-
-import SPARC.Base
-import SPARC.CodeGen.Sanity
-import SPARC.CodeGen.Amode
-import SPARC.CodeGen.CondCode
-import SPARC.CodeGen.Gen64
-import SPARC.CodeGen.Gen32
-import SPARC.CodeGen.Base
-import SPARC.Instr
-import SPARC.Imm
-import SPARC.AddrMode
-import SPARC.Regs
-import SPARC.Stack
-import Instruction
-import Format
-import NCGMonad ( NatM, getNewRegNat, getNewLabelNat )
-
--- Our intermediate code:
-import GHC.Cmm.BlockId
-import GHC.Cmm
-import GHC.Cmm.Utils
-import GHC.Cmm.Switch
-import GHC.Cmm.Dataflow.Block
-import GHC.Cmm.Dataflow.Graph
-import PIC
-import Reg
-import GHC.Cmm.CLabel
-import CPrim
-
--- The rest:
-import BasicTypes
-import GHC.Driver.Session
-import FastString
-import OrdList
-import Outputable
-import GHC.Platform
-
-import Control.Monad ( mapAndUnzipM )
-
--- | Top level code generation
-cmmTopCodeGen :: RawCmmDecl
- -> NatM [NatCmmDecl RawCmmStatics Instr]
-
-cmmTopCodeGen (CmmProc info lab live graph)
- = do let blocks = toBlockListEntryFirst graph
- (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
-
- let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
- let tops = proc : concat statics
-
- return tops
-
-cmmTopCodeGen (CmmData sec dat) = do
- return [CmmData sec dat] -- no translation, we just use CmmStatic
-
-
--- | Do code generation on a single block of CMM code.
--- code generation may introduce new basic block boundaries, which
--- are indicated by the NEWBLOCK instruction. We must split up the
--- instruction stream into basic blocks again. Also, we extract
--- LDATAs here too.
-basicBlockCodeGen :: CmmBlock
- -> NatM ( [NatBasicBlock Instr]
- , [NatCmmDecl RawCmmStatics Instr])
-
-basicBlockCodeGen block = do
- let (_, nodes, tail) = blockSplit block
- id = entryLabel block
- stmts = blockToList nodes
- mid_instrs <- stmtsToInstrs stmts
- tail_instrs <- stmtToInstrs tail
- let instrs = mid_instrs `appOL` tail_instrs
- let
- (top,other_blocks,statics)
- = foldrOL mkBlocks ([],[],[]) instrs
-
- mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
- = ([], BasicBlock id instrs : blocks, statics)
-
- mkBlocks (LDATA sec dat) (instrs,blocks,statics)
- = (instrs, blocks, CmmData sec dat:statics)
-
- mkBlocks instr (instrs,blocks,statics)
- = (instr:instrs, blocks, statics)
-
- -- do intra-block sanity checking
- blocksChecked
- = map (checkBlock block)
- $ BasicBlock id top : other_blocks
-
- return (blocksChecked, statics)
-
-
--- | Convert some Cmm statements to SPARC instructions.
-stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
-stmtsToInstrs stmts
- = do instrss <- mapM stmtToInstrs stmts
- return (concatOL instrss)
-
-
-stmtToInstrs :: CmmNode e x -> NatM InstrBlock
-stmtToInstrs stmt = do
- dflags <- getDynFlags
- case stmt of
- CmmComment s -> return (unitOL (COMMENT s))
- CmmTick {} -> return nilOL
- CmmUnwind {} -> return nilOL
-
- CmmAssign reg src
- | isFloatType ty -> assignReg_FltCode format reg src
- | isWord64 ty -> assignReg_I64Code reg src
- | otherwise -> assignReg_IntCode format reg src
- where ty = cmmRegType dflags reg
- format = cmmTypeFormat ty
-
- CmmStore addr src
- | isFloatType ty -> assignMem_FltCode format addr src
- | isWord64 ty -> assignMem_I64Code addr src
- | otherwise -> assignMem_IntCode format addr src
- where ty = cmmExprType dflags src
- format = cmmTypeFormat ty
-
- CmmUnsafeForeignCall target result_regs args
- -> genCCall target result_regs args
-
- CmmBranch id -> genBranch id
- CmmCondBranch arg true false _ -> do
- b1 <- genCondJump true arg
- b2 <- genBranch false
- return (b1 `appOL` b2)
- CmmSwitch arg ids -> do dflags <- getDynFlags
- genSwitch dflags arg ids
- CmmCall { cml_target = arg } -> genJump arg
-
- _
- -> panic "stmtToInstrs: statement should have been cps'd away"
-
-
-{-
-Now, given a tree (the argument to a CmmLoad) that references memory,
-produce a suitable addressing mode.
-
-A Rule of the Game (tm) for Amodes: use of the addr bit must
-immediately follow use of the code part, since the code part puts
-values in registers which the addr then refers to. So you can't put
-anything in between, lest it overwrite some of those registers. If
-you need to do some other computation between the code part and use of
-the addr bit, first store the effective address from the amode in a
-temporary, then do the other computation, and then use the temporary:
-
- code
- LEA amode, tmp
- ... other computation ...
- ... (tmp) ...
--}
-
-
-
--- | Convert a BlockId to some CmmStatic data
-jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
-jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
-jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
- where blockLabel = blockLbl blockid
-
-
-
--- -----------------------------------------------------------------------------
--- Generating assignments
-
--- Assignments are really at the heart of the whole code generation
--- business. Almost all top-level nodes of any real importance are
--- assignments, which correspond to loads, stores, or register
--- transfers. If we're really lucky, some of the register transfers
--- will go away, because we can use the destination register to
--- complete the code generation for the right hand side. This only
--- fails when the right hand side is forced into a fixed register
--- (e.g. the result of a call).
-
-assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
-assignMem_IntCode pk addr src = do
- (srcReg, code) <- getSomeReg src
- Amode dstAddr addr_code <- getAmode addr
- return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr
-
-
-assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
-assignReg_IntCode _ reg src = do
- dflags <- getDynFlags
- r <- getRegister src
- let dst = getRegisterReg (targetPlatform dflags) reg
- return $ case r of
- Any _ code -> code dst
- Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
-
-
-
--- Floating point assignment to memory
-assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
-assignMem_FltCode pk addr src = do
- dflags <- getDynFlags
- Amode dst__2 code1 <- getAmode addr
- (src__2, code2) <- getSomeReg src
- tmp1 <- getNewRegNat pk
- let
- pk__2 = cmmExprType dflags src
- code__2 = code1 `appOL` code2 `appOL`
- if formatToWidth pk == typeWidth pk__2
- then unitOL (ST pk src__2 dst__2)
- else toOL [ FxTOy (cmmTypeFormat pk__2) pk src__2 tmp1
- , ST pk tmp1 dst__2]
- return code__2
-
--- Floating point assignment to a register/temporary
-assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
-assignReg_FltCode pk dstCmmReg srcCmmExpr = do
- dflags <- getDynFlags
- let platform = targetPlatform dflags
- srcRegister <- getRegister srcCmmExpr
- let dstReg = getRegisterReg platform dstCmmReg
-
- return $ case srcRegister of
- Any _ code -> code dstReg
- Fixed _ srcFixedReg srcCode -> srcCode `snocOL` FMOV pk srcFixedReg dstReg
-
-
-
-
-genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
-
-genJump (CmmLit (CmmLabel lbl))
- = return (toOL [CALL (Left target) 0 True, NOP])
- where
- target = ImmCLbl lbl
-
-genJump tree
- = do
- (target, code) <- getSomeReg tree
- return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
-
--- -----------------------------------------------------------------------------
--- Unconditional branches
-
-genBranch :: BlockId -> NatM InstrBlock
-genBranch = return . toOL . mkJumpInstr
-
-
--- -----------------------------------------------------------------------------
--- Conditional jumps
-
-{-
-Conditional jumps are always to local labels, so we can use branch
-instructions. We peek at the arguments to decide what kind of
-comparison to do.
-
-SPARC: First, we have to ensure that the condition codes are set
-according to the supplied comparison operation. We generate slightly
-different code for floating point comparisons, because a floating
-point operation cannot directly precede a @BF@. We assume the worst
-and fill that slot with a @NOP@.
-
-SPARC: Do not fill the delay slots here; you will confuse the register
-allocator.
--}
-
-
-genCondJump
- :: BlockId -- the branch target
- -> CmmExpr -- the condition on which to branch
- -> NatM InstrBlock
-
-
-
-genCondJump bid bool = do
- CondCode is_float cond code <- getCondCode bool
- return (
- code `appOL`
- toOL (
- if is_float
- then [NOP, BF cond False bid, NOP]
- else [BI cond False bid, NOP]
- )
- )
-
-
-
--- -----------------------------------------------------------------------------
--- Generating a table-branch
-
-genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock
-genSwitch dflags expr targets
- | positionIndependent dflags
- = error "MachCodeGen: sparc genSwitch PIC not finished\n"
-
- | otherwise
- = do (e_reg, e_code) <- getSomeReg (cmmOffset dflags expr offset)
-
- base_reg <- getNewRegNat II32
- offset_reg <- getNewRegNat II32
- dst <- getNewRegNat II32
-
- label <- getNewLabelNat
-
- return $ e_code `appOL`
- toOL
- [ -- load base of jump table
- SETHI (HI (ImmCLbl label)) base_reg
- , OR False base_reg (RIImm $ LO $ ImmCLbl label) base_reg
-
- -- the addrs in the table are 32 bits wide..
- , SLL e_reg (RIImm $ ImmInt 2) offset_reg
-
- -- load and jump to the destination
- , LD II32 (AddrRegReg base_reg offset_reg) dst
- , JMP_TBL (AddrRegImm dst (ImmInt 0)) ids label
- , NOP ]
- where (offset, ids) = switchTargetsToTable targets
-
-generateJumpTableForInstr :: DynFlags -> Instr
- -> Maybe (NatCmmDecl RawCmmStatics Instr)
-generateJumpTableForInstr dflags (JMP_TBL _ ids label) =
- let jumpTable = map (jumpTableEntry dflags) ids
- in Just (CmmData (Section ReadOnlyData label) (RawCmmStatics label jumpTable))
-generateJumpTableForInstr _ _ = Nothing
-
-
-
--- -----------------------------------------------------------------------------
--- Generating C calls
-
-{-
- Now the biggest nightmare---calls. Most of the nastiness is buried in
- @get_arg@, which moves the arguments to the correct registers/stack
- locations. Apart from that, the code is easy.
-
- The SPARC calling convention is an absolute
- nightmare. The first 6x32 bits of arguments are mapped into
- %o0 through %o5, and the remaining arguments are dumped to the
- stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
-
- If we have to put args on the stack, move %o6==%sp down by
- the number of words to go on the stack, to ensure there's enough space.
-
- According to Fraser and Hanson's lcc book, page 478, fig 17.2,
- 16 words above the stack pointer is a word for the address of
- a structure return value. I use this as a temporary location
- for moving values from float to int regs. Certainly it isn't
- safe to put anything in the 16 words starting at %sp, since
- this area can get trashed at any time due to window overflows
- caused by signal handlers.
-
- A final complication (if the above isn't enough) is that
- we can't blithely calculate the arguments one by one into
- %o0 .. %o5. Consider the following nested calls:
-
- fff a (fff b c)
-
- Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
- the inner call will itself use %o0, which trashes the value put there
- in preparation for the outer call. Upshot: we need to calculate the
- args into temporary regs, and move those to arg regs or onto the
- stack only immediately prior to the call proper. Sigh.
--}
-
-genCCall
- :: ForeignTarget -- function to call
- -> [CmmFormal] -- where to put the result
- -> [CmmActual] -- arguments (of mixed type)
- -> NatM InstrBlock
-
-
-
--- On SPARC under TSO (Total Store Ordering), writes earlier in the instruction stream
--- are guaranteed to take place before writes afterwards (unlike on PowerPC).
--- Ref: Section 8.4 of the SPARC V9 Architecture manual.
---
--- In the SPARC case we don't need a barrier.
---
-genCCall (PrimTarget MO_ReadBarrier) _ _
- = return $ nilOL
-genCCall (PrimTarget MO_WriteBarrier) _ _
- = return $ nilOL
-
-genCCall (PrimTarget (MO_Prefetch_Data _)) _ _
- = return $ nilOL
-
-genCCall target dest_regs args
- = do -- work out the arguments, and assign them to integer regs
- argcode_and_vregs <- mapM arg_to_int_vregs args
- let (argcodes, vregss) = unzip argcode_and_vregs
- let vregs = concat vregss
-
- let n_argRegs = length allArgRegs
- let n_argRegs_used = min (length vregs) n_argRegs
-
-
- -- deal with static vs dynamic call targets
- callinsns <- case target of
- ForeignTarget (CmmLit (CmmLabel lbl)) _ ->
- return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
-
- ForeignTarget expr _
- -> do (dyn_c, dyn_rs) <- arg_to_int_vregs expr
- let dyn_r = case dyn_rs of
- [dyn_r'] -> dyn_r'
- _ -> panic "SPARC.CodeGen.genCCall: arg_to_int"
- return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
-
- PrimTarget mop
- -> do res <- outOfLineMachOp mop
- lblOrMopExpr <- case res of
- Left lbl -> do
- return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
-
- Right mopExpr -> do
- (dyn_c, dyn_rs) <- arg_to_int_vregs mopExpr
- let dyn_r = case dyn_rs of
- [dyn_r'] -> dyn_r'
- _ -> panic "SPARC.CodeGen.genCCall: arg_to_int"
- return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
-
- return lblOrMopExpr
-
- let argcode = concatOL argcodes
-
- let (move_sp_down, move_sp_up)
- = let diff = length vregs - n_argRegs
- nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
- in if nn <= 0
- then (nilOL, nilOL)
- else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
-
- let transfer_code
- = toOL (move_final vregs allArgRegs extraStackArgsHere)
-
- dflags <- getDynFlags
- return
- $ argcode `appOL`
- move_sp_down `appOL`
- transfer_code `appOL`
- callinsns `appOL`
- unitOL NOP `appOL`
- move_sp_up `appOL`
- assign_code (targetPlatform dflags) dest_regs
-
-
--- | Generate code to calculate an argument, and move it into one
--- or two integer vregs.
-arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
-arg_to_int_vregs arg = do dflags <- getDynFlags
- arg_to_int_vregs' dflags arg
-
-arg_to_int_vregs' :: DynFlags -> CmmExpr -> NatM (OrdList Instr, [Reg])
-arg_to_int_vregs' dflags arg
-
- -- If the expr produces a 64 bit int, then we can just use iselExpr64
- | isWord64 (cmmExprType dflags arg)
- = do (ChildCode64 code r_lo) <- iselExpr64 arg
- let r_hi = getHiVRegFromLo r_lo
- return (code, [r_hi, r_lo])
-
- | otherwise
- = do (src, code) <- getSomeReg arg
- let pk = cmmExprType dflags arg
-
- case cmmTypeFormat pk of
-
- -- Load a 64 bit float return value into two integer regs.
- FF64 -> do
- v1 <- getNewRegNat II32
- v2 <- getNewRegNat II32
-
- let code2 =
- code `snocOL`
- FMOV FF64 src f0 `snocOL`
- ST FF32 f0 (spRel 16) `snocOL`
- LD II32 (spRel 16) v1 `snocOL`
- ST FF32 f1 (spRel 16) `snocOL`
- LD II32 (spRel 16) v2
-
- return (code2, [v1,v2])
-
- -- Load a 32 bit float return value into an integer reg
- FF32 -> do
- v1 <- getNewRegNat II32
-
- let code2 =
- code `snocOL`
- ST FF32 src (spRel 16) `snocOL`
- LD II32 (spRel 16) v1
-
- return (code2, [v1])
-
- -- Move an integer return value into its destination reg.
- _ -> do
- v1 <- getNewRegNat II32
-
- let code2 =
- code `snocOL`
- OR False g0 (RIReg src) v1
-
- return (code2, [v1])
-
-
--- | Move args from the integer vregs into which they have been
--- marshalled, into %o0 .. %o5, and the rest onto the stack.
---
-move_final :: [Reg] -> [Reg] -> Int -> [Instr]
-
--- all args done
-move_final [] _ _
- = []
-
--- out of aregs; move to stack
-move_final (v:vs) [] offset
- = ST II32 v (spRel offset)
- : move_final vs [] (offset+1)
-
--- move into an arg (%o[0..5]) reg
-move_final (v:vs) (a:az) offset
- = OR False g0 (RIReg v) a
- : move_final vs az offset
-
-
--- | Assign results returned from the call into their
--- destination regs.
---
-assign_code :: Platform -> [LocalReg] -> OrdList Instr
-
-assign_code _ [] = nilOL
-
-assign_code platform [dest]
- = let rep = localRegType dest
- width = typeWidth rep
- r_dest = getRegisterReg platform (CmmLocal dest)
-
- result
- | isFloatType rep
- , W32 <- width
- = unitOL $ FMOV FF32 (regSingle $ fReg 0) r_dest
-
- | isFloatType rep
- , W64 <- width
- = unitOL $ FMOV FF64 (regSingle $ fReg 0) r_dest
-
- | not $ isFloatType rep
- , W32 <- width
- = unitOL $ mkRegRegMoveInstr platform (regSingle $ oReg 0) r_dest
-
- | not $ isFloatType rep
- , W64 <- width
- , r_dest_hi <- getHiVRegFromLo r_dest
- = toOL [ mkRegRegMoveInstr platform (regSingle $ oReg 0) r_dest_hi
- , mkRegRegMoveInstr platform (regSingle $ oReg 1) r_dest]
-
- | otherwise
- = panic "SPARC.CodeGen.GenCCall: no match"
-
- in result
-
-assign_code _ _
- = panic "SPARC.CodeGen.GenCCall: no match"
-
-
-
--- | Generate a call to implement an out-of-line floating point operation
-outOfLineMachOp
- :: CallishMachOp
- -> NatM (Either CLabel CmmExpr)
-
-outOfLineMachOp mop
- = do let functionName
- = outOfLineMachOp_table mop
-
- dflags <- getDynFlags
- mopExpr <- cmmMakeDynamicReference dflags CallReference
- $ mkForeignLabel functionName Nothing ForeignLabelInExternalPackage IsFunction
-
- let mopLabelOrExpr
- = case mopExpr of
- CmmLit (CmmLabel lbl) -> Left lbl
- _ -> Right mopExpr
-
- return mopLabelOrExpr
-
-
--- | Decide what C function to use to implement a CallishMachOp
---
-outOfLineMachOp_table
- :: CallishMachOp
- -> FastString
-
-outOfLineMachOp_table mop
- = case mop of
- MO_F32_Exp -> fsLit "expf"
- MO_F32_ExpM1 -> fsLit "expm1f"
- MO_F32_Log -> fsLit "logf"
- MO_F32_Log1P -> fsLit "log1pf"
- MO_F32_Sqrt -> fsLit "sqrtf"
- MO_F32_Fabs -> unsupported
- MO_F32_Pwr -> fsLit "powf"
-
- MO_F32_Sin -> fsLit "sinf"
- MO_F32_Cos -> fsLit "cosf"
- MO_F32_Tan -> fsLit "tanf"
-
- MO_F32_Asin -> fsLit "asinf"
- MO_F32_Acos -> fsLit "acosf"
- MO_F32_Atan -> fsLit "atanf"
-
- MO_F32_Sinh -> fsLit "sinhf"
- MO_F32_Cosh -> fsLit "coshf"
- MO_F32_Tanh -> fsLit "tanhf"
-
- MO_F32_Asinh -> fsLit "asinhf"
- MO_F32_Acosh -> fsLit "acoshf"
- MO_F32_Atanh -> fsLit "atanhf"
-
- MO_F64_Exp -> fsLit "exp"
- MO_F64_ExpM1 -> fsLit "expm1"
- MO_F64_Log -> fsLit "log"
- MO_F64_Log1P -> fsLit "log1p"
- MO_F64_Sqrt -> fsLit "sqrt"
- MO_F64_Fabs -> unsupported
- MO_F64_Pwr -> fsLit "pow"
-
- MO_F64_Sin -> fsLit "sin"
- MO_F64_Cos -> fsLit "cos"
- MO_F64_Tan -> fsLit "tan"
-
- MO_F64_Asin -> fsLit "asin"
- MO_F64_Acos -> fsLit "acos"
- MO_F64_Atan -> fsLit "atan"
-
- MO_F64_Sinh -> fsLit "sinh"
- MO_F64_Cosh -> fsLit "cosh"
- MO_F64_Tanh -> fsLit "tanh"
-
- MO_F64_Asinh -> fsLit "asinh"
- MO_F64_Acosh -> fsLit "acosh"
- MO_F64_Atanh -> fsLit "atanh"
-
- MO_UF_Conv w -> fsLit $ word2FloatLabel w
-
- MO_Memcpy _ -> fsLit "memcpy"
- MO_Memset _ -> fsLit "memset"
- MO_Memmove _ -> fsLit "memmove"
- MO_Memcmp _ -> fsLit "memcmp"
-
- MO_BSwap w -> fsLit $ bSwapLabel w
- MO_BRev w -> fsLit $ bRevLabel w
- MO_PopCnt w -> fsLit $ popCntLabel w
- MO_Pdep w -> fsLit $ pdepLabel w
- MO_Pext w -> fsLit $ pextLabel w
- MO_Clz w -> fsLit $ clzLabel w
- MO_Ctz w -> fsLit $ ctzLabel w
- MO_AtomicRMW w amop -> fsLit $ atomicRMWLabel w amop
- MO_Cmpxchg w -> fsLit $ cmpxchgLabel w
- MO_AtomicRead w -> fsLit $ atomicReadLabel w
- MO_AtomicWrite w -> fsLit $ atomicWriteLabel w
-
- MO_S_Mul2 {} -> unsupported
- MO_S_QuotRem {} -> unsupported
- MO_U_QuotRem {} -> unsupported
- MO_U_QuotRem2 {} -> unsupported
- MO_Add2 {} -> unsupported
- MO_AddWordC {} -> unsupported
- MO_SubWordC {} -> unsupported
- MO_AddIntC {} -> unsupported
- MO_SubIntC {} -> unsupported
- MO_U_Mul2 {} -> unsupported
- MO_ReadBarrier -> unsupported
- MO_WriteBarrier -> unsupported
- MO_Touch -> unsupported
- (MO_Prefetch_Data _) -> unsupported
- where unsupported = panic ("outOfLineCmmOp: " ++ show mop
- ++ " not supported here")
-
diff --git a/compiler/nativeGen/SPARC/CodeGen/Amode.hs b/compiler/nativeGen/SPARC/CodeGen/Amode.hs
deleted file mode 100644
index 5351fc054b..0000000000
--- a/compiler/nativeGen/SPARC/CodeGen/Amode.hs
+++ /dev/null
@@ -1,74 +0,0 @@
-module SPARC.CodeGen.Amode (
- getAmode
-)
-
-where
-
-import GhcPrelude
-
-import {-# SOURCE #-} SPARC.CodeGen.Gen32
-import SPARC.CodeGen.Base
-import SPARC.AddrMode
-import SPARC.Imm
-import SPARC.Instr
-import SPARC.Regs
-import SPARC.Base
-import NCGMonad
-import Format
-
-import GHC.Cmm
-
-import OrdList
-
-
--- | Generate code to reference a memory address.
-getAmode
- :: CmmExpr -- ^ expr producing an address
- -> NatM Amode
-
-getAmode tree@(CmmRegOff _ _)
- = do dflags <- getDynFlags
- getAmode (mangleIndexTree dflags tree)
-
-getAmode (CmmMachOp (MO_Sub _) [x, CmmLit (CmmInt i _)])
- | fits13Bits (-i)
- = do
- (reg, code) <- getSomeReg x
- let
- off = ImmInt (-(fromInteger i))
- return (Amode (AddrRegImm reg off) code)
-
-
-getAmode (CmmMachOp (MO_Add _) [x, CmmLit (CmmInt i _)])
- | fits13Bits i
- = do
- (reg, code) <- getSomeReg x
- let
- off = ImmInt (fromInteger i)
- return (Amode (AddrRegImm reg off) code)
-
-getAmode (CmmMachOp (MO_Add _) [x, y])
- = do
- (regX, codeX) <- getSomeReg x
- (regY, codeY) <- getSomeReg y
- let
- code = codeX `appOL` codeY
- return (Amode (AddrRegReg regX regY) code)
-
-getAmode (CmmLit lit)
- = do
- let imm__2 = litToImm lit
- tmp1 <- getNewRegNat II32
- tmp2 <- getNewRegNat II32
-
- let code = toOL [ SETHI (HI imm__2) tmp1
- , OR False tmp1 (RIImm (LO imm__2)) tmp2]
-
- return (Amode (AddrRegReg tmp2 g0) code)
-
-getAmode other
- = do
- (reg, code) <- getSomeReg other
- let
- off = ImmInt 0
- return (Amode (AddrRegImm reg off) code)
diff --git a/compiler/nativeGen/SPARC/CodeGen/Base.hs b/compiler/nativeGen/SPARC/CodeGen/Base.hs
deleted file mode 100644
index ba07f2311d..0000000000
--- a/compiler/nativeGen/SPARC/CodeGen/Base.hs
+++ /dev/null
@@ -1,119 +0,0 @@
-module SPARC.CodeGen.Base (
- InstrBlock,
- CondCode(..),
- ChildCode64(..),
- Amode(..),
-
- Register(..),
- setFormatOfRegister,
-
- getRegisterReg,
- mangleIndexTree
-)
-
-where
-
-import GhcPrelude
-
-import SPARC.Instr
-import SPARC.Cond
-import SPARC.AddrMode
-import SPARC.Regs
-import Format
-import Reg
-
-import GHC.Platform.Regs
-import GHC.Driver.Session
-import GHC.Cmm
-import GHC.Cmm.Ppr.Expr () -- For Outputable instances
-import GHC.Platform
-
-import Outputable
-import OrdList
-
---------------------------------------------------------------------------------
--- | 'InstrBlock's are the insn sequences generated by the insn selectors.
--- They are really trees of insns to facilitate fast appending, where a
--- left-to-right traversal yields the insns in the correct order.
---
-type InstrBlock
- = OrdList Instr
-
-
--- | Condition codes passed up the tree.
---
-data CondCode
- = CondCode Bool Cond InstrBlock
-
-
--- | a.k.a "Register64"
--- Reg is the lower 32-bit temporary which contains the result.
--- Use getHiVRegFromLo to find the other VRegUnique.
---
--- Rules of this simplified insn selection game are therefore that
--- the returned Reg may be modified
---
-data ChildCode64
- = ChildCode64
- InstrBlock
- Reg
-
-
--- | Holds code that references a memory address.
-data Amode
- = Amode
- -- the AddrMode we can use in the instruction
- -- that does the real load\/store.
- AddrMode
-
- -- other setup code we have to run first before we can use the
- -- above AddrMode.
- InstrBlock
-
-
-
---------------------------------------------------------------------------------
--- | Code to produce a result into a register.
--- If the result must go in a specific register, it comes out as Fixed.
--- Otherwise, the parent can decide which register to put it in.
---
-data Register
- = Fixed Format Reg InstrBlock
- | Any Format (Reg -> InstrBlock)
-
-
--- | Change the format field in a Register.
-setFormatOfRegister
- :: Register -> Format -> Register
-
-setFormatOfRegister reg format
- = case reg of
- Fixed _ reg code -> Fixed format reg code
- Any _ codefn -> Any format codefn
-
-
---------------------------------------------------------------------------------
--- | Grab the Reg for a CmmReg
-getRegisterReg :: Platform -> CmmReg -> Reg
-
-getRegisterReg _ (CmmLocal (LocalReg u pk))
- = RegVirtual $ mkVirtualReg u (cmmTypeFormat pk)
-
-getRegisterReg platform (CmmGlobal mid)
- = case globalRegMaybe platform mid of
- Just reg -> RegReal reg
- Nothing -> pprPanic
- "SPARC.CodeGen.Base.getRegisterReg: global is in memory"
- (ppr $ CmmGlobal mid)
-
-
--- Expand CmmRegOff. ToDo: should we do it this way around, or convert
--- CmmExprs into CmmRegOff?
-mangleIndexTree :: DynFlags -> CmmExpr -> CmmExpr
-
-mangleIndexTree dflags (CmmRegOff reg off)
- = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
- where width = typeWidth (cmmRegType dflags reg)
-
-mangleIndexTree _ _
- = panic "SPARC.CodeGen.Base.mangleIndexTree: no match"
diff --git a/compiler/nativeGen/SPARC/CodeGen/CondCode.hs b/compiler/nativeGen/SPARC/CodeGen/CondCode.hs
deleted file mode 100644
index 892cbb1a8f..0000000000
--- a/compiler/nativeGen/SPARC/CodeGen/CondCode.hs
+++ /dev/null
@@ -1,110 +0,0 @@
-module SPARC.CodeGen.CondCode (
- getCondCode,
- condIntCode,
- condFltCode
-)
-
-where
-
-import GhcPrelude
-
-import {-# SOURCE #-} SPARC.CodeGen.Gen32
-import SPARC.CodeGen.Base
-import SPARC.Instr
-import SPARC.Regs
-import SPARC.Cond
-import SPARC.Imm
-import SPARC.Base
-import NCGMonad
-import Format
-
-import GHC.Cmm
-
-import OrdList
-import Outputable
-
-
-getCondCode :: CmmExpr -> NatM CondCode
-getCondCode (CmmMachOp mop [x, y])
- =
- case mop of
- MO_F_Eq W32 -> condFltCode EQQ x y
- MO_F_Ne W32 -> condFltCode NE x y
- MO_F_Gt W32 -> condFltCode GTT x y
- MO_F_Ge W32 -> condFltCode GE x y
- MO_F_Lt W32 -> condFltCode LTT x y
- MO_F_Le W32 -> condFltCode LE x y
-
- MO_F_Eq W64 -> condFltCode EQQ x y
- MO_F_Ne W64 -> condFltCode NE x y
- MO_F_Gt W64 -> condFltCode GTT x y
- MO_F_Ge W64 -> condFltCode GE x y
- MO_F_Lt W64 -> condFltCode LTT x y
- MO_F_Le W64 -> condFltCode LE x y
-
- MO_Eq _ -> condIntCode EQQ x y
- MO_Ne _ -> condIntCode NE x y
-
- MO_S_Gt _ -> condIntCode GTT x y
- MO_S_Ge _ -> condIntCode GE x y
- MO_S_Lt _ -> condIntCode LTT x y
- MO_S_Le _ -> condIntCode LE x y
-
- MO_U_Gt _ -> condIntCode GU x y
- MO_U_Ge _ -> condIntCode GEU x y
- MO_U_Lt _ -> condIntCode LU x y
- MO_U_Le _ -> condIntCode LEU x y
-
- _ -> pprPanic "SPARC.CodeGen.CondCode.getCondCode" (ppr (CmmMachOp mop [x,y]))
-
-getCondCode other = pprPanic "SPARC.CodeGen.CondCode.getCondCode" (ppr other)
-
-
-
-
-
--- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
--- passed back up the tree.
-
-condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
-condIntCode cond x (CmmLit (CmmInt y _))
- | fits13Bits y
- = do
- (src1, code) <- getSomeReg x
- let
- src2 = ImmInt (fromInteger y)
- code' = code `snocOL` SUB False True src1 (RIImm src2) g0
- return (CondCode False cond code')
-
-condIntCode cond x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- let
- code__2 = code1 `appOL` code2 `snocOL`
- SUB False True src1 (RIReg src2) g0
- return (CondCode False cond code__2)
-
-
-condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
-condFltCode cond x y = do
- dflags <- getDynFlags
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- tmp <- getNewRegNat FF64
- let
- promote x = FxTOy FF32 FF64 x tmp
-
- pk1 = cmmExprType dflags x
- pk2 = cmmExprType dflags y
-
- code__2 =
- if pk1 `cmmEqType` pk2 then
- code1 `appOL` code2 `snocOL`
- FCMP True (cmmTypeFormat pk1) src1 src2
- else if typeWidth pk1 == W32 then
- code1 `snocOL` promote src1 `appOL` code2 `snocOL`
- FCMP True FF64 tmp src2
- else
- code1 `appOL` code2 `snocOL` promote src2 `snocOL`
- FCMP True FF64 src1 tmp
- return (CondCode True cond code__2)
diff --git a/compiler/nativeGen/SPARC/CodeGen/Expand.hs b/compiler/nativeGen/SPARC/CodeGen/Expand.hs
deleted file mode 100644
index b6d78a9f79..0000000000
--- a/compiler/nativeGen/SPARC/CodeGen/Expand.hs
+++ /dev/null
@@ -1,156 +0,0 @@
-{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
-
--- | Expand out synthetic instructions into single machine instrs.
-module SPARC.CodeGen.Expand (
- expandTop
-)
-
-where
-
-import GhcPrelude
-
-import SPARC.Instr
-import SPARC.Imm
-import SPARC.AddrMode
-import SPARC.Regs
-import Instruction
-import Reg
-import Format
-import GHC.Cmm
-
-
-import Outputable
-import OrdList
-
--- | Expand out synthetic instructions in this top level thing
-expandTop :: NatCmmDecl RawCmmStatics Instr -> NatCmmDecl RawCmmStatics Instr
-expandTop top@(CmmData{})
- = top
-
-expandTop (CmmProc info lbl live (ListGraph blocks))
- = CmmProc info lbl live (ListGraph $ map expandBlock blocks)
-
-
--- | Expand out synthetic instructions in this block
-expandBlock :: NatBasicBlock Instr -> NatBasicBlock Instr
-
-expandBlock (BasicBlock label instrs)
- = let instrs_ol = expandBlockInstrs instrs
- instrs' = fromOL instrs_ol
- in BasicBlock label instrs'
-
-
--- | Expand out some instructions
-expandBlockInstrs :: [Instr] -> OrdList Instr
-expandBlockInstrs [] = nilOL
-
-expandBlockInstrs (ii:is)
- = let ii_doubleRegs = remapRegPair ii
- is_misaligned = expandMisalignedDoubles ii_doubleRegs
-
- in is_misaligned `appOL` expandBlockInstrs is
-
-
-
--- | In the SPARC instruction set the FP register pairs that are used
--- to hold 64 bit floats are referred to by just the first reg
--- of the pair. Remap our internal reg pairs to the appropriate reg.
---
--- For example:
--- ldd [%l1], (%f0 | %f1)
---
--- gets mapped to
--- ldd [$l1], %f0
---
-remapRegPair :: Instr -> Instr
-remapRegPair instr
- = let patchF reg
- = case reg of
- RegReal (RealRegSingle _)
- -> reg
-
- RegReal (RealRegPair r1 r2)
-
- -- sanity checking
- | r1 >= 32
- , r1 <= 63
- , r1 `mod` 2 == 0
- , r2 == r1 + 1
- -> RegReal (RealRegSingle r1)
-
- | otherwise
- -> pprPanic "SPARC.CodeGen.Expand: not remapping dodgy looking reg pair " (ppr reg)
-
- RegVirtual _
- -> pprPanic "SPARC.CodeGen.Expand: not remapping virtual reg " (ppr reg)
-
- in patchRegsOfInstr instr patchF
-
-
-
-
--- Expand out 64 bit load/stores into individual instructions to handle
--- possible double alignment problems.
---
--- TODO: It'd be better to use a scratch reg instead of the add/sub thing.
--- We might be able to do this faster if we use the UA2007 instr set
--- instead of restricting ourselves to SPARC V9.
---
-expandMisalignedDoubles :: Instr -> OrdList Instr
-expandMisalignedDoubles instr
-
- -- Translate to:
- -- add g1,g2,g1
- -- ld [g1],%fn
- -- ld [g1+4],%f(n+1)
- -- sub g1,g2,g1 -- to restore g1
- | LD FF64 (AddrRegReg r1 r2) fReg <- instr
- = toOL [ ADD False False r1 (RIReg r2) r1
- , LD FF32 (AddrRegReg r1 g0) fReg
- , LD FF32 (AddrRegImm r1 (ImmInt 4)) (fRegHi fReg)
- , SUB False False r1 (RIReg r2) r1 ]
-
- -- Translate to
- -- ld [addr],%fn
- -- ld [addr+4],%f(n+1)
- | LD FF64 addr fReg <- instr
- = let Just addr' = addrOffset addr 4
- in toOL [ LD FF32 addr fReg
- , LD FF32 addr' (fRegHi fReg) ]
-
- -- Translate to:
- -- add g1,g2,g1
- -- st %fn,[g1]
- -- st %f(n+1),[g1+4]
- -- sub g1,g2,g1 -- to restore g1
- | ST FF64 fReg (AddrRegReg r1 r2) <- instr
- = toOL [ ADD False False r1 (RIReg r2) r1
- , ST FF32 fReg (AddrRegReg r1 g0)
- , ST FF32 (fRegHi fReg) (AddrRegImm r1 (ImmInt 4))
- , SUB False False r1 (RIReg r2) r1 ]
-
- -- Translate to
- -- ld [addr],%fn
- -- ld [addr+4],%f(n+1)
- | ST FF64 fReg addr <- instr
- = let Just addr' = addrOffset addr 4
- in toOL [ ST FF32 fReg addr
- , ST FF32 (fRegHi fReg) addr' ]
-
- -- some other instr
- | otherwise
- = unitOL instr
-
-
-
--- | The high partner for this float reg.
-fRegHi :: Reg -> Reg
-fRegHi (RegReal (RealRegSingle r1))
- | r1 >= 32
- , r1 <= 63
- , r1 `mod` 2 == 0
- = (RegReal $ RealRegSingle (r1 + 1))
-
--- Can't take high partner for non-low reg.
-fRegHi reg
- = pprPanic "SPARC.CodeGen.Expand: can't take fRegHi from " (ppr reg)
diff --git a/compiler/nativeGen/SPARC/CodeGen/Gen32.hs b/compiler/nativeGen/SPARC/CodeGen/Gen32.hs
deleted file mode 100644
index 4f3409ebc5..0000000000
--- a/compiler/nativeGen/SPARC/CodeGen/Gen32.hs
+++ /dev/null
@@ -1,692 +0,0 @@
--- | Evaluation of 32 bit values.
-module SPARC.CodeGen.Gen32 (
- getSomeReg,
- getRegister
-)
-
-where
-
-import GhcPrelude
-
-import SPARC.CodeGen.CondCode
-import SPARC.CodeGen.Amode
-import SPARC.CodeGen.Gen64
-import SPARC.CodeGen.Base
-import SPARC.Stack
-import SPARC.Instr
-import SPARC.Cond
-import SPARC.AddrMode
-import SPARC.Imm
-import SPARC.Regs
-import SPARC.Base
-import NCGMonad
-import Format
-import Reg
-
-import GHC.Cmm
-
-import Control.Monad (liftM)
-import GHC.Driver.Session
-import OrdList
-import Outputable
-
--- | The dual to getAnyReg: compute an expression into a register, but
--- we don't mind which one it is.
-getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-getSomeReg expr = do
- r <- getRegister expr
- case r of
- Any rep code -> do
- tmp <- getNewRegNat rep
- return (tmp, code tmp)
- Fixed _ reg code ->
- return (reg, code)
-
-
-
--- | Make code to evaluate a 32 bit expression.
---
-getRegister :: CmmExpr -> NatM Register
-
-getRegister (CmmReg reg)
- = do dflags <- getDynFlags
- let platform = targetPlatform dflags
- return (Fixed (cmmTypeFormat (cmmRegType dflags reg))
- (getRegisterReg platform reg) nilOL)
-
-getRegister tree@(CmmRegOff _ _)
- = do dflags <- getDynFlags
- getRegister (mangleIndexTree dflags tree)
-
-getRegister (CmmMachOp (MO_UU_Conv W64 W32)
- [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
- ChildCode64 code rlo <- iselExpr64 x
- return $ Fixed II32 (getHiVRegFromLo rlo) code
-
-getRegister (CmmMachOp (MO_SS_Conv W64 W32)
- [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
- ChildCode64 code rlo <- iselExpr64 x
- return $ Fixed II32 (getHiVRegFromLo rlo) code
-
-getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
- ChildCode64 code rlo <- iselExpr64 x
- return $ Fixed II32 rlo code
-
-getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
- ChildCode64 code rlo <- iselExpr64 x
- return $ Fixed II32 rlo code
-
-
--- Load a literal float into a float register.
--- The actual literal is stored in a new data area, and we load it
--- at runtime.
-getRegister (CmmLit (CmmFloat f W32)) = do
-
- -- a label for the new data area
- lbl <- getNewLabelNat
- tmp <- getNewRegNat II32
-
- let code dst = toOL [
- -- the data area
- LDATA (Section ReadOnlyData lbl) $ RawCmmStatics lbl
- [CmmStaticLit (CmmFloat f W32)],
-
- -- load the literal
- SETHI (HI (ImmCLbl lbl)) tmp,
- LD II32 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
-
- return (Any FF32 code)
-
-getRegister (CmmLit (CmmFloat d W64)) = do
- lbl <- getNewLabelNat
- tmp <- getNewRegNat II32
- let code dst = toOL [
- LDATA (Section ReadOnlyData lbl) $ RawCmmStatics lbl
- [CmmStaticLit (CmmFloat d W64)],
- SETHI (HI (ImmCLbl lbl)) tmp,
- LD II64 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst]
- return (Any FF64 code)
-
-
--- Unary machine ops
-getRegister (CmmMachOp mop [x])
- = case mop of
- -- Floating point negation -------------------------
- MO_F_Neg W32 -> trivialUFCode FF32 (FNEG FF32) x
- MO_F_Neg W64 -> trivialUFCode FF64 (FNEG FF64) x
-
-
- -- Integer negation --------------------------------
- MO_S_Neg rep -> trivialUCode (intFormat rep) (SUB False False g0) x
- MO_Not rep -> trivialUCode (intFormat rep) (XNOR False g0) x
-
-
- -- Float word size conversion ----------------------
- MO_FF_Conv W64 W32 -> coerceDbl2Flt x
- MO_FF_Conv W32 W64 -> coerceFlt2Dbl x
-
-
- -- Float <-> Signed Int conversion -----------------
- MO_FS_Conv from to -> coerceFP2Int from to x
- MO_SF_Conv from to -> coerceInt2FP from to x
-
-
- -- Unsigned integer word size conversions ----------
-
- -- If it's the same size, then nothing needs to be done.
- MO_UU_Conv from to
- | from == to -> conversionNop (intFormat to) x
-
- -- To narrow an unsigned word, mask out the high bits to simulate what would
- -- happen if we copied the value into a smaller register.
- MO_UU_Conv W16 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8))
- MO_UU_Conv W32 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8))
-
- -- for narrowing 32 bit to 16 bit, don't use a literal mask value like the W16->W8
- -- case because the only way we can load it is via SETHI, which needs 2 ops.
- -- Do some shifts to chop out the high bits instead.
- MO_UU_Conv W32 W16
- -> do tmpReg <- getNewRegNat II32
- (xReg, xCode) <- getSomeReg x
- let code dst
- = xCode
- `appOL` toOL
- [ SLL xReg (RIImm $ ImmInt 16) tmpReg
- , SRL tmpReg (RIImm $ ImmInt 16) dst]
-
- return $ Any II32 code
-
- -- trivialCode W16 (AND False) x (CmmLit (CmmInt 65535 W16))
-
- -- To widen an unsigned word we don't have to do anything.
- -- Just leave it in the same register and mark the result as the new size.
- MO_UU_Conv W8 W16 -> conversionNop (intFormat W16) x
- MO_UU_Conv W8 W32 -> conversionNop (intFormat W32) x
- MO_UU_Conv W16 W32 -> conversionNop (intFormat W32) x
-
-
- -- Signed integer word size conversions ------------
-
- -- Mask out high bits when narrowing them
- MO_SS_Conv W16 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8))
- MO_SS_Conv W32 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8))
- MO_SS_Conv W32 W16 -> trivialCode W16 (AND False) x (CmmLit (CmmInt 65535 W16))
-
- -- Sign extend signed words when widening them.
- MO_SS_Conv W8 W16 -> integerExtend W8 W16 x
- MO_SS_Conv W8 W32 -> integerExtend W8 W32 x
- MO_SS_Conv W16 W32 -> integerExtend W16 W32 x
-
- _ -> panic ("Unknown unary mach op: " ++ show mop)
-
-
--- Binary machine ops
-getRegister (CmmMachOp mop [x, y])
- = case mop of
- MO_Eq _ -> condIntReg EQQ x y
- MO_Ne _ -> condIntReg NE x y
-
- MO_S_Gt _ -> condIntReg GTT x y
- MO_S_Ge _ -> condIntReg GE x y
- MO_S_Lt _ -> condIntReg LTT x y
- MO_S_Le _ -> condIntReg LE x y
-
- MO_U_Gt W32 -> condIntReg GU x y
- MO_U_Ge W32 -> condIntReg GEU x y
- MO_U_Lt W32 -> condIntReg LU x y
- MO_U_Le W32 -> condIntReg LEU x y
-
- MO_U_Gt W16 -> condIntReg GU x y
- MO_U_Ge W16 -> condIntReg GEU x y
- MO_U_Lt W16 -> condIntReg LU x y
- MO_U_Le W16 -> condIntReg LEU x y
-
- MO_Add W32 -> trivialCode W32 (ADD False False) x y
- MO_Sub W32 -> trivialCode W32 (SUB False False) x y
-
- MO_S_MulMayOflo rep -> imulMayOflo rep x y
-
- MO_S_Quot W32 -> idiv True False x y
- MO_U_Quot W32 -> idiv False False x y
-
- MO_S_Rem W32 -> irem True x y
- MO_U_Rem W32 -> irem False x y
-
- MO_F_Eq _ -> condFltReg EQQ x y
- MO_F_Ne _ -> condFltReg NE x y
-
- MO_F_Gt _ -> condFltReg GTT x y
- MO_F_Ge _ -> condFltReg GE x y
- MO_F_Lt _ -> condFltReg LTT x y
- MO_F_Le _ -> condFltReg LE x y
-
- MO_F_Add w -> trivialFCode w FADD x y
- MO_F_Sub w -> trivialFCode w FSUB x y
- MO_F_Mul w -> trivialFCode w FMUL x y
- MO_F_Quot w -> trivialFCode w FDIV x y
-
- MO_And rep -> trivialCode rep (AND False) x y
- MO_Or rep -> trivialCode rep (OR False) x y
- MO_Xor rep -> trivialCode rep (XOR False) x y
-
- MO_Mul rep -> trivialCode rep (SMUL False) x y
-
- MO_Shl rep -> trivialCode rep SLL x y
- MO_U_Shr rep -> trivialCode rep SRL x y
- MO_S_Shr rep -> trivialCode rep SRA x y
-
- _ -> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
-
-getRegister (CmmLoad mem pk) = do
- Amode src code <- getAmode mem
- let
- code__2 dst = code `snocOL` LD (cmmTypeFormat pk) src dst
- return (Any (cmmTypeFormat pk) code__2)
-
-getRegister (CmmLit (CmmInt i _))
- | fits13Bits i
- = let
- src = ImmInt (fromInteger i)
- code dst = unitOL (OR False g0 (RIImm src) dst)
- in
- return (Any II32 code)
-
-getRegister (CmmLit lit)
- = let imm = litToImm lit
- code dst = toOL [
- SETHI (HI imm) dst,
- OR False dst (RIImm (LO imm)) dst]
- in return (Any II32 code)
-
-
-getRegister _
- = panic "SPARC.CodeGen.Gen32.getRegister: no match"
-
-
--- | sign extend and widen
-integerExtend
- :: Width -- ^ width of source expression
- -> Width -- ^ width of result
- -> CmmExpr -- ^ source expression
- -> NatM Register
-
-integerExtend from to expr
- = do -- load the expr into some register
- (reg, e_code) <- getSomeReg expr
- tmp <- getNewRegNat II32
- let bitCount
- = case (from, to) of
- (W8, W32) -> 24
- (W16, W32) -> 16
- (W8, W16) -> 24
- _ -> panic "SPARC.CodeGen.Gen32: no match"
- let code dst
- = e_code
-
- -- local shift word left to load the sign bit
- `snocOL` SLL reg (RIImm (ImmInt bitCount)) tmp
-
- -- arithmetic shift right to sign extend
- `snocOL` SRA tmp (RIImm (ImmInt bitCount)) dst
-
- return (Any (intFormat to) code)
-
-
--- | For nop word format conversions we set the resulting value to have the
--- required size, but don't need to generate any actual code.
---
-conversionNop
- :: Format -> CmmExpr -> NatM Register
-
-conversionNop new_rep expr
- = do e_code <- getRegister expr
- return (setFormatOfRegister e_code new_rep)
-
-
-
--- | Generate an integer division instruction.
-idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register
-
--- For unsigned division with a 32 bit numerator,
--- we can just clear the Y register.
-idiv False cc x y
- = do
- (a_reg, a_code) <- getSomeReg x
- (b_reg, b_code) <- getSomeReg y
-
- let code dst
- = a_code
- `appOL` b_code
- `appOL` toOL
- [ WRY g0 g0
- , UDIV cc a_reg (RIReg b_reg) dst]
-
- return (Any II32 code)
-
-
--- For _signed_ division with a 32 bit numerator,
--- we have to sign extend the numerator into the Y register.
-idiv True cc x y
- = do
- (a_reg, a_code) <- getSomeReg x
- (b_reg, b_code) <- getSomeReg y
-
- tmp <- getNewRegNat II32
-
- let code dst
- = a_code
- `appOL` b_code
- `appOL` toOL
- [ SRA a_reg (RIImm (ImmInt 16)) tmp -- sign extend
- , SRA tmp (RIImm (ImmInt 16)) tmp
-
- , WRY tmp g0
- , SDIV cc a_reg (RIReg b_reg) dst]
-
- return (Any II32 code)
-
-
--- | Do an integer remainder.
---
--- NOTE: The SPARC v8 architecture manual says that integer division
--- instructions _may_ generate a remainder, depending on the implementation.
--- If so it is _recommended_ that the remainder is placed in the Y register.
---
--- The UltraSparc 2007 manual says Y is _undefined_ after division.
---
--- The SPARC T2 doesn't store the remainder, not sure about the others.
--- It's probably best not to worry about it, and just generate our own
--- remainders.
---
-irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register
-
--- For unsigned operands:
--- Division is between a 64 bit numerator and a 32 bit denominator,
--- so we still have to clear the Y register.
-irem False x y
- = do
- (a_reg, a_code) <- getSomeReg x
- (b_reg, b_code) <- getSomeReg y
-
- tmp_reg <- getNewRegNat II32
-
- let code dst
- = a_code
- `appOL` b_code
- `appOL` toOL
- [ WRY g0 g0
- , UDIV False a_reg (RIReg b_reg) tmp_reg
- , UMUL False tmp_reg (RIReg b_reg) tmp_reg
- , SUB False False a_reg (RIReg tmp_reg) dst]
-
- return (Any II32 code)
-
-
-
--- For signed operands:
--- Make sure to sign extend into the Y register, or the remainder
--- will have the wrong sign when the numerator is negative.
---
--- TODO: When sign extending, GCC only shifts the a_reg right by 17 bits,
--- not the full 32. Not sure why this is, something to do with overflow?
--- If anyone cares enough about the speed of signed remainder they
--- can work it out themselves (then tell me). -- BL 2009/01/20
-irem True x y
- = do
- (a_reg, a_code) <- getSomeReg x
- (b_reg, b_code) <- getSomeReg y
-
- tmp1_reg <- getNewRegNat II32
- tmp2_reg <- getNewRegNat II32
-
- let code dst
- = a_code
- `appOL` b_code
- `appOL` toOL
- [ SRA a_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend
- , SRA tmp1_reg (RIImm (ImmInt 16)) tmp1_reg -- sign extend
- , WRY tmp1_reg g0
-
- , SDIV False a_reg (RIReg b_reg) tmp2_reg
- , SMUL False tmp2_reg (RIReg b_reg) tmp2_reg
- , SUB False False a_reg (RIReg tmp2_reg) dst]
-
- return (Any II32 code)
-
-
-imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
-imulMayOflo rep a b
- = do
- (a_reg, a_code) <- getSomeReg a
- (b_reg, b_code) <- getSomeReg b
- res_lo <- getNewRegNat II32
- res_hi <- getNewRegNat II32
-
- let shift_amt = case rep of
- W32 -> 31
- W64 -> 63
- _ -> panic "shift_amt"
-
- let code dst = a_code `appOL` b_code `appOL`
- toOL [
- SMUL False a_reg (RIReg b_reg) res_lo,
- RDY res_hi,
- SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
- SUB False False res_lo (RIReg res_hi) dst
- ]
- return (Any II32 code)
-
-
--- -----------------------------------------------------------------------------
--- 'trivial*Code': deal with trivial instructions
-
--- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
--- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
--- Only look for constants on the right hand side, because that's
--- where the generic optimizer will have put them.
-
--- Similarly, for unary instructions, we don't have to worry about
--- matching an StInt as the argument, because genericOpt will already
--- have handled the constant-folding.
-
-trivialCode
- :: Width
- -> (Reg -> RI -> Reg -> Instr)
- -> CmmExpr
- -> CmmExpr
- -> NatM Register
-
-trivialCode _ instr x (CmmLit (CmmInt y _))
- | fits13Bits y
- = do
- (src1, code) <- getSomeReg x
- let
- src2 = ImmInt (fromInteger y)
- code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
- return (Any II32 code__2)
-
-
-trivialCode _ instr x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- let
- code__2 dst = code1 `appOL` code2 `snocOL`
- instr src1 (RIReg src2) dst
- return (Any II32 code__2)
-
-
-trivialFCode
- :: Width
- -> (Format -> Reg -> Reg -> Reg -> Instr)
- -> CmmExpr
- -> CmmExpr
- -> NatM Register
-
-trivialFCode pk instr x y = do
- dflags <- getDynFlags
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- tmp <- getNewRegNat FF64
- let
- promote x = FxTOy FF32 FF64 x tmp
-
- pk1 = cmmExprType dflags x
- pk2 = cmmExprType dflags y
-
- code__2 dst =
- if pk1 `cmmEqType` pk2 then
- code1 `appOL` code2 `snocOL`
- instr (floatFormat pk) src1 src2 dst
- else if typeWidth pk1 == W32 then
- code1 `snocOL` promote src1 `appOL` code2 `snocOL`
- instr FF64 tmp src2 dst
- else
- code1 `appOL` code2 `snocOL` promote src2 `snocOL`
- instr FF64 src1 tmp dst
- return (Any (cmmTypeFormat $ if pk1 `cmmEqType` pk2 then pk1 else cmmFloat W64)
- code__2)
-
-
-
-trivialUCode
- :: Format
- -> (RI -> Reg -> Instr)
- -> CmmExpr
- -> NatM Register
-
-trivialUCode format instr x = do
- (src, code) <- getSomeReg x
- let
- code__2 dst = code `snocOL` instr (RIReg src) dst
- return (Any format code__2)
-
-
-trivialUFCode
- :: Format
- -> (Reg -> Reg -> Instr)
- -> CmmExpr
- -> NatM Register
-
-trivialUFCode pk instr x = do
- (src, code) <- getSomeReg x
- let
- code__2 dst = code `snocOL` instr src dst
- return (Any pk code__2)
-
-
-
-
--- Coercions -------------------------------------------------------------------
-
--- | Coerce a integer value to floating point
-coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
-coerceInt2FP width1 width2 x = do
- (src, code) <- getSomeReg x
- let
- code__2 dst = code `appOL` toOL [
- ST (intFormat width1) src (spRel (-2)),
- LD (intFormat width1) (spRel (-2)) dst,
- FxTOy (intFormat width1) (floatFormat width2) dst dst]
- return (Any (floatFormat $ width2) code__2)
-
-
-
--- | Coerce a floating point value to integer
---
--- NOTE: On sparc v9 there are no instructions to move a value from an
--- FP register directly to an int register, so we have to use a load/store.
---
-coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
-coerceFP2Int width1 width2 x
- = do let fformat1 = floatFormat width1
- fformat2 = floatFormat width2
-
- iformat2 = intFormat width2
-
- (fsrc, code) <- getSomeReg x
- fdst <- getNewRegNat fformat2
-
- let code2 dst
- = code
- `appOL` toOL
- -- convert float to int format, leaving it in a float reg.
- [ FxTOy fformat1 iformat2 fsrc fdst
-
- -- store the int into mem, then load it back to move
- -- it into an actual int reg.
- , ST fformat2 fdst (spRel (-2))
- , LD iformat2 (spRel (-2)) dst]
-
- return (Any iformat2 code2)
-
-
--- | Coerce a double precision floating point value to single precision.
-coerceDbl2Flt :: CmmExpr -> NatM Register
-coerceDbl2Flt x = do
- (src, code) <- getSomeReg x
- return (Any FF32 (\dst -> code `snocOL` FxTOy FF64 FF32 src dst))
-
-
--- | Coerce a single precision floating point value to double precision
-coerceFlt2Dbl :: CmmExpr -> NatM Register
-coerceFlt2Dbl x = do
- (src, code) <- getSomeReg x
- return (Any FF64 (\dst -> code `snocOL` FxTOy FF32 FF64 src dst))
-
-
-
-
--- Condition Codes -------------------------------------------------------------
---
--- Evaluate a comparison, and get the result into a register.
---
--- Do not fill the delay slots here. you will confuse the register allocator.
---
-condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
-condIntReg EQQ x (CmmLit (CmmInt 0 _)) = do
- (src, code) <- getSomeReg x
- let
- code__2 dst = code `appOL` toOL [
- SUB False True g0 (RIReg src) g0,
- SUB True False g0 (RIImm (ImmInt (-1))) dst]
- return (Any II32 code__2)
-
-condIntReg EQQ x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- let
- code__2 dst = code1 `appOL` code2 `appOL` toOL [
- XOR False src1 (RIReg src2) dst,
- SUB False True g0 (RIReg dst) g0,
- SUB True False g0 (RIImm (ImmInt (-1))) dst]
- return (Any II32 code__2)
-
-condIntReg NE x (CmmLit (CmmInt 0 _)) = do
- (src, code) <- getSomeReg x
- let
- code__2 dst = code `appOL` toOL [
- SUB False True g0 (RIReg src) g0,
- ADD True False g0 (RIImm (ImmInt 0)) dst]
- return (Any II32 code__2)
-
-condIntReg NE x y = do
- (src1, code1) <- getSomeReg x
- (src2, code2) <- getSomeReg y
- let
- code__2 dst = code1 `appOL` code2 `appOL` toOL [
- XOR False src1 (RIReg src2) dst,
- SUB False True g0 (RIReg dst) g0,
- ADD True False g0 (RIImm (ImmInt 0)) dst]
- return (Any II32 code__2)
-
-condIntReg cond x y = do
- bid1 <- liftM (\a -> seq a a) getBlockIdNat
- bid2 <- liftM (\a -> seq a a) getBlockIdNat
- CondCode _ cond cond_code <- condIntCode cond x y
- let
- code__2 dst
- = cond_code
- `appOL` toOL
- [ BI cond False bid1
- , NOP
-
- , OR False g0 (RIImm (ImmInt 0)) dst
- , BI ALWAYS False bid2
- , NOP
-
- , NEWBLOCK bid1
- , OR False g0 (RIImm (ImmInt 1)) dst
- , BI ALWAYS False bid2
- , NOP
-
- , NEWBLOCK bid2]
-
- return (Any II32 code__2)
-
-
-condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
-condFltReg cond x y = do
- bid1 <- liftM (\a -> seq a a) getBlockIdNat
- bid2 <- liftM (\a -> seq a a) getBlockIdNat
-
- CondCode _ cond cond_code <- condFltCode cond x y
- let
- code__2 dst
- = cond_code
- `appOL` toOL
- [ NOP
- , BF cond False bid1
- , NOP
-
- , OR False g0 (RIImm (ImmInt 0)) dst
- , BI ALWAYS False bid2
- , NOP
-
- , NEWBLOCK bid1
- , OR False g0 (RIImm (ImmInt 1)) dst
- , BI ALWAYS False bid2
- , NOP
-
- , NEWBLOCK bid2 ]
-
- return (Any II32 code__2)
diff --git a/compiler/nativeGen/SPARC/CodeGen/Gen32.hs-boot b/compiler/nativeGen/SPARC/CodeGen/Gen32.hs-boot
deleted file mode 100644
index 1dbd2d3612..0000000000
--- a/compiler/nativeGen/SPARC/CodeGen/Gen32.hs-boot
+++ /dev/null
@@ -1,16 +0,0 @@
-
-module SPARC.CodeGen.Gen32 (
- getSomeReg,
- getRegister
-)
-
-where
-
-import SPARC.CodeGen.Base
-import NCGMonad
-import Reg
-
-import GHC.Cmm
-
-getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-getRegister :: CmmExpr -> NatM Register
diff --git a/compiler/nativeGen/SPARC/CodeGen/Gen64.hs b/compiler/nativeGen/SPARC/CodeGen/Gen64.hs
deleted file mode 100644
index 7c04101ec4..0000000000
--- a/compiler/nativeGen/SPARC/CodeGen/Gen64.hs
+++ /dev/null
@@ -1,216 +0,0 @@
--- | Evaluation of 64 bit values on 32 bit platforms.
-module SPARC.CodeGen.Gen64 (
- assignMem_I64Code,
- assignReg_I64Code,
- iselExpr64
-)
-
-where
-
-import GhcPrelude
-
-import {-# SOURCE #-} SPARC.CodeGen.Gen32
-import SPARC.CodeGen.Base
-import SPARC.CodeGen.Amode
-import SPARC.Regs
-import SPARC.AddrMode
-import SPARC.Imm
-import SPARC.Instr
--- import SPARC.Ppr()
-import NCGMonad
-import Instruction
-import Format
-import Reg
-
-import GHC.Cmm
-
-import GHC.Driver.Session
-import OrdList
-import Outputable
-
--- | Code to assign a 64 bit value to memory.
-assignMem_I64Code
- :: CmmExpr -- ^ expr producing the destination address
- -> CmmExpr -- ^ expr producing the source value.
- -> NatM InstrBlock
-
-assignMem_I64Code addrTree valueTree
- = do
- ChildCode64 vcode rlo <- iselExpr64 valueTree
-
- (src, acode) <- getSomeReg addrTree
- let
- rhi = getHiVRegFromLo rlo
-
- -- Big-endian store
- mov_hi = ST II32 rhi (AddrRegImm src (ImmInt 0))
- mov_lo = ST II32 rlo (AddrRegImm src (ImmInt 4))
-
- code = vcode `appOL` acode `snocOL` mov_hi `snocOL` mov_lo
-
-{- pprTrace "assignMem_I64Code"
- (vcat [ text "addrTree: " <+> ppr addrTree
- , text "valueTree: " <+> ppr valueTree
- , text "vcode:"
- , vcat $ map ppr $ fromOL vcode
- , text ""
- , text "acode:"
- , vcat $ map ppr $ fromOL acode ])
- $ -}
- return code
-
-
--- | Code to assign a 64 bit value to a register.
-assignReg_I64Code
- :: CmmReg -- ^ the destination register
- -> CmmExpr -- ^ expr producing the source value
- -> NatM InstrBlock
-
-assignReg_I64Code (CmmLocal (LocalReg u_dst pk)) valueTree
- = do
- ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
- let
- r_dst_lo = RegVirtual $ mkVirtualReg u_dst (cmmTypeFormat pk)
- r_dst_hi = getHiVRegFromLo r_dst_lo
- r_src_hi = getHiVRegFromLo r_src_lo
- mov_lo = mkMOV r_src_lo r_dst_lo
- mov_hi = mkMOV r_src_hi r_dst_hi
- mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
-
- return (vcode `snocOL` mov_hi `snocOL` mov_lo)
-
-assignReg_I64Code _ _
- = panic "assignReg_I64Code(sparc): invalid lvalue"
-
-
-
-
--- | Get the value of an expression into a 64 bit register.
-
-iselExpr64 :: CmmExpr -> NatM ChildCode64
-
--- Load a 64 bit word
-iselExpr64 (CmmLoad addrTree ty)
- | isWord64 ty
- = do Amode amode addr_code <- getAmode addrTree
- let result
-
- | AddrRegReg r1 r2 <- amode
- = do rlo <- getNewRegNat II32
- tmp <- getNewRegNat II32
- let rhi = getHiVRegFromLo rlo
-
- return $ ChildCode64
- ( addr_code
- `appOL` toOL
- [ ADD False False r1 (RIReg r2) tmp
- , LD II32 (AddrRegImm tmp (ImmInt 0)) rhi
- , LD II32 (AddrRegImm tmp (ImmInt 4)) rlo ])
- rlo
-
- | AddrRegImm r1 (ImmInt i) <- amode
- = do rlo <- getNewRegNat II32
- let rhi = getHiVRegFromLo rlo
-
- return $ ChildCode64
- ( addr_code
- `appOL` toOL
- [ LD II32 (AddrRegImm r1 (ImmInt $ 0 + i)) rhi
- , LD II32 (AddrRegImm r1 (ImmInt $ 4 + i)) rlo ])
- rlo
-
- | otherwise
- = panic "SPARC.CodeGen.Gen64: no match"
-
- result
-
-
--- Add a literal to a 64 bit integer
-iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)])
- = do ChildCode64 code1 r1_lo <- iselExpr64 e1
- let r1_hi = getHiVRegFromLo r1_lo
-
- r_dst_lo <- getNewRegNat II32
- let r_dst_hi = getHiVRegFromLo r_dst_lo
-
- let code = code1
- `appOL` toOL
- [ ADD False True r1_lo (RIImm (ImmInteger i)) r_dst_lo
- , ADD True False r1_hi (RIReg g0) r_dst_hi ]
-
- return $ ChildCode64 code r_dst_lo
-
-
--- Addition of II64
-iselExpr64 (CmmMachOp (MO_Add _) [e1, e2])
- = do ChildCode64 code1 r1_lo <- iselExpr64 e1
- let r1_hi = getHiVRegFromLo r1_lo
-
- ChildCode64 code2 r2_lo <- iselExpr64 e2
- let r2_hi = getHiVRegFromLo r2_lo
-
- r_dst_lo <- getNewRegNat II32
- let r_dst_hi = getHiVRegFromLo r_dst_lo
-
- let code = code1
- `appOL` code2
- `appOL` toOL
- [ ADD False True r1_lo (RIReg r2_lo) r_dst_lo
- , ADD True False r1_hi (RIReg r2_hi) r_dst_hi ]
-
- return $ ChildCode64 code r_dst_lo
-
-
-iselExpr64 (CmmReg (CmmLocal (LocalReg uq ty)))
- | isWord64 ty
- = do
- r_dst_lo <- getNewRegNat II32
- let r_dst_hi = getHiVRegFromLo r_dst_lo
- r_src_lo = RegVirtual $ mkVirtualReg uq II32
- r_src_hi = getHiVRegFromLo r_src_lo
- mov_lo = mkMOV r_src_lo r_dst_lo
- mov_hi = mkMOV r_src_hi r_dst_hi
- mkMOV sreg dreg = OR False g0 (RIReg sreg) dreg
- return (
- ChildCode64 (toOL [mov_hi, mov_lo]) r_dst_lo
- )
-
--- Convert something into II64
-iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr])
- = do
- r_dst_lo <- getNewRegNat II32
- let r_dst_hi = getHiVRegFromLo r_dst_lo
-
- -- compute expr and load it into r_dst_lo
- (a_reg, a_code) <- getSomeReg expr
-
- dflags <- getDynFlags
- let platform = targetPlatform dflags
- code = a_code
- `appOL` toOL
- [ mkRegRegMoveInstr platform g0 r_dst_hi -- clear high 32 bits
- , mkRegRegMoveInstr platform a_reg r_dst_lo ]
-
- return $ ChildCode64 code r_dst_lo
-
--- only W32 supported for now
-iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr])
- = do
- r_dst_lo <- getNewRegNat II32
- let r_dst_hi = getHiVRegFromLo r_dst_lo
-
- -- compute expr and load it into r_dst_lo
- (a_reg, a_code) <- getSomeReg expr
-
- dflags <- getDynFlags
- let platform = targetPlatform dflags
- code = a_code
- `appOL` toOL
- [ SRA a_reg (RIImm (ImmInt 31)) r_dst_hi
- , mkRegRegMoveInstr platform a_reg r_dst_lo ]
-
- return $ ChildCode64 code r_dst_lo
-
-
-iselExpr64 expr
- = pprPanic "iselExpr64(sparc)" (ppr expr)
diff --git a/compiler/nativeGen/SPARC/CodeGen/Sanity.hs b/compiler/nativeGen/SPARC/CodeGen/Sanity.hs
deleted file mode 100644
index b60c958a73..0000000000
--- a/compiler/nativeGen/SPARC/CodeGen/Sanity.hs
+++ /dev/null
@@ -1,69 +0,0 @@
--- | One ounce of sanity checking is worth 10000000000000000 ounces
--- of staring blindly at assembly code trying to find the problem..
-module SPARC.CodeGen.Sanity (
- checkBlock
-)
-
-where
-
-import GhcPrelude
-
-import SPARC.Instr
-import SPARC.Ppr () -- For Outputable instances
-import Instruction
-
-import GHC.Cmm
-
-import Outputable
-
-
--- | Enforce intra-block invariants.
---
-checkBlock :: CmmBlock
- -> NatBasicBlock Instr
- -> NatBasicBlock Instr
-
-checkBlock cmm block@(BasicBlock _ instrs)
- | checkBlockInstrs instrs
- = block
-
- | otherwise
- = pprPanic
- ("SPARC.CodeGen: bad block\n")
- ( vcat [ text " -- cmm -----------------\n"
- , ppr cmm
- , text " -- native code ---------\n"
- , ppr block ])
-
-
-checkBlockInstrs :: [Instr] -> Bool
-checkBlockInstrs ii
-
- -- An unconditional jumps end the block.
- -- There must be an unconditional jump in the block, otherwise
- -- the register liveness determinator will get the liveness
- -- information wrong.
- --
- -- If the block ends with a cmm call that never returns
- -- then there can be unreachable instructions after the jump,
- -- but we don't mind here.
- --
- | instr : NOP : _ <- ii
- , isUnconditionalJump instr
- = True
-
- -- All jumps must have a NOP in their branch delay slot.
- -- The liveness determinator and register allocators aren't smart
- -- enough to handle branch delay slots.
- --
- | instr : NOP : is <- ii
- , isJumpishInstr instr
- = checkBlockInstrs is
-
- -- keep checking
- | _:i2:is <- ii
- = checkBlockInstrs (i2:is)
-
- -- this block is no good
- | otherwise
- = False
diff --git a/compiler/nativeGen/SPARC/Cond.hs b/compiler/nativeGen/SPARC/Cond.hs
deleted file mode 100644
index 3fbfb8603f..0000000000
--- a/compiler/nativeGen/SPARC/Cond.hs
+++ /dev/null
@@ -1,54 +0,0 @@
-module SPARC.Cond (
- Cond(..),
- condUnsigned,
- condToSigned,
- condToUnsigned
-)
-
-where
-
-import GhcPrelude
-
--- | Branch condition codes.
-data Cond
- = ALWAYS
- | EQQ
- | GE
- | GEU
- | GTT
- | GU
- | LE
- | LEU
- | LTT
- | LU
- | NE
- | NEG
- | NEVER
- | POS
- | VC
- | VS
- deriving Eq
-
-
-condUnsigned :: Cond -> Bool
-condUnsigned GU = True
-condUnsigned LU = True
-condUnsigned GEU = True
-condUnsigned LEU = True
-condUnsigned _ = False
-
-
-condToSigned :: Cond -> Cond
-condToSigned GU = GTT
-condToSigned LU = LTT
-condToSigned GEU = GE
-condToSigned LEU = LE
-condToSigned x = x
-
-
-condToUnsigned :: Cond -> Cond
-condToUnsigned GTT = GU
-condToUnsigned LTT = LU
-condToUnsigned GE = GEU
-condToUnsigned LE = LEU
-condToUnsigned x = x
diff --git a/compiler/nativeGen/SPARC/Imm.hs b/compiler/nativeGen/SPARC/Imm.hs
deleted file mode 100644
index 78b6612bbf..0000000000
--- a/compiler/nativeGen/SPARC/Imm.hs
+++ /dev/null
@@ -1,67 +0,0 @@
-module SPARC.Imm (
- -- immediate values
- Imm(..),
- strImmLit,
- litToImm
-)
-
-where
-
-import GhcPrelude
-
-import GHC.Cmm
-import GHC.Cmm.CLabel
-
-import Outputable
-
--- | An immediate value.
--- Not all of these are directly representable by the machine.
--- Things like ImmLit are slurped out and put in a data segment instead.
---
-data Imm
- = ImmInt Int
-
- -- Sigh.
- | ImmInteger Integer
-
- -- AbstractC Label (with baggage)
- | ImmCLbl CLabel
-
- -- Simple string
- | ImmLit SDoc
- | ImmIndex CLabel Int
- | ImmFloat Rational
- | ImmDouble Rational
-
- | ImmConstantSum Imm Imm
- | ImmConstantDiff Imm Imm
-
- | LO Imm
- | HI Imm
-
-
--- | Create a ImmLit containing this string.
-strImmLit :: String -> Imm
-strImmLit s = ImmLit (text s)
-
-
--- | Convert a CmmLit to an Imm.
--- Narrow to the width: a CmmInt might be out of
--- range, but we assume that ImmInteger only contains
--- in-range values. A signed value should be fine here.
---
-litToImm :: CmmLit -> Imm
-litToImm lit
- = case lit of
- CmmInt i w -> ImmInteger (narrowS w i)
- CmmFloat f W32 -> ImmFloat f
- CmmFloat f W64 -> ImmDouble f
- CmmLabel l -> ImmCLbl l
- CmmLabelOff l off -> ImmIndex l off
-
- CmmLabelDiffOff l1 l2 off _
- -> ImmConstantSum
- (ImmConstantDiff (ImmCLbl l1) (ImmCLbl l2))
- (ImmInt off)
-
- _ -> panic "SPARC.Regs.litToImm: no match"
diff --git a/compiler/nativeGen/SPARC/Instr.hs b/compiler/nativeGen/SPARC/Instr.hs
deleted file mode 100644
index d49d82fa7e..0000000000
--- a/compiler/nativeGen/SPARC/Instr.hs
+++ /dev/null
@@ -1,481 +0,0 @@
-{-# LANGUAGE CPP #-}
-
------------------------------------------------------------------------------
---
--- Machine-dependent assembly language
---
--- (c) The University of Glasgow 1993-2004
---
------------------------------------------------------------------------------
-#include "HsVersions.h"
-
-module SPARC.Instr (
- RI(..),
- riZero,
-
- fpRelEA,
- moveSp,
-
- isUnconditionalJump,
-
- Instr(..),
- maxSpillSlots
-)
-
-where
-
-import GhcPrelude
-
-import SPARC.Stack
-import SPARC.Imm
-import SPARC.AddrMode
-import SPARC.Cond
-import SPARC.Regs
-import SPARC.Base
-import TargetReg
-import Instruction
-import RegClass
-import Reg
-import Format
-
-import GHC.Cmm.CLabel
-import GHC.Platform.Regs
-import GHC.Cmm.BlockId
-import GHC.Driver.Session
-import GHC.Cmm
-import FastString
-import Outputable
-import GHC.Platform
-
-
--- | Register or immediate
-data RI
- = RIReg Reg
- | RIImm Imm
-
--- | Check if a RI represents a zero value.
--- - a literal zero
--- - register %g0, which is always zero.
---
-riZero :: RI -> Bool
-riZero (RIImm (ImmInt 0)) = True
-riZero (RIImm (ImmInteger 0)) = True
-riZero (RIReg (RegReal (RealRegSingle 0))) = True
-riZero _ = False
-
-
--- | Calculate the effective address which would be used by the
--- corresponding fpRel sequence.
-fpRelEA :: Int -> Reg -> Instr
-fpRelEA n dst
- = ADD False False fp (RIImm (ImmInt (n * wordLength))) dst
-
-
--- | Code to shift the stack pointer by n words.
-moveSp :: Int -> Instr
-moveSp n
- = ADD False False sp (RIImm (ImmInt (n * wordLength))) sp
-
--- | An instruction that will cause the one after it never to be exectuted
-isUnconditionalJump :: Instr -> Bool
-isUnconditionalJump ii
- = case ii of
- CALL{} -> True
- JMP{} -> True
- JMP_TBL{} -> True
- BI ALWAYS _ _ -> True
- BF ALWAYS _ _ -> True
- _ -> False
-
-
--- | instance for sparc instruction set
-instance Instruction Instr where
- regUsageOfInstr = sparc_regUsageOfInstr
- patchRegsOfInstr = sparc_patchRegsOfInstr
- isJumpishInstr = sparc_isJumpishInstr
- jumpDestsOfInstr = sparc_jumpDestsOfInstr
- patchJumpInstr = sparc_patchJumpInstr
- mkSpillInstr = sparc_mkSpillInstr
- mkLoadInstr = sparc_mkLoadInstr
- takeDeltaInstr = sparc_takeDeltaInstr
- isMetaInstr = sparc_isMetaInstr
- mkRegRegMoveInstr = sparc_mkRegRegMoveInstr
- takeRegRegMoveInstr = sparc_takeRegRegMoveInstr
- mkJumpInstr = sparc_mkJumpInstr
- mkStackAllocInstr = panic "no sparc_mkStackAllocInstr"
- mkStackDeallocInstr = panic "no sparc_mkStackDeallocInstr"
-
-
--- | SPARC instruction set.
--- Not complete. This is only the ones we need.
---
-data Instr
-
- -- meta ops --------------------------------------------------
- -- comment pseudo-op
- = COMMENT FastString
-
- -- some static data spat out during code generation.
- -- Will be extracted before pretty-printing.
- | LDATA Section RawCmmStatics
-
- -- Start a new basic block. Useful during codegen, removed later.
- -- Preceding instruction should be a jump, as per the invariants
- -- for a BasicBlock (see Cmm).
- | NEWBLOCK BlockId
-
- -- specify current stack offset for benefit of subsequent passes.
- | DELTA Int
-
- -- real instrs -----------------------------------------------
- -- Loads and stores.
- | LD Format AddrMode Reg -- format, src, dst
- | ST Format Reg AddrMode -- format, src, dst
-
- -- Int Arithmetic.
- -- x: add/sub with carry bit.
- -- In SPARC V9 addx and friends were renamed addc.
- --
- -- cc: modify condition codes
- --
- | ADD Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
- | SUB Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
-
- | UMUL Bool Reg RI Reg -- cc?, src1, src2, dst
- | SMUL Bool Reg RI Reg -- cc?, src1, src2, dst
-
-
- -- The SPARC divide instructions perform 64bit by 32bit division
- -- The Y register is xored into the first operand.
-
- -- On _some implementations_ the Y register is overwritten by
- -- the remainder, so we have to make sure it is 0 each time.
-
- -- dst <- ((Y `shiftL` 32) `or` src1) `div` src2
- | UDIV Bool Reg RI Reg -- cc?, src1, src2, dst
- | SDIV Bool Reg RI Reg -- cc?, src1, src2, dst
-
- | RDY Reg -- move contents of Y register to reg
- | WRY Reg Reg -- Y <- src1 `xor` src2
-
- -- Logic operations.
- | AND Bool Reg RI Reg -- cc?, src1, src2, dst
- | ANDN Bool Reg RI Reg -- cc?, src1, src2, dst
- | OR Bool Reg RI Reg -- cc?, src1, src2, dst
- | ORN Bool Reg RI Reg -- cc?, src1, src2, dst
- | XOR Bool Reg RI Reg -- cc?, src1, src2, dst
- | XNOR Bool Reg RI Reg -- cc?, src1, src2, dst
- | SLL Reg RI Reg -- src1, src2, dst
- | SRL Reg RI Reg -- src1, src2, dst
- | SRA Reg RI Reg -- src1, src2, dst
-
- -- Load immediates.
- | SETHI Imm Reg -- src, dst
-
- -- Do nothing.
- -- Implemented by the assembler as SETHI 0, %g0, but worth an alias
- | NOP
-
- -- Float Arithmetic.
- -- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single
- -- instructions right up until we spit them out.
- --
- | FABS Format Reg Reg -- src dst
- | FADD Format Reg Reg Reg -- src1, src2, dst
- | FCMP Bool Format Reg Reg -- exception?, src1, src2, dst
- | FDIV Format Reg Reg Reg -- src1, src2, dst
- | FMOV Format Reg Reg -- src, dst
- | FMUL Format Reg Reg Reg -- src1, src2, dst
- | FNEG Format Reg Reg -- src, dst
- | FSQRT Format Reg Reg -- src, dst
- | FSUB Format Reg Reg Reg -- src1, src2, dst
- | FxTOy Format Format Reg Reg -- src, dst
-
- -- Jumping around.
- | BI Cond Bool BlockId -- cond, annul?, target
- | BF Cond Bool BlockId -- cond, annul?, target
-
- | JMP AddrMode -- target
-
- -- With a tabled jump we know all the possible destinations.
- -- We also need this info so we can work out what regs are live across the jump.
- --
- | JMP_TBL AddrMode [Maybe BlockId] CLabel
-
- | CALL (Either Imm Reg) Int Bool -- target, args, terminal
-
-
--- | regUsage returns the sets of src and destination registers used
--- by a particular instruction. Machine registers that are
--- pre-allocated to stgRegs are filtered out, because they are
--- uninteresting from a register allocation standpoint. (We wouldn't
--- want them to end up on the free list!) As far as we are concerned,
--- the fixed registers simply don't exist (for allocation purposes,
--- anyway).
-
--- regUsage doesn't need to do any trickery for jumps and such. Just
--- state precisely the regs read and written by that insn. The
--- consequences of control flow transfers, as far as register
--- allocation goes, are taken care of by the register allocator.
---
-sparc_regUsageOfInstr :: Platform -> Instr -> RegUsage
-sparc_regUsageOfInstr platform instr
- = case instr of
- LD _ addr reg -> usage (regAddr addr, [reg])
- ST _ reg addr -> usage (reg : regAddr addr, [])
- ADD _ _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- SUB _ _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- UMUL _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- SMUL _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- UDIV _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- SDIV _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- RDY rd -> usage ([], [rd])
- WRY r1 r2 -> usage ([r1, r2], [])
- AND _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- ANDN _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- OR _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- ORN _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- XOR _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- XNOR _ r1 ar r2 -> usage (r1 : regRI ar, [r2])
- SLL r1 ar r2 -> usage (r1 : regRI ar, [r2])
- SRL r1 ar r2 -> usage (r1 : regRI ar, [r2])
- SRA r1 ar r2 -> usage (r1 : regRI ar, [r2])
- SETHI _ reg -> usage ([], [reg])
- FABS _ r1 r2 -> usage ([r1], [r2])
- FADD _ r1 r2 r3 -> usage ([r1, r2], [r3])
- FCMP _ _ r1 r2 -> usage ([r1, r2], [])
- FDIV _ r1 r2 r3 -> usage ([r1, r2], [r3])
- FMOV _ r1 r2 -> usage ([r1], [r2])
- FMUL _ r1 r2 r3 -> usage ([r1, r2], [r3])
- FNEG _ r1 r2 -> usage ([r1], [r2])
- FSQRT _ r1 r2 -> usage ([r1], [r2])
- FSUB _ r1 r2 r3 -> usage ([r1, r2], [r3])
- FxTOy _ _ r1 r2 -> usage ([r1], [r2])
-
- JMP addr -> usage (regAddr addr, [])
- JMP_TBL addr _ _ -> usage (regAddr addr, [])
-
- CALL (Left _ ) _ True -> noUsage
- CALL (Left _ ) n False -> usage (argRegs n, callClobberedRegs)
- CALL (Right reg) _ True -> usage ([reg], [])
- CALL (Right reg) n False -> usage (reg : (argRegs n), callClobberedRegs)
- _ -> noUsage
-
- where
- usage (src, dst)
- = RU (filter (interesting platform) src)
- (filter (interesting platform) dst)
-
- regAddr (AddrRegReg r1 r2) = [r1, r2]
- regAddr (AddrRegImm r1 _) = [r1]
-
- regRI (RIReg r) = [r]
- regRI _ = []
-
-
--- | Interesting regs are virtuals, or ones that are allocatable
--- by the register allocator.
-interesting :: Platform -> Reg -> Bool
-interesting platform reg
- = case reg of
- RegVirtual _ -> True
- RegReal (RealRegSingle r1) -> freeReg platform r1
- RegReal (RealRegPair r1 _) -> freeReg platform r1
-
-
-
--- | Apply a given mapping to tall the register references in this instruction.
-sparc_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
-sparc_patchRegsOfInstr instr env = case instr of
- LD fmt addr reg -> LD fmt (fixAddr addr) (env reg)
- ST fmt reg addr -> ST fmt (env reg) (fixAddr addr)
-
- ADD x cc r1 ar r2 -> ADD x cc (env r1) (fixRI ar) (env r2)
- SUB x cc r1 ar r2 -> SUB x cc (env r1) (fixRI ar) (env r2)
- UMUL cc r1 ar r2 -> UMUL cc (env r1) (fixRI ar) (env r2)
- SMUL cc r1 ar r2 -> SMUL cc (env r1) (fixRI ar) (env r2)
- UDIV cc r1 ar r2 -> UDIV cc (env r1) (fixRI ar) (env r2)
- SDIV cc r1 ar r2 -> SDIV cc (env r1) (fixRI ar) (env r2)
- RDY rd -> RDY (env rd)
- WRY r1 r2 -> WRY (env r1) (env r2)
- AND b r1 ar r2 -> AND b (env r1) (fixRI ar) (env r2)
- ANDN b r1 ar r2 -> ANDN b (env r1) (fixRI ar) (env r2)
- OR b r1 ar r2 -> OR b (env r1) (fixRI ar) (env r2)
- ORN b r1 ar r2 -> ORN b (env r1) (fixRI ar) (env r2)
- XOR b r1 ar r2 -> XOR b (env r1) (fixRI ar) (env r2)
- XNOR b r1 ar r2 -> XNOR b (env r1) (fixRI ar) (env r2)
- SLL r1 ar r2 -> SLL (env r1) (fixRI ar) (env r2)
- SRL r1 ar r2 -> SRL (env r1) (fixRI ar) (env r2)
- SRA r1 ar r2 -> SRA (env r1) (fixRI ar) (env r2)
-
- SETHI imm reg -> SETHI imm (env reg)
-
- FABS s r1 r2 -> FABS s (env r1) (env r2)
- FADD s r1 r2 r3 -> FADD s (env r1) (env r2) (env r3)
- FCMP e s r1 r2 -> FCMP e s (env r1) (env r2)
- FDIV s r1 r2 r3 -> FDIV s (env r1) (env r2) (env r3)
- FMOV s r1 r2 -> FMOV s (env r1) (env r2)
- FMUL s r1 r2 r3 -> FMUL s (env r1) (env r2) (env r3)
- FNEG s r1 r2 -> FNEG s (env r1) (env r2)
- FSQRT s r1 r2 -> FSQRT s (env r1) (env r2)
- FSUB s r1 r2 r3 -> FSUB s (env r1) (env r2) (env r3)
- FxTOy s1 s2 r1 r2 -> FxTOy s1 s2 (env r1) (env r2)
-
- JMP addr -> JMP (fixAddr addr)
- JMP_TBL addr ids l -> JMP_TBL (fixAddr addr) ids l
-
- CALL (Left i) n t -> CALL (Left i) n t
- CALL (Right r) n t -> CALL (Right (env r)) n t
- _ -> instr
-
- where
- fixAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2)
- fixAddr (AddrRegImm r1 i) = AddrRegImm (env r1) i
-
- fixRI (RIReg r) = RIReg (env r)
- fixRI other = other
-
-
---------------------------------------------------------------------------------
-sparc_isJumpishInstr :: Instr -> Bool
-sparc_isJumpishInstr instr
- = case instr of
- BI{} -> True
- BF{} -> True
- JMP{} -> True
- JMP_TBL{} -> True
- CALL{} -> True
- _ -> False
-
-sparc_jumpDestsOfInstr :: Instr -> [BlockId]
-sparc_jumpDestsOfInstr insn
- = case insn of
- BI _ _ id -> [id]
- BF _ _ id -> [id]
- JMP_TBL _ ids _ -> [id | Just id <- ids]
- _ -> []
-
-
-sparc_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr
-sparc_patchJumpInstr insn patchF
- = case insn of
- BI cc annul id -> BI cc annul (patchF id)
- BF cc annul id -> BF cc annul (patchF id)
- JMP_TBL n ids l -> JMP_TBL n (map (fmap patchF) ids) l
- _ -> insn
-
-
---------------------------------------------------------------------------------
--- | Make a spill instruction.
--- On SPARC we spill below frame pointer leaving 2 words/spill
-sparc_mkSpillInstr
- :: DynFlags
- -> Reg -- ^ register to spill
- -> Int -- ^ current stack delta
- -> Int -- ^ spill slot to use
- -> Instr
-
-sparc_mkSpillInstr dflags reg _ slot
- = let platform = targetPlatform dflags
- off = spillSlotToOffset dflags slot
- off_w = 1 + (off `div` 4)
- fmt = case targetClassOfReg platform reg of
- RcInteger -> II32
- RcFloat -> FF32
- RcDouble -> FF64
-
- in ST fmt reg (fpRel (negate off_w))
-
-
--- | Make a spill reload instruction.
-sparc_mkLoadInstr
- :: DynFlags
- -> Reg -- ^ register to load into
- -> Int -- ^ current stack delta
- -> Int -- ^ spill slot to use
- -> Instr
-
-sparc_mkLoadInstr dflags reg _ slot
- = let platform = targetPlatform dflags
- off = spillSlotToOffset dflags slot
- off_w = 1 + (off `div` 4)
- fmt = case targetClassOfReg platform reg of
- RcInteger -> II32
- RcFloat -> FF32
- RcDouble -> FF64
-
- in LD fmt (fpRel (- off_w)) reg
-
-
---------------------------------------------------------------------------------
--- | See if this instruction is telling us the current C stack delta
-sparc_takeDeltaInstr
- :: Instr
- -> Maybe Int
-
-sparc_takeDeltaInstr instr
- = case instr of
- DELTA i -> Just i
- _ -> Nothing
-
-
-sparc_isMetaInstr
- :: Instr
- -> Bool
-
-sparc_isMetaInstr instr
- = case instr of
- COMMENT{} -> True
- LDATA{} -> True
- NEWBLOCK{} -> True
- DELTA{} -> True
- _ -> False
-
-
--- | Make a reg-reg move instruction.
--- On SPARC v8 there are no instructions to move directly between
--- floating point and integer regs. If we need to do that then we
--- have to go via memory.
---
-sparc_mkRegRegMoveInstr
- :: Platform
- -> Reg
- -> Reg
- -> Instr
-
-sparc_mkRegRegMoveInstr platform src dst
- | srcClass <- targetClassOfReg platform src
- , dstClass <- targetClassOfReg platform dst
- , srcClass == dstClass
- = case srcClass of
- RcInteger -> ADD False False src (RIReg g0) dst
- RcDouble -> FMOV FF64 src dst
- RcFloat -> FMOV FF32 src dst
-
- | otherwise
- = panic "SPARC.Instr.mkRegRegMoveInstr: classes of src and dest not the same"
-
-
--- | Check whether an instruction represents a reg-reg move.
--- The register allocator attempts to eliminate reg->reg moves whenever it can,
--- by assigning the src and dest temporaries to the same real register.
---
-sparc_takeRegRegMoveInstr :: Instr -> Maybe (Reg,Reg)
-sparc_takeRegRegMoveInstr instr
- = case instr of
- ADD False False src (RIReg src2) dst
- | g0 == src2 -> Just (src, dst)
-
- FMOV FF64 src dst -> Just (src, dst)
- FMOV FF32 src dst -> Just (src, dst)
- _ -> Nothing
-
-
--- | Make an unconditional branch instruction.
-sparc_mkJumpInstr
- :: BlockId
- -> [Instr]
-
-sparc_mkJumpInstr id
- = [BI ALWAYS False id
- , NOP] -- fill the branch delay slot.
diff --git a/compiler/nativeGen/SPARC/Ppr.hs b/compiler/nativeGen/SPARC/Ppr.hs
deleted file mode 100644
index 566f438403..0000000000
--- a/compiler/nativeGen/SPARC/Ppr.hs
+++ /dev/null
@@ -1,645 +0,0 @@
-{-# LANGUAGE CPP #-}
-
------------------------------------------------------------------------------
---
--- Pretty-printing assembly language
---
--- (c) The University of Glasgow 1993-2005
---
------------------------------------------------------------------------------
-
-{-# OPTIONS_GHC -fno-warn-orphans #-}
-
-module SPARC.Ppr (
- pprNatCmmDecl,
- pprBasicBlock,
- pprData,
- pprInstr,
- pprFormat,
- pprImm,
- pprDataItem
-)
-
-where
-
-#include "HsVersions.h"
-
-import GhcPrelude
-
-import SPARC.Regs
-import SPARC.Instr
-import SPARC.Cond
-import SPARC.Imm
-import SPARC.AddrMode
-import SPARC.Base
-import Instruction
-import Reg
-import Format
-import PprBase
-
-import GHC.Cmm hiding (topInfoTable)
-import GHC.Cmm.Ppr() -- For Outputable instances
-import GHC.Cmm.BlockId
-import GHC.Cmm.CLabel
-import GHC.Cmm.Dataflow.Label
-import GHC.Cmm.Dataflow.Collections
-
-import Unique ( pprUniqueAlways )
-import Outputable
-import GHC.Platform
-import FastString
-
--- -----------------------------------------------------------------------------
--- Printing this stuff out
-
-pprNatCmmDecl :: NatCmmDecl RawCmmStatics Instr -> SDoc
-pprNatCmmDecl (CmmData section dats) =
- pprSectionAlign section $$ pprDatas dats
-
-pprNatCmmDecl proc@(CmmProc top_info lbl _ (ListGraph blocks)) =
- case topInfoTable proc of
- Nothing ->
- -- special case for code without info table:
- pprSectionAlign (Section Text lbl) $$
- pprLabel lbl $$ -- blocks guaranteed not null, so label needed
- vcat (map (pprBasicBlock top_info) blocks)
-
- Just (RawCmmStatics info_lbl _) ->
- sdocWithPlatform $ \platform ->
- (if platformHasSubsectionsViaSymbols platform
- then pprSectionAlign dspSection $$
- ppr (mkDeadStripPreventer info_lbl) <> char ':'
- else empty) $$
- vcat (map (pprBasicBlock top_info) blocks) $$
- -- above: Even the first block gets a label, because with branch-chain
- -- elimination, it might be the target of a goto.
- (if platformHasSubsectionsViaSymbols platform
- then
- -- See Note [Subsections Via Symbols] in X86/Ppr.hs
- text "\t.long "
- <+> ppr info_lbl
- <+> char '-'
- <+> ppr (mkDeadStripPreventer info_lbl)
- else empty)
-
-dspSection :: Section
-dspSection = Section Text $
- panic "subsections-via-symbols doesn't combine with split-sections"
-
-pprBasicBlock :: LabelMap RawCmmStatics -> NatBasicBlock Instr -> SDoc
-pprBasicBlock info_env (BasicBlock blockid instrs)
- = maybe_infotable $$
- pprLabel (blockLbl blockid) $$
- vcat (map pprInstr instrs)
- where
- maybe_infotable = case mapLookup blockid info_env of
- Nothing -> empty
- Just (RawCmmStatics info_lbl info) ->
- pprAlignForSection Text $$
- vcat (map pprData info) $$
- pprLabel info_lbl
-
-
-pprDatas :: RawCmmStatics -> SDoc
--- See note [emit-time elimination of static indirections] in CLabel.
-pprDatas (RawCmmStatics alias [CmmStaticLit (CmmLabel lbl), CmmStaticLit ind, _, _])
- | lbl == mkIndStaticInfoLabel
- , let labelInd (CmmLabelOff l _) = Just l
- labelInd (CmmLabel l) = Just l
- labelInd _ = Nothing
- , Just ind' <- labelInd ind
- , alias `mayRedirectTo` ind'
- = pprGloblDecl alias
- $$ text ".equiv" <+> ppr alias <> comma <> ppr (CmmLabel ind')
-pprDatas (RawCmmStatics lbl dats) = vcat (pprLabel lbl : map pprData dats)
-
-pprData :: CmmStatic -> SDoc
-pprData (CmmString str) = pprBytes str
-pprData (CmmUninitialised bytes) = text ".skip " <> int bytes
-pprData (CmmStaticLit lit) = pprDataItem lit
-
-pprGloblDecl :: CLabel -> SDoc
-pprGloblDecl lbl
- | not (externallyVisibleCLabel lbl) = empty
- | otherwise = text ".global " <> ppr lbl
-
-pprTypeAndSizeDecl :: CLabel -> SDoc
-pprTypeAndSizeDecl lbl
- = sdocWithPlatform $ \platform ->
- if platformOS platform == OSLinux && externallyVisibleCLabel lbl
- then text ".type " <> ppr lbl <> ptext (sLit ", @object")
- else empty
-
-pprLabel :: CLabel -> SDoc
-pprLabel lbl = pprGloblDecl lbl
- $$ pprTypeAndSizeDecl lbl
- $$ (ppr lbl <> char ':')
-
--- -----------------------------------------------------------------------------
--- pprInstr: print an 'Instr'
-
-instance Outputable Instr where
- ppr instr = pprInstr instr
-
-
--- | Pretty print a register.
-pprReg :: Reg -> SDoc
-pprReg reg
- = case reg of
- RegVirtual vr
- -> case vr of
- VirtualRegI u -> text "%vI_" <> pprUniqueAlways u
- VirtualRegHi u -> text "%vHi_" <> pprUniqueAlways u
- VirtualRegF u -> text "%vF_" <> pprUniqueAlways u
- VirtualRegD u -> text "%vD_" <> pprUniqueAlways u
-
-
- RegReal rr
- -> case rr of
- RealRegSingle r1
- -> pprReg_ofRegNo r1
-
- RealRegPair r1 r2
- -> text "(" <> pprReg_ofRegNo r1
- <> vbar <> pprReg_ofRegNo r2
- <> text ")"
-
-
-
--- | Pretty print a register name, based on this register number.
--- The definition has been unfolded so we get a jump-table in the
--- object code. This function is called quite a lot when emitting
--- the asm file..
---
-pprReg_ofRegNo :: Int -> SDoc
-pprReg_ofRegNo i
- = ptext
- (case i of {
- 0 -> sLit "%g0"; 1 -> sLit "%g1";
- 2 -> sLit "%g2"; 3 -> sLit "%g3";
- 4 -> sLit "%g4"; 5 -> sLit "%g5";
- 6 -> sLit "%g6"; 7 -> sLit "%g7";
- 8 -> sLit "%o0"; 9 -> sLit "%o1";
- 10 -> sLit "%o2"; 11 -> sLit "%o3";
- 12 -> sLit "%o4"; 13 -> sLit "%o5";
- 14 -> sLit "%o6"; 15 -> sLit "%o7";
- 16 -> sLit "%l0"; 17 -> sLit "%l1";
- 18 -> sLit "%l2"; 19 -> sLit "%l3";
- 20 -> sLit "%l4"; 21 -> sLit "%l5";
- 22 -> sLit "%l6"; 23 -> sLit "%l7";
- 24 -> sLit "%i0"; 25 -> sLit "%i1";
- 26 -> sLit "%i2"; 27 -> sLit "%i3";
- 28 -> sLit "%i4"; 29 -> sLit "%i5";
- 30 -> sLit "%i6"; 31 -> sLit "%i7";
- 32 -> sLit "%f0"; 33 -> sLit "%f1";
- 34 -> sLit "%f2"; 35 -> sLit "%f3";
- 36 -> sLit "%f4"; 37 -> sLit "%f5";
- 38 -> sLit "%f6"; 39 -> sLit "%f7";
- 40 -> sLit "%f8"; 41 -> sLit "%f9";
- 42 -> sLit "%f10"; 43 -> sLit "%f11";
- 44 -> sLit "%f12"; 45 -> sLit "%f13";
- 46 -> sLit "%f14"; 47 -> sLit "%f15";
- 48 -> sLit "%f16"; 49 -> sLit "%f17";
- 50 -> sLit "%f18"; 51 -> sLit "%f19";
- 52 -> sLit "%f20"; 53 -> sLit "%f21";
- 54 -> sLit "%f22"; 55 -> sLit "%f23";
- 56 -> sLit "%f24"; 57 -> sLit "%f25";
- 58 -> sLit "%f26"; 59 -> sLit "%f27";
- 60 -> sLit "%f28"; 61 -> sLit "%f29";
- 62 -> sLit "%f30"; 63 -> sLit "%f31";
- _ -> sLit "very naughty sparc register" })
-
-
--- | Pretty print a format for an instruction suffix.
-pprFormat :: Format -> SDoc
-pprFormat x
- = ptext
- (case x of
- II8 -> sLit "ub"
- II16 -> sLit "uh"
- II32 -> sLit ""
- II64 -> sLit "d"
- FF32 -> sLit ""
- FF64 -> sLit "d")
-
-
--- | Pretty print a format for an instruction suffix.
--- eg LD is 32bit on sparc, but LDD is 64 bit.
-pprStFormat :: Format -> SDoc
-pprStFormat x
- = ptext
- (case x of
- II8 -> sLit "b"
- II16 -> sLit "h"
- II32 -> sLit ""
- II64 -> sLit "x"
- FF32 -> sLit ""
- FF64 -> sLit "d")
-
-
-
--- | Pretty print a condition code.
-pprCond :: Cond -> SDoc
-pprCond c
- = ptext
- (case c of
- ALWAYS -> sLit ""
- NEVER -> sLit "n"
- GEU -> sLit "geu"
- LU -> sLit "lu"
- EQQ -> sLit "e"
- GTT -> sLit "g"
- GE -> sLit "ge"
- GU -> sLit "gu"
- LTT -> sLit "l"
- LE -> sLit "le"
- LEU -> sLit "leu"
- NE -> sLit "ne"
- NEG -> sLit "neg"
- POS -> sLit "pos"
- VC -> sLit "vc"
- VS -> sLit "vs")
-
-
--- | Pretty print an address mode.
-pprAddr :: AddrMode -> SDoc
-pprAddr am
- = case am of
- AddrRegReg r1 (RegReal (RealRegSingle 0))
- -> pprReg r1
-
- AddrRegReg r1 r2
- -> hcat [ pprReg r1, char '+', pprReg r2 ]
-
- AddrRegImm r1 (ImmInt i)
- | i == 0 -> pprReg r1
- | not (fits13Bits i) -> largeOffsetError i
- | otherwise -> hcat [ pprReg r1, pp_sign, int i ]
- where
- pp_sign = if i > 0 then char '+' else empty
-
- AddrRegImm r1 (ImmInteger i)
- | i == 0 -> pprReg r1
- | not (fits13Bits i) -> largeOffsetError i
- | otherwise -> hcat [ pprReg r1, pp_sign, integer i ]
- where
- pp_sign = if i > 0 then char '+' else empty
-
- AddrRegImm r1 imm
- -> hcat [ pprReg r1, char '+', pprImm imm ]
-
-
--- | Pretty print an immediate value.
-pprImm :: Imm -> SDoc
-pprImm imm
- = case imm of
- ImmInt i -> int i
- ImmInteger i -> integer i
- ImmCLbl l -> ppr l
- ImmIndex l i -> ppr l <> char '+' <> int i
- ImmLit s -> s
-
- ImmConstantSum a b
- -> pprImm a <> char '+' <> pprImm b
-
- ImmConstantDiff a b
- -> pprImm a <> char '-' <> lparen <> pprImm b <> rparen
-
- LO i
- -> hcat [ text "%lo(", pprImm i, rparen ]
-
- HI i
- -> hcat [ text "%hi(", pprImm i, rparen ]
-
- -- these should have been converted to bytes and placed
- -- in the data section.
- ImmFloat _ -> text "naughty float immediate"
- ImmDouble _ -> text "naughty double immediate"
-
-
--- | Pretty print a section \/ segment header.
--- On SPARC all the data sections must be at least 8 byte aligned
--- incase we store doubles in them.
---
-pprSectionAlign :: Section -> SDoc
-pprSectionAlign sec@(Section seg _) =
- sdocWithPlatform $ \platform ->
- pprSectionHeader platform sec $$
- pprAlignForSection seg
-
--- | Print appropriate alignment for the given section type.
-pprAlignForSection :: SectionType -> SDoc
-pprAlignForSection seg =
- ptext (case seg of
- Text -> sLit ".align 4"
- Data -> sLit ".align 8"
- ReadOnlyData -> sLit ".align 8"
- RelocatableReadOnlyData
- -> sLit ".align 8"
- UninitialisedData -> sLit ".align 8"
- ReadOnlyData16 -> sLit ".align 16"
- -- TODO: This is copied from the ReadOnlyData case, but it can likely be
- -- made more efficient.
- CString -> sLit ".align 8"
- OtherSection _ -> panic "PprMach.pprSectionHeader: unknown section")
-
--- | Pretty print a data item.
-pprDataItem :: CmmLit -> SDoc
-pprDataItem lit
- = sdocWithDynFlags $ \dflags ->
- vcat (ppr_item (cmmTypeFormat $ cmmLitType dflags lit) lit)
- where
- imm = litToImm lit
-
- ppr_item II8 _ = [text "\t.byte\t" <> pprImm imm]
- ppr_item II32 _ = [text "\t.long\t" <> pprImm imm]
-
- ppr_item FF32 (CmmFloat r _)
- = let bs = floatToBytes (fromRational r)
- in map (\b -> text "\t.byte\t" <> pprImm (ImmInt b)) bs
-
- ppr_item FF64 (CmmFloat r _)
- = let bs = doubleToBytes (fromRational r)
- in map (\b -> text "\t.byte\t" <> pprImm (ImmInt b)) bs
-
- ppr_item II16 _ = [text "\t.short\t" <> pprImm imm]
- ppr_item II64 _ = [text "\t.quad\t" <> pprImm imm]
- ppr_item _ _ = panic "SPARC.Ppr.pprDataItem: no match"
-
-
--- | Pretty print an instruction.
-pprInstr :: Instr -> SDoc
-
--- nuke comments.
-pprInstr (COMMENT _)
- = empty
-
-pprInstr (DELTA d)
- = pprInstr (COMMENT (mkFastString ("\tdelta = " ++ show d)))
-
--- Newblocks and LData should have been slurped out before producing the .s file.
-pprInstr (NEWBLOCK _)
- = panic "X86.Ppr.pprInstr: NEWBLOCK"
-
-pprInstr (LDATA _ _)
- = panic "PprMach.pprInstr: LDATA"
-
--- 64 bit FP loads are expanded into individual instructions in CodeGen.Expand
-pprInstr (LD FF64 _ reg)
- | RegReal (RealRegSingle{}) <- reg
- = panic "SPARC.Ppr: not emitting potentially misaligned LD FF64 instr"
-
-pprInstr (LD format addr reg)
- = hcat [
- text "\tld",
- pprFormat format,
- char '\t',
- lbrack,
- pprAddr addr,
- pp_rbracket_comma,
- pprReg reg
- ]
-
--- 64 bit FP stores are expanded into individual instructions in CodeGen.Expand
-pprInstr (ST FF64 reg _)
- | RegReal (RealRegSingle{}) <- reg
- = panic "SPARC.Ppr: not emitting potentially misaligned ST FF64 instr"
-
--- no distinction is made between signed and unsigned bytes on stores for the
--- Sparc opcodes (at least I cannot see any, and gas is nagging me --SOF),
--- so we call a special-purpose pprFormat for ST..
-pprInstr (ST format reg addr)
- = hcat [
- text "\tst",
- pprStFormat format,
- char '\t',
- pprReg reg,
- pp_comma_lbracket,
- pprAddr addr,
- rbrack
- ]
-
-
-pprInstr (ADD x cc reg1 ri reg2)
- | not x && not cc && riZero ri
- = hcat [ text "\tmov\t", pprReg reg1, comma, pprReg reg2 ]
-
- | otherwise
- = pprRegRIReg (if x then sLit "addx" else sLit "add") cc reg1 ri reg2
-
-
-pprInstr (SUB x cc reg1 ri reg2)
- | not x && cc && reg2 == g0
- = hcat [ text "\tcmp\t", pprReg reg1, comma, pprRI ri ]
-
- | not x && not cc && riZero ri
- = hcat [ text "\tmov\t", pprReg reg1, comma, pprReg reg2 ]
-
- | otherwise
- = pprRegRIReg (if x then sLit "subx" else sLit "sub") cc reg1 ri reg2
-
-pprInstr (AND b reg1 ri reg2) = pprRegRIReg (sLit "and") b reg1 ri reg2
-
-pprInstr (ANDN b reg1 ri reg2) = pprRegRIReg (sLit "andn") b reg1 ri reg2
-
-pprInstr (OR b reg1 ri reg2)
- | not b && reg1 == g0
- = let doit = hcat [ text "\tmov\t", pprRI ri, comma, pprReg reg2 ]
- in case ri of
- RIReg rrr | rrr == reg2 -> empty
- _ -> doit
-
- | otherwise
- = pprRegRIReg (sLit "or") b reg1 ri reg2
-
-pprInstr (ORN b reg1 ri reg2) = pprRegRIReg (sLit "orn") b reg1 ri reg2
-
-pprInstr (XOR b reg1 ri reg2) = pprRegRIReg (sLit "xor") b reg1 ri reg2
-pprInstr (XNOR b reg1 ri reg2) = pprRegRIReg (sLit "xnor") b reg1 ri reg2
-
-pprInstr (SLL reg1 ri reg2) = pprRegRIReg (sLit "sll") False reg1 ri reg2
-pprInstr (SRL reg1 ri reg2) = pprRegRIReg (sLit "srl") False reg1 ri reg2
-pprInstr (SRA reg1 ri reg2) = pprRegRIReg (sLit "sra") False reg1 ri reg2
-
-pprInstr (RDY rd) = text "\trd\t%y," <> pprReg rd
-pprInstr (WRY reg1 reg2)
- = text "\twr\t"
- <> pprReg reg1
- <> char ','
- <> pprReg reg2
- <> char ','
- <> text "%y"
-
-pprInstr (SMUL b reg1 ri reg2) = pprRegRIReg (sLit "smul") b reg1 ri reg2
-pprInstr (UMUL b reg1 ri reg2) = pprRegRIReg (sLit "umul") b reg1 ri reg2
-pprInstr (SDIV b reg1 ri reg2) = pprRegRIReg (sLit "sdiv") b reg1 ri reg2
-pprInstr (UDIV b reg1 ri reg2) = pprRegRIReg (sLit "udiv") b reg1 ri reg2
-
-pprInstr (SETHI imm reg)
- = hcat [
- text "\tsethi\t",
- pprImm imm,
- comma,
- pprReg reg
- ]
-
-pprInstr NOP
- = text "\tnop"
-
-pprInstr (FABS format reg1 reg2)
- = pprFormatRegReg (sLit "fabs") format reg1 reg2
-
-pprInstr (FADD format reg1 reg2 reg3)
- = pprFormatRegRegReg (sLit "fadd") format reg1 reg2 reg3
-
-pprInstr (FCMP e format reg1 reg2)
- = pprFormatRegReg (if e then sLit "fcmpe" else sLit "fcmp")
- format reg1 reg2
-
-pprInstr (FDIV format reg1 reg2 reg3)
- = pprFormatRegRegReg (sLit "fdiv") format reg1 reg2 reg3
-
-pprInstr (FMOV format reg1 reg2)
- = pprFormatRegReg (sLit "fmov") format reg1 reg2
-
-pprInstr (FMUL format reg1 reg2 reg3)
- = pprFormatRegRegReg (sLit "fmul") format reg1 reg2 reg3
-
-pprInstr (FNEG format reg1 reg2)
- = pprFormatRegReg (sLit "fneg") format reg1 reg2
-
-pprInstr (FSQRT format reg1 reg2)
- = pprFormatRegReg (sLit "fsqrt") format reg1 reg2
-
-pprInstr (FSUB format reg1 reg2 reg3)
- = pprFormatRegRegReg (sLit "fsub") format reg1 reg2 reg3
-
-pprInstr (FxTOy format1 format2 reg1 reg2)
- = hcat [
- text "\tf",
- ptext
- (case format1 of
- II32 -> sLit "ito"
- FF32 -> sLit "sto"
- FF64 -> sLit "dto"
- _ -> panic "SPARC.Ppr.pprInstr.FxToY: no match"),
- ptext
- (case format2 of
- II32 -> sLit "i\t"
- II64 -> sLit "x\t"
- FF32 -> sLit "s\t"
- FF64 -> sLit "d\t"
- _ -> panic "SPARC.Ppr.pprInstr.FxToY: no match"),
- pprReg reg1, comma, pprReg reg2
- ]
-
-
-pprInstr (BI cond b blockid)
- = hcat [
- text "\tb", pprCond cond,
- if b then pp_comma_a else empty,
- char '\t',
- ppr (blockLbl blockid)
- ]
-
-pprInstr (BF cond b blockid)
- = hcat [
- text "\tfb", pprCond cond,
- if b then pp_comma_a else empty,
- char '\t',
- ppr (blockLbl blockid)
- ]
-
-pprInstr (JMP addr) = text "\tjmp\t" <> pprAddr addr
-pprInstr (JMP_TBL op _ _) = pprInstr (JMP op)
-
-pprInstr (CALL (Left imm) n _)
- = hcat [ text "\tcall\t", pprImm imm, comma, int n ]
-
-pprInstr (CALL (Right reg) n _)
- = hcat [ text "\tcall\t", pprReg reg, comma, int n ]
-
-
--- | Pretty print a RI
-pprRI :: RI -> SDoc
-pprRI (RIReg r) = pprReg r
-pprRI (RIImm r) = pprImm r
-
-
--- | Pretty print a two reg instruction.
-pprFormatRegReg :: PtrString -> Format -> Reg -> Reg -> SDoc
-pprFormatRegReg name format reg1 reg2
- = hcat [
- char '\t',
- ptext name,
- (case format of
- FF32 -> text "s\t"
- FF64 -> text "d\t"
- _ -> panic "SPARC.Ppr.pprFormatRegReg: no match"),
-
- pprReg reg1,
- comma,
- pprReg reg2
- ]
-
-
--- | Pretty print a three reg instruction.
-pprFormatRegRegReg :: PtrString -> Format -> Reg -> Reg -> Reg -> SDoc
-pprFormatRegRegReg name format reg1 reg2 reg3
- = hcat [
- char '\t',
- ptext name,
- (case format of
- FF32 -> text "s\t"
- FF64 -> text "d\t"
- _ -> panic "SPARC.Ppr.pprFormatRegReg: no match"),
- pprReg reg1,
- comma,
- pprReg reg2,
- comma,
- pprReg reg3
- ]
-
-
--- | Pretty print an instruction of two regs and a ri.
-pprRegRIReg :: PtrString -> Bool -> Reg -> RI -> Reg -> SDoc
-pprRegRIReg name b reg1 ri reg2
- = hcat [
- char '\t',
- ptext name,
- if b then text "cc\t" else char '\t',
- pprReg reg1,
- comma,
- pprRI ri,
- comma,
- pprReg reg2
- ]
-
-{-
-pprRIReg :: PtrString -> Bool -> RI -> Reg -> SDoc
-pprRIReg name b ri reg1
- = hcat [
- char '\t',
- ptext name,
- if b then text "cc\t" else char '\t',
- pprRI ri,
- comma,
- pprReg reg1
- ]
--}
-
-{-
-pp_ld_lbracket :: SDoc
-pp_ld_lbracket = text "\tld\t["
--}
-
-pp_rbracket_comma :: SDoc
-pp_rbracket_comma = text "],"
-
-
-pp_comma_lbracket :: SDoc
-pp_comma_lbracket = text ",["
-
-
-pp_comma_a :: SDoc
-pp_comma_a = text ",a"
diff --git a/compiler/nativeGen/SPARC/Regs.hs b/compiler/nativeGen/SPARC/Regs.hs
deleted file mode 100644
index 8f470ad79d..0000000000
--- a/compiler/nativeGen/SPARC/Regs.hs
+++ /dev/null
@@ -1,259 +0,0 @@
--- -----------------------------------------------------------------------------
---
--- (c) The University of Glasgow 1994-2004
---
--- -----------------------------------------------------------------------------
-
-module SPARC.Regs (
- -- registers
- showReg,
- virtualRegSqueeze,
- realRegSqueeze,
- classOfRealReg,
- allRealRegs,
-
- -- machine specific info
- gReg, iReg, lReg, oReg, fReg,
- fp, sp, g0, g1, g2, o0, o1, f0, f1, f6, f8, f22, f26, f27,
-
- -- allocatable
- allocatableRegs,
-
- -- args
- argRegs,
- allArgRegs,
- callClobberedRegs,
-
- --
- mkVirtualReg,
- regDotColor
-)
-
-where
-
-
-import GhcPrelude
-
-import GHC.Platform.SPARC
-import Reg
-import RegClass
-import Format
-
-import Unique
-import Outputable
-
-{-
- The SPARC has 64 registers of interest; 32 integer registers and 32
- floating point registers. The mapping of STG registers to SPARC
- machine registers is defined in StgRegs.h. We are, of course,
- prepared for any eventuality.
-
- The whole fp-register pairing thing on sparcs is a huge nuisance. See
- includes/stg/MachRegs.h for a description of what's going on
- here.
--}
-
-
--- | Get the standard name for the register with this number.
-showReg :: RegNo -> String
-showReg n
- | n >= 0 && n < 8 = "%g" ++ show n
- | n >= 8 && n < 16 = "%o" ++ show (n-8)
- | n >= 16 && n < 24 = "%l" ++ show (n-16)
- | n >= 24 && n < 32 = "%i" ++ show (n-24)
- | n >= 32 && n < 64 = "%f" ++ show (n-32)
- | otherwise = panic "SPARC.Regs.showReg: unknown sparc register"
-
-
--- Get the register class of a certain real reg
-classOfRealReg :: RealReg -> RegClass
-classOfRealReg reg
- = case reg of
- RealRegSingle i
- | i < 32 -> RcInteger
- | otherwise -> RcFloat
-
- RealRegPair{} -> RcDouble
-
-
--- | regSqueeze_class reg
--- Calculate the maximum number of register colors that could be
--- denied to a node of this class due to having this reg
--- as a neighbour.
---
-{-# INLINE virtualRegSqueeze #-}
-virtualRegSqueeze :: RegClass -> VirtualReg -> Int
-
-virtualRegSqueeze cls vr
- = case cls of
- RcInteger
- -> case vr of
- VirtualRegI{} -> 1
- VirtualRegHi{} -> 1
- _other -> 0
-
- RcFloat
- -> case vr of
- VirtualRegF{} -> 1
- VirtualRegD{} -> 2
- _other -> 0
-
- RcDouble
- -> case vr of
- VirtualRegF{} -> 1
- VirtualRegD{} -> 1
- _other -> 0
-
-
-{-# INLINE realRegSqueeze #-}
-realRegSqueeze :: RegClass -> RealReg -> Int
-
-realRegSqueeze cls rr
- = case cls of
- RcInteger
- -> case rr of
- RealRegSingle regNo
- | regNo < 32 -> 1
- | otherwise -> 0
-
- RealRegPair{} -> 0
-
- RcFloat
- -> case rr of
- RealRegSingle regNo
- | regNo < 32 -> 0
- | otherwise -> 1
-
- RealRegPair{} -> 2
-
- RcDouble
- -> case rr of
- RealRegSingle regNo
- | regNo < 32 -> 0
- | otherwise -> 1
-
- RealRegPair{} -> 1
-
-
--- | All the allocatable registers in the machine,
--- including register pairs.
-allRealRegs :: [RealReg]
-allRealRegs
- = [ (RealRegSingle i) | i <- [0..63] ]
- ++ [ (RealRegPair i (i+1)) | i <- [32, 34 .. 62 ] ]
-
-
--- | Get the regno for this sort of reg
-gReg, lReg, iReg, oReg, fReg :: Int -> RegNo
-
-gReg x = x -- global regs
-oReg x = (8 + x) -- output regs
-lReg x = (16 + x) -- local regs
-iReg x = (24 + x) -- input regs
-fReg x = (32 + x) -- float regs
-
-
--- | Some specific regs used by the code generator.
-g0, g1, g2, fp, sp, o0, o1, f0, f1, f6, f8, f22, f26, f27 :: Reg
-
-f6 = RegReal (RealRegSingle (fReg 6))
-f8 = RegReal (RealRegSingle (fReg 8))
-f22 = RegReal (RealRegSingle (fReg 22))
-f26 = RegReal (RealRegSingle (fReg 26))
-f27 = RegReal (RealRegSingle (fReg 27))
-
--- g0 is always zero, and writes to it vanish.
-g0 = RegReal (RealRegSingle (gReg 0))
-g1 = RegReal (RealRegSingle (gReg 1))
-g2 = RegReal (RealRegSingle (gReg 2))
-
--- FP, SP, int and float return (from C) regs.
-fp = RegReal (RealRegSingle (iReg 6))
-sp = RegReal (RealRegSingle (oReg 6))
-o0 = RegReal (RealRegSingle (oReg 0))
-o1 = RegReal (RealRegSingle (oReg 1))
-f0 = RegReal (RealRegSingle (fReg 0))
-f1 = RegReal (RealRegSingle (fReg 1))
-
--- | Produce the second-half-of-a-double register given the first half.
-{-
-fPair :: Reg -> Maybe Reg
-fPair (RealReg n)
- | n >= 32 && n `mod` 2 == 0 = Just (RealReg (n+1))
-
-fPair (VirtualRegD u)
- = Just (VirtualRegHi u)
-
-fPair reg
- = trace ("MachInstrs.fPair: can't get high half of supposed double reg " ++ showPpr reg)
- Nothing
--}
-
-
--- | All the regs that the register allocator can allocate to,
--- with the fixed use regs removed.
---
-allocatableRegs :: [RealReg]
-allocatableRegs
- = let isFree rr
- = case rr of
- RealRegSingle r -> freeReg r
- RealRegPair r1 r2 -> freeReg r1 && freeReg r2
- in filter isFree allRealRegs
-
-
--- | The registers to place arguments for function calls,
--- for some number of arguments.
---
-argRegs :: RegNo -> [Reg]
-argRegs r
- = case r of
- 0 -> []
- 1 -> map (RegReal . RealRegSingle . oReg) [0]
- 2 -> map (RegReal . RealRegSingle . oReg) [0,1]
- 3 -> map (RegReal . RealRegSingle . oReg) [0,1,2]
- 4 -> map (RegReal . RealRegSingle . oReg) [0,1,2,3]
- 5 -> map (RegReal . RealRegSingle . oReg) [0,1,2,3,4]
- 6 -> map (RegReal . RealRegSingle . oReg) [0,1,2,3,4,5]
- _ -> panic "MachRegs.argRegs(sparc): don't know about >6 arguments!"
-
-
--- | All all the regs that could possibly be returned by argRegs
---
-allArgRegs :: [Reg]
-allArgRegs
- = map (RegReal . RealRegSingle) [oReg i | i <- [0..5]]
-
-
--- These are the regs that we cannot assume stay alive over a C call.
--- TODO: Why can we assume that o6 isn't clobbered? -- BL 2009/02
---
-callClobberedRegs :: [Reg]
-callClobberedRegs
- = map (RegReal . RealRegSingle)
- ( oReg 7 :
- [oReg i | i <- [0..5]] ++
- [gReg i | i <- [1..7]] ++
- [fReg i | i <- [0..31]] )
-
-
-
--- | Make a virtual reg with this format.
-mkVirtualReg :: Unique -> Format -> VirtualReg
-mkVirtualReg u format
- | not (isFloatFormat format)
- = VirtualRegI u
-
- | otherwise
- = case format of
- FF32 -> VirtualRegF u
- FF64 -> VirtualRegD u
- _ -> panic "mkVReg"
-
-
-regDotColor :: RealReg -> SDoc
-regDotColor reg
- = case classOfRealReg reg of
- RcInteger -> text "blue"
- RcFloat -> text "red"
- _other -> text "green"
diff --git a/compiler/nativeGen/SPARC/ShortcutJump.hs b/compiler/nativeGen/SPARC/ShortcutJump.hs
deleted file mode 100644
index 35604b0b7e..0000000000
--- a/compiler/nativeGen/SPARC/ShortcutJump.hs
+++ /dev/null
@@ -1,74 +0,0 @@
-module SPARC.ShortcutJump (
- JumpDest(..), getJumpDestBlockId,
- canShortcut,
- shortcutJump,
- shortcutStatics,
- shortBlockId
-)
-
-where
-
-import GhcPrelude
-
-import SPARC.Instr
-import SPARC.Imm
-
-import GHC.Cmm.CLabel
-import GHC.Cmm.BlockId
-import GHC.Cmm
-
-import Panic
-import Outputable
-
-data JumpDest
- = DestBlockId BlockId
- | DestImm Imm
-
--- Debug Instance
-instance Outputable JumpDest where
- ppr (DestBlockId bid) = text "blk:" <> ppr bid
- ppr (DestImm _bid) = text "imm:?"
-
-getJumpDestBlockId :: JumpDest -> Maybe BlockId
-getJumpDestBlockId (DestBlockId bid) = Just bid
-getJumpDestBlockId _ = Nothing
-
-
-canShortcut :: Instr -> Maybe JumpDest
-canShortcut _ = Nothing
-
-
-shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
-shortcutJump _ other = other
-
-
-
-shortcutStatics :: (BlockId -> Maybe JumpDest) -> RawCmmStatics -> RawCmmStatics
-shortcutStatics fn (RawCmmStatics lbl statics)
- = RawCmmStatics lbl $ map (shortcutStatic fn) statics
- -- we need to get the jump tables, so apply the mapping to the entries
- -- of a CmmData too.
-
-shortcutLabel :: (BlockId -> Maybe JumpDest) -> CLabel -> CLabel
-shortcutLabel fn lab
- | Just blkId <- maybeLocalBlockLabel lab = shortBlockId fn blkId
- | otherwise = lab
-
-shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
-shortcutStatic fn (CmmStaticLit (CmmLabel lab))
- = CmmStaticLit (CmmLabel (shortcutLabel fn lab))
-shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off w))
- = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off w)
--- slightly dodgy, we're ignoring the second label, but this
--- works with the way we use CmmLabelDiffOff for jump tables now.
-shortcutStatic _ other_static
- = other_static
-
-
-shortBlockId :: (BlockId -> Maybe JumpDest) -> BlockId -> CLabel
-shortBlockId fn blockid =
- case fn blockid of
- Nothing -> blockLbl blockid
- Just (DestBlockId blockid') -> shortBlockId fn blockid'
- Just (DestImm (ImmCLbl lbl)) -> lbl
- _other -> panic "shortBlockId"
diff --git a/compiler/nativeGen/SPARC/Stack.hs b/compiler/nativeGen/SPARC/Stack.hs
deleted file mode 100644
index 2553c6135d..0000000000
--- a/compiler/nativeGen/SPARC/Stack.hs
+++ /dev/null
@@ -1,59 +0,0 @@
-module SPARC.Stack (
- spRel,
- fpRel,
- spillSlotToOffset,
- maxSpillSlots
-)
-
-where
-
-import GhcPrelude
-
-import SPARC.AddrMode
-import SPARC.Regs
-import SPARC.Base
-import SPARC.Imm
-
-import GHC.Driver.Session
-import Outputable
-
--- | Get an AddrMode relative to the address in sp.
--- This gives us a stack relative addressing mode for volatile
--- temporaries and for excess call arguments.
---
-spRel :: Int -- ^ stack offset in words, positive or negative
- -> AddrMode
-
-spRel n = AddrRegImm sp (ImmInt (n * wordLength))
-
-
--- | Get an address relative to the frame pointer.
--- This doesn't work work for offsets greater than 13 bits; we just hope for the best
---
-fpRel :: Int -> AddrMode
-fpRel n
- = AddrRegImm fp (ImmInt (n * wordLength))
-
-
--- | Convert a spill slot number to a *byte* offset, with no sign.
---
-spillSlotToOffset :: DynFlags -> Int -> Int
-spillSlotToOffset dflags slot
- | slot >= 0 && slot < maxSpillSlots dflags
- = 64 + spillSlotSize * slot
-
- | otherwise
- = pprPanic "spillSlotToOffset:"
- ( text "invalid spill location: " <> int slot
- $$ text "maxSpillSlots: " <> int (maxSpillSlots dflags))
-
-
--- | The maximum number of spill slots available on the C stack.
--- If we use up all of the slots, then we're screwed.
---
--- Why do we reserve 64 bytes, instead of using the whole thing??
--- -- BL 2009/02/15
---
-maxSpillSlots :: DynFlags -> Int
-maxSpillSlots dflags
- = ((spillAreaLength dflags - 64) `div` spillSlotSize) - 1
diff --git a/compiler/nativeGen/TargetReg.hs b/compiler/nativeGen/TargetReg.hs
deleted file mode 100644
index e0eca9235d..0000000000
--- a/compiler/nativeGen/TargetReg.hs
+++ /dev/null
@@ -1,137 +0,0 @@
-{-# LANGUAGE CPP #-}
--- | Hard wired things related to registers.
--- This is module is preventing the native code generator being able to
--- emit code for non-host architectures.
---
--- TODO: Do a better job of the overloading, and eliminate this module.
--- We'd probably do better with a Register type class, and hook this to
--- Instruction somehow.
---
--- TODO: We should also make arch specific versions of RegAlloc.Graph.TrivColorable
-module TargetReg (
- targetVirtualRegSqueeze,
- targetRealRegSqueeze,
- targetClassOfRealReg,
- targetMkVirtualReg,
- targetRegDotColor,
- targetClassOfReg
-)
-
-where
-
-#include "HsVersions.h"
-
-import GhcPrelude
-
-import Reg
-import RegClass
-import Format
-
-import Outputable
-import Unique
-import GHC.Platform
-
-import qualified X86.Regs as X86
-import qualified X86.RegInfo as X86
-
-import qualified PPC.Regs as PPC
-
-import qualified SPARC.Regs as SPARC
-
-targetVirtualRegSqueeze :: Platform -> RegClass -> VirtualReg -> Int
-targetVirtualRegSqueeze platform
- = case platformArch platform of
- ArchX86 -> X86.virtualRegSqueeze
- ArchX86_64 -> X86.virtualRegSqueeze
- ArchPPC -> PPC.virtualRegSqueeze
- ArchS390X -> panic "targetVirtualRegSqueeze ArchS390X"
- ArchSPARC -> SPARC.virtualRegSqueeze
- ArchSPARC64 -> panic "targetVirtualRegSqueeze ArchSPARC64"
- ArchPPC_64 _ -> PPC.virtualRegSqueeze
- ArchARM _ _ _ -> panic "targetVirtualRegSqueeze ArchARM"
- ArchARM64 -> panic "targetVirtualRegSqueeze ArchARM64"
- ArchAlpha -> panic "targetVirtualRegSqueeze ArchAlpha"
- ArchMipseb -> panic "targetVirtualRegSqueeze ArchMipseb"
- ArchMipsel -> panic "targetVirtualRegSqueeze ArchMipsel"
- ArchJavaScript-> panic "targetVirtualRegSqueeze ArchJavaScript"
- ArchUnknown -> panic "targetVirtualRegSqueeze ArchUnknown"
-
-
-targetRealRegSqueeze :: Platform -> RegClass -> RealReg -> Int
-targetRealRegSqueeze platform
- = case platformArch platform of
- ArchX86 -> X86.realRegSqueeze
- ArchX86_64 -> X86.realRegSqueeze
- ArchPPC -> PPC.realRegSqueeze
- ArchS390X -> panic "targetRealRegSqueeze ArchS390X"
- ArchSPARC -> SPARC.realRegSqueeze
- ArchSPARC64 -> panic "targetRealRegSqueeze ArchSPARC64"
- ArchPPC_64 _ -> PPC.realRegSqueeze
- ArchARM _ _ _ -> panic "targetRealRegSqueeze ArchARM"
- ArchARM64 -> panic "targetRealRegSqueeze ArchARM64"
- ArchAlpha -> panic "targetRealRegSqueeze ArchAlpha"
- ArchMipseb -> panic "targetRealRegSqueeze ArchMipseb"
- ArchMipsel -> panic "targetRealRegSqueeze ArchMipsel"
- ArchJavaScript-> panic "targetRealRegSqueeze ArchJavaScript"
- ArchUnknown -> panic "targetRealRegSqueeze ArchUnknown"
-
-targetClassOfRealReg :: Platform -> RealReg -> RegClass
-targetClassOfRealReg platform
- = case platformArch platform of
- ArchX86 -> X86.classOfRealReg platform
- ArchX86_64 -> X86.classOfRealReg platform
- ArchPPC -> PPC.classOfRealReg
- ArchS390X -> panic "targetClassOfRealReg ArchS390X"
- ArchSPARC -> SPARC.classOfRealReg
- ArchSPARC64 -> panic "targetClassOfRealReg ArchSPARC64"
- ArchPPC_64 _ -> PPC.classOfRealReg
- ArchARM _ _ _ -> panic "targetClassOfRealReg ArchARM"
- ArchARM64 -> panic "targetClassOfRealReg ArchARM64"
- ArchAlpha -> panic "targetClassOfRealReg ArchAlpha"
- ArchMipseb -> panic "targetClassOfRealReg ArchMipseb"
- ArchMipsel -> panic "targetClassOfRealReg ArchMipsel"
- ArchJavaScript-> panic "targetClassOfRealReg ArchJavaScript"
- ArchUnknown -> panic "targetClassOfRealReg ArchUnknown"
-
-targetMkVirtualReg :: Platform -> Unique -> Format -> VirtualReg
-targetMkVirtualReg platform
- = case platformArch platform of
- ArchX86 -> X86.mkVirtualReg
- ArchX86_64 -> X86.mkVirtualReg
- ArchPPC -> PPC.mkVirtualReg
- ArchS390X -> panic "targetMkVirtualReg ArchS390X"
- ArchSPARC -> SPARC.mkVirtualReg
- ArchSPARC64 -> panic "targetMkVirtualReg ArchSPARC64"
- ArchPPC_64 _ -> PPC.mkVirtualReg
- ArchARM _ _ _ -> panic "targetMkVirtualReg ArchARM"
- ArchARM64 -> panic "targetMkVirtualReg ArchARM64"
- ArchAlpha -> panic "targetMkVirtualReg ArchAlpha"
- ArchMipseb -> panic "targetMkVirtualReg ArchMipseb"
- ArchMipsel -> panic "targetMkVirtualReg ArchMipsel"
- ArchJavaScript-> panic "targetMkVirtualReg ArchJavaScript"
- ArchUnknown -> panic "targetMkVirtualReg ArchUnknown"
-
-targetRegDotColor :: Platform -> RealReg -> SDoc
-targetRegDotColor platform
- = case platformArch platform of
- ArchX86 -> X86.regDotColor platform
- ArchX86_64 -> X86.regDotColor platform
- ArchPPC -> PPC.regDotColor
- ArchS390X -> panic "targetRegDotColor ArchS390X"
- ArchSPARC -> SPARC.regDotColor
- ArchSPARC64 -> panic "targetRegDotColor ArchSPARC64"
- ArchPPC_64 _ -> PPC.regDotColor
- ArchARM _ _ _ -> panic "targetRegDotColor ArchARM"
- ArchARM64 -> panic "targetRegDotColor ArchARM64"
- ArchAlpha -> panic "targetRegDotColor ArchAlpha"
- ArchMipseb -> panic "targetRegDotColor ArchMipseb"
- ArchMipsel -> panic "targetRegDotColor ArchMipsel"
- ArchJavaScript-> panic "targetRegDotColor ArchJavaScript"
- ArchUnknown -> panic "targetRegDotColor ArchUnknown"
-
-
-targetClassOfReg :: Platform -> Reg -> RegClass
-targetClassOfReg platform reg
- = case reg of
- RegVirtual vr -> classOfVirtualReg vr
- RegReal rr -> targetClassOfRealReg platform rr
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
deleted file mode 100644
index 26797949f4..0000000000
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ /dev/null
@@ -1,3743 +0,0 @@
-{-# LANGUAGE CPP, GADTs, NondecreasingIndentation #-}
-{-# LANGUAGE TupleSections #-}
-{-# LANGUAGE BangPatterns #-}
-
-#if __GLASGOW_HASKELL__ <= 808
--- GHC 8.10 deprecates this flag, but GHC 8.8 needs it
--- The default iteration limit is a bit too low for the definitions
--- in this module.
-{-# OPTIONS_GHC -fmax-pmcheck-iterations=10000000 #-}
-#endif
-
-{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
-
------------------------------------------------------------------------------
---
--- Generating machine code (instruction selection)
---
--- (c) The University of Glasgow 1996-2004
---
------------------------------------------------------------------------------
-
--- This is a big module, but, if you pay attention to
--- (a) the sectioning, and (b) the type signatures, the
--- structure should not be too overwhelming.
-
-module X86.CodeGen (
- cmmTopCodeGen,
- generateJumpTableForInstr,
- extractUnwindPoints,
- invertCondBranches,
- InstrBlock
-)
-
-where
-
-#include "HsVersions.h"
-
--- NCG stuff:
-import GhcPrelude
-
-import X86.Instr
-import X86.Cond
-import X86.Regs
-import X86.Ppr ( )
-import X86.RegInfo
-
-import GHC.Platform.Regs
-import CPrim
-import GHC.Cmm.DebugBlock ( DebugBlock(..), UnwindPoint(..), UnwindTable
- , UnwindExpr(UwReg), toUnwindExpr )
-import Instruction
-import PIC
-import NCGMonad ( NatM, getNewRegNat, getNewLabelNat, setDeltaNat
- , getDeltaNat, getBlockIdNat, getPicBaseNat, getNewRegPairNat
- , getPicBaseMaybeNat, getDebugBlock, getFileId
- , addImmediateSuccessorNat, updateCfgNat)
-import CFG
-import Format
-import Reg
-import GHC.Platform
-
--- Our intermediate code:
-import BasicTypes
-import GHC.Cmm.BlockId
-import Module ( primUnitId )
-import GHC.Cmm.Utils
-import GHC.Cmm.Switch
-import GHC.Cmm
-import GHC.Cmm.Dataflow.Block
-import GHC.Cmm.Dataflow.Collections
-import GHC.Cmm.Dataflow.Graph
-import GHC.Cmm.Dataflow.Label
-import GHC.Cmm.CLabel
-import CoreSyn ( Tickish(..) )
-import SrcLoc ( srcSpanFile, srcSpanStartLine, srcSpanStartCol )
-
--- The rest:
-import ForeignCall ( CCallConv(..) )
-import OrdList
-import Outputable
-import FastString
-import GHC.Driver.Session
-import Util
-import UniqSupply ( getUniqueM )
-
-import Control.Monad
-import Data.Bits
-import Data.Foldable (fold)
-import Data.Int
-import Data.Maybe
-import Data.Word
-
-import qualified Data.Map as M
-
-is32BitPlatform :: NatM Bool
-is32BitPlatform = do
- dflags <- getDynFlags
- return $ target32Bit (targetPlatform dflags)
-
-sse2Enabled :: NatM Bool
-sse2Enabled = do
- dflags <- getDynFlags
- case platformArch (targetPlatform dflags) of
- -- We Assume SSE1 and SSE2 operations are available on both
- -- x86 and x86_64. Historically we didn't default to SSE2 and
- -- SSE1 on x86, which results in defacto nondeterminism for how
- -- rounding behaves in the associated x87 floating point instructions
- -- because variations in the spill/fpu stack placement of arguments for
- -- operations would change the precision and final result of what
- -- would otherwise be the same expressions with respect to single or
- -- double precision IEEE floating point computations.
- ArchX86_64 -> return True
- ArchX86 -> return True
- _ -> panic "trying to generate x86/x86_64 on the wrong platform"
-
-
-sse4_2Enabled :: NatM Bool
-sse4_2Enabled = do
- dflags <- getDynFlags
- return (isSse4_2Enabled dflags)
-
-
-cmmTopCodeGen
- :: RawCmmDecl
- -> NatM [NatCmmDecl (Alignment, RawCmmStatics) Instr]
-
-cmmTopCodeGen (CmmProc info lab live graph) = do
- let blocks = toBlockListEntryFirst graph
- (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
- picBaseMb <- getPicBaseMaybeNat
- dflags <- getDynFlags
- let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
- tops = proc : concat statics
- os = platformOS $ targetPlatform dflags
-
- case picBaseMb of
- Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
- Nothing -> return tops
-
-cmmTopCodeGen (CmmData sec dat) = do
- return [CmmData sec (mkAlignment 1, dat)] -- no translation, we just use CmmStatic
-
-{- Note [Verifying basic blocks]
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
- We want to guarantee a few things about the results
- of instruction selection.
-
- Namely that each basic blocks consists of:
- * A (potentially empty) sequence of straight line instructions
- followed by
- * A (potentially empty) sequence of jump like instructions.
-
- We can verify this by going through the instructions and
- making sure that any non-jumpish instruction can't appear
- after a jumpish instruction.
-
- There are gotchas however:
- * CALLs are strictly speaking control flow but here we care
- not about them. Hence we treat them as regular instructions.
-
- It's safe for them to appear inside a basic block
- as (ignoring side effects inside the call) they will result in
- straight line code.
-
- * NEWBLOCK marks the start of a new basic block so can
- be followed by any instructions.
--}
-
--- Verifying basic blocks is cheap, but not cheap enough to enable it unconditionally.
-verifyBasicBlock :: [Instr] -> ()
-verifyBasicBlock instrs
- | debugIsOn = go False instrs
- | otherwise = ()
- where
- go _ [] = ()
- go atEnd (i:instr)
- = case i of
- -- Start a new basic block
- NEWBLOCK {} -> go False instr
- -- Calls are not viable block terminators
- CALL {} | atEnd -> faultyBlockWith i
- | not atEnd -> go atEnd instr
- -- All instructions ok, check if we reached the end and continue.
- _ | not atEnd -> go (isJumpishInstr i) instr
- -- Only jumps allowed at the end of basic blocks.
- | otherwise -> if isJumpishInstr i
- then go True instr
- else faultyBlockWith i
- faultyBlockWith i
- = pprPanic "Non control flow instructions after end of basic block."
- (ppr i <+> text "in:" $$ vcat (map ppr instrs))
-
-basicBlockCodeGen
- :: CmmBlock
- -> NatM ( [NatBasicBlock Instr]
- , [NatCmmDecl (Alignment, RawCmmStatics) Instr])
-
-basicBlockCodeGen block = do
- let (_, nodes, tail) = blockSplit block
- id = entryLabel block
- stmts = blockToList nodes
- -- Generate location directive
- dbg <- getDebugBlock (entryLabel block)
- loc_instrs <- case dblSourceTick =<< dbg of
- Just (SourceNote span name)
- -> do fileId <- getFileId (srcSpanFile span)
- let line = srcSpanStartLine span; col = srcSpanStartCol span
- return $ unitOL $ LOCATION fileId line col name
- _ -> return nilOL
- (mid_instrs,mid_bid) <- stmtsToInstrs id stmts
- (!tail_instrs,_) <- stmtToInstrs mid_bid tail
- let instrs = loc_instrs `appOL` mid_instrs `appOL` tail_instrs
- return $! verifyBasicBlock (fromOL instrs)
- instrs' <- fold <$> traverse addSpUnwindings instrs
- -- code generation may introduce new basic block boundaries, which
- -- are indicated by the NEWBLOCK instruction. We must split up the
- -- instruction stream into basic blocks again. Also, we extract
- -- LDATAs here too.
- let
- (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs'
-
- mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
- = ([], BasicBlock id instrs : blocks, statics)
- mkBlocks (LDATA sec dat) (instrs,blocks,statics)
- = (instrs, blocks, CmmData sec dat:statics)
- mkBlocks instr (instrs,blocks,statics)
- = (instr:instrs, blocks, statics)
- return (BasicBlock id top : other_blocks, statics)
-
--- | Convert 'DELTA' instructions into 'UNWIND' instructions to capture changes
--- in the @sp@ register. See Note [What is this unwinding business?] in Debug
--- for details.
-addSpUnwindings :: Instr -> NatM (OrdList Instr)
-addSpUnwindings instr@(DELTA d) = do
- dflags <- getDynFlags
- if debugLevel dflags >= 1
- then do lbl <- mkAsmTempLabel <$> getUniqueM
- let unwind = M.singleton MachSp (Just $ UwReg MachSp $ negate d)
- return $ toOL [ instr, UNWIND lbl unwind ]
- else return (unitOL instr)
-addSpUnwindings instr = return $ unitOL instr
-
-{- Note [Keeping track of the current block]
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-When generating instructions for Cmm we sometimes require
-the current block for things like retry loops.
-
-We also sometimes change the current block, if a MachOP
-results in branching control flow.
-
-Issues arise if we have two statements in the same block,
-which both depend on the current block id *and* change the
-basic block after them. This happens for atomic primops
-in the X86 backend where we want to update the CFG data structure
-when introducing new basic blocks.
-
-For example in #17334 we got this Cmm code:
-
- c3Bf: // global
- (_s3t1::I64) = call MO_AtomicRMW W64 AMO_And(_s3sQ::P64 + 88, 18);
- (_s3t4::I64) = call MO_AtomicRMW W64 AMO_Or(_s3sQ::P64 + 88, 0);
- _s3sT::I64 = _s3sV::I64;
- goto c3B1;
-
-This resulted in two new basic blocks being inserted:
-
- c3Bf:
- movl $18,%vI_n3Bo
- movq 88(%vI_s3sQ),%rax
- jmp _n3Bp
- n3Bp:
- ...
- cmpxchgq %vI_n3Bq,88(%vI_s3sQ)
- jne _n3Bp
- ...
- jmp _n3Bs
- n3Bs:
- ...
- cmpxchgq %vI_n3Bt,88(%vI_s3sQ)
- jne _n3Bs
- ...
- jmp _c3B1
- ...
-
-Based on the Cmm we called stmtToInstrs we translated both atomic operations under
-the assumption they would be placed into their Cmm basic block `c3Bf`.
-However for the retry loop we introduce new labels, so this is not the case
-for the second statement.
-This resulted in a desync between the explicit control flow graph
-we construct as a separate data type and the actual control flow graph in the code.
-
-Instead we now return the new basic block if a statement causes a change
-in the current block and use the block for all following statements.
-
-For this reason genCCall is also split into two parts.
-One for calls which *won't* change the basic blocks in
-which successive instructions will be placed.
-A different one for calls which *are* known to change the
-basic block.
-
--}
-
--- See Note [Keeping track of the current block] for why
--- we pass the BlockId.
-stmtsToInstrs :: BlockId -- ^ Basic block these statement will start to be placed in.
- -> [CmmNode O O] -- ^ Cmm Statement
- -> NatM (InstrBlock, BlockId) -- ^ Resulting instruction
-stmtsToInstrs bid stmts =
- go bid stmts nilOL
- where
- go bid [] instrs = return (instrs,bid)
- go bid (s:stmts) instrs = do
- (instrs',bid') <- stmtToInstrs bid s
- -- If the statement introduced a new block, we use that one
- let !newBid = fromMaybe bid bid'
- go newBid stmts (instrs `appOL` instrs')
-
--- | `bid` refers to the current block and is used to update the CFG
--- if new blocks are inserted in the control flow.
--- See Note [Keeping track of the current block] for more details.
-stmtToInstrs :: BlockId -- ^ Basic block this statement will start to be placed in.
- -> CmmNode e x
- -> NatM (InstrBlock, Maybe BlockId)
- -- ^ Instructions, and bid of new block if successive
- -- statements are placed in a different basic block.
-stmtToInstrs bid stmt = do
- dflags <- getDynFlags
- is32Bit <- is32BitPlatform
- case stmt of
- CmmUnsafeForeignCall target result_regs args
- -> genCCall dflags is32Bit target result_regs args bid
-
- _ -> (,Nothing) <$> case stmt of
- CmmComment s -> return (unitOL (COMMENT s))
- CmmTick {} -> return nilOL
-
- CmmUnwind regs -> do
- let to_unwind_entry :: (GlobalReg, Maybe CmmExpr) -> UnwindTable
- to_unwind_entry (reg, expr) = M.singleton reg (fmap toUnwindExpr expr)
- case foldMap to_unwind_entry regs of
- tbl | M.null tbl -> return nilOL
- | otherwise -> do
- lbl <- mkAsmTempLabel <$> getUniqueM
- return $ unitOL $ UNWIND lbl tbl
-
- CmmAssign reg src
- | isFloatType ty -> assignReg_FltCode format reg src
- | is32Bit && isWord64 ty -> assignReg_I64Code reg src
- | otherwise -> assignReg_IntCode format reg src
- where ty = cmmRegType dflags reg
- format = cmmTypeFormat ty
-
- CmmStore addr src
- | isFloatType ty -> assignMem_FltCode format addr src
- | is32Bit && isWord64 ty -> assignMem_I64Code addr src
- | otherwise -> assignMem_IntCode format addr src
- where ty = cmmExprType dflags src
- format = cmmTypeFormat ty
-
- CmmBranch id -> return $ genBranch id
-
- --We try to arrange blocks such that the likely branch is the fallthrough
- --in GHC.Cmm.ContFlowOpt. So we can assume the condition is likely false here.
- CmmCondBranch arg true false _ -> genCondBranch bid true false arg
- CmmSwitch arg ids -> do dflags <- getDynFlags
- genSwitch dflags arg ids
- CmmCall { cml_target = arg
- , cml_args_regs = gregs } -> do
- dflags <- getDynFlags
- genJump arg (jumpRegs dflags gregs)
- _ ->
- panic "stmtToInstrs: statement should have been cps'd away"
-
-
-jumpRegs :: DynFlags -> [GlobalReg] -> [Reg]
-jumpRegs dflags gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ]
- where platform = targetPlatform dflags
-
---------------------------------------------------------------------------------
--- | 'InstrBlock's are the insn sequences generated by the insn selectors.
--- They are really trees of insns to facilitate fast appending, where a
--- left-to-right traversal yields the insns in the correct order.
---
-type InstrBlock
- = OrdList Instr
-
-
--- | Condition codes passed up the tree.
---
-data CondCode
- = CondCode Bool Cond InstrBlock
-
-
--- | a.k.a "Register64"
--- Reg is the lower 32-bit temporary which contains the result.
--- Use getHiVRegFromLo to find the other VRegUnique.
---
--- Rules of this simplified insn selection game are therefore that
--- the returned Reg may be modified
---
-data ChildCode64
- = ChildCode64
- InstrBlock
- Reg
-
-
--- | Register's passed up the tree. If the stix code forces the register
--- to live in a pre-decided machine register, it comes out as @Fixed@;
--- otherwise, it comes out as @Any@, and the parent can decide which
--- register to put it in.
---
-data Register
- = Fixed Format Reg InstrBlock
- | Any Format (Reg -> InstrBlock)
-
-
-swizzleRegisterRep :: Register -> Format -> Register
-swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code
-swizzleRegisterRep (Any _ codefn) format = Any format codefn
-
-
--- | Grab the Reg for a CmmReg
-getRegisterReg :: Platform -> CmmReg -> Reg
-
-getRegisterReg _ (CmmLocal (LocalReg u pk))
- = -- by Assuming SSE2, Int,Word,Float,Double all can be register allocated
- let fmt = cmmTypeFormat pk in
- RegVirtual (mkVirtualReg u fmt)
-
-getRegisterReg platform (CmmGlobal mid)
- = case globalRegMaybe platform mid of
- Just reg -> RegReal $ reg
- Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
- -- By this stage, the only MagicIds remaining should be the
- -- ones which map to a real machine register on this
- -- platform. Hence ...
-
-
--- | Memory addressing modes passed up the tree.
-data Amode
- = Amode AddrMode InstrBlock
-
-{-
-Now, given a tree (the argument to a CmmLoad) that references memory,
-produce a suitable addressing mode.
-
-A Rule of the Game (tm) for Amodes: use of the addr bit must
-immediately follow use of the code part, since the code part puts
-values in registers which the addr then refers to. So you can't put
-anything in between, lest it overwrite some of those registers. If
-you need to do some other computation between the code part and use of
-the addr bit, first store the effective address from the amode in a
-temporary, then do the other computation, and then use the temporary:
-
- code
- LEA amode, tmp
- ... other computation ...
- ... (tmp) ...
--}
-
-
--- | Check whether an integer will fit in 32 bits.
--- A CmmInt is intended to be truncated to the appropriate
--- number of bits, so here we truncate it to Int64. This is
--- important because e.g. -1 as a CmmInt might be either
--- -1 or 18446744073709551615.
---
-is32BitInteger :: Integer -> Bool
-is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
- where i64 = fromIntegral i :: Int64
-
-
--- | Convert a BlockId to some CmmStatic data
-jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
-jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
-jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
- where blockLabel = blockLbl blockid
-
-
--- -----------------------------------------------------------------------------
--- General things for putting together code sequences
-
--- Expand CmmRegOff. ToDo: should we do it this way around, or convert
--- CmmExprs into CmmRegOff?
-mangleIndexTree :: DynFlags -> CmmReg -> Int -> CmmExpr
-mangleIndexTree dflags reg off
- = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
- where width = typeWidth (cmmRegType dflags reg)
-
--- | The dual to getAnyReg: compute an expression into a register, but
--- we don't mind which one it is.
-getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
-getSomeReg expr = do
- r <- getRegister expr
- case r of
- Any rep code -> do
- tmp <- getNewRegNat rep
- return (tmp, code tmp)
- Fixed _ reg code ->
- return (reg, code)
-
-
-assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
-assignMem_I64Code addrTree valueTree = do
- Amode addr addr_code <- getAmode addrTree
- ChildCode64 vcode rlo <- iselExpr64 valueTree
- let
- rhi = getHiVRegFromLo rlo
-
- -- Little-endian store
- mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
- mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
- return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
-
-
-assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
-assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
- ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
- let
- r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
- r_dst_hi = getHiVRegFromLo r_dst_lo
- r_src_hi = getHiVRegFromLo r_src_lo
- mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
- mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
- return (
- vcode `snocOL` mov_lo `snocOL` mov_hi
- )
-
-assignReg_I64Code _ _
- = panic "assignReg_I64Code(i386): invalid lvalue"
-
-
-iselExpr64 :: CmmExpr -> NatM ChildCode64
-iselExpr64 (CmmLit (CmmInt i _)) = do
- (rlo,rhi) <- getNewRegPairNat II32
- let
- r = fromIntegral (fromIntegral i :: Word32)
- q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
- code = toOL [
- MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
- MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
- ]
- return (ChildCode64 code rlo)
-
-iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
- Amode addr addr_code <- getAmode addrTree
- (rlo,rhi) <- getNewRegPairNat II32
- let
- mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
- mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
- return (
- ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
- rlo
- )
-
-iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
- = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))
-
--- we handle addition, but rather badly
-iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
- ChildCode64 code1 r1lo <- iselExpr64 e1
- (rlo,rhi) <- getNewRegPairNat II32
- let
- r = fromIntegral (fromIntegral i :: Word32)
- q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
- r1hi = getHiVRegFromLo r1lo
- code = code1 `appOL`
- toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
- ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
- MOV II32 (OpReg r1hi) (OpReg rhi),
- ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
- return (ChildCode64 code rlo)
-
-iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
- ChildCode64 code1 r1lo <- iselExpr64 e1
- ChildCode64 code2 r2lo <- iselExpr64 e2
- (rlo,rhi) <- getNewRegPairNat II32
- let
- r1hi = getHiVRegFromLo r1lo
- r2hi = getHiVRegFromLo r2lo
- code = code1 `appOL`
- code2 `appOL`
- toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
- ADD II32 (OpReg r2lo) (OpReg rlo),
- MOV II32 (OpReg r1hi) (OpReg rhi),
- ADC II32 (OpReg r2hi) (OpReg rhi) ]
- return (ChildCode64 code rlo)
-
-iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
- ChildCode64 code1 r1lo <- iselExpr64 e1
- ChildCode64 code2 r2lo <- iselExpr64 e2
- (rlo,rhi) <- getNewRegPairNat II32
- let
- r1hi = getHiVRegFromLo r1lo
- r2hi = getHiVRegFromLo r2lo
- code = code1 `appOL`
- code2 `appOL`
- toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
- SUB II32 (OpReg r2lo) (OpReg rlo),
- MOV II32 (OpReg r1hi) (OpReg rhi),
- SBB II32 (OpReg r2hi) (OpReg rhi) ]
- return (ChildCode64 code rlo)
-
-iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
- fn <- getAnyReg expr
- r_dst_lo <- getNewRegNat II32
- let r_dst_hi = getHiVRegFromLo r_dst_lo
- code = fn r_dst_lo
- return (
- ChildCode64 (code `snocOL`
- MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
- r_dst_lo
- )
-
-iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do
- fn <- getAnyReg expr
- r_dst_lo <- getNewRegNat II32
- let r_dst_hi = getHiVRegFromLo r_dst_lo
- code = fn r_dst_lo
- return (
- ChildCode64 (code `snocOL`
- MOV II32 (OpReg r_dst_lo) (OpReg eax) `snocOL`
- CLTD II32 `snocOL`
- MOV II32 (OpReg eax) (OpReg r_dst_lo) `snocOL`
- MOV II32 (OpReg edx) (OpReg r_dst_hi))
- r_dst_lo
- )
-
-iselExpr64 expr
- = pprPanic "iselExpr64(i386)" (ppr expr)
-
-
---------------------------------------------------------------------------------
-getRegister :: CmmExpr -> NatM Register
-getRegister e = do dflags <- getDynFlags
- is32Bit <- is32BitPlatform
- getRegister' dflags is32Bit e
-
-getRegister' :: DynFlags -> Bool -> CmmExpr -> NatM Register
-
-getRegister' dflags is32Bit (CmmReg reg)
- = case reg of
- CmmGlobal PicBaseReg
- | is32Bit ->
- -- on x86_64, we have %rip for PicBaseReg, but it's not
- -- a full-featured register, it can only be used for
- -- rip-relative addressing.
- do reg' <- getPicBaseNat (archWordFormat is32Bit)
- return (Fixed (archWordFormat is32Bit) reg' nilOL)
- _ ->
- do
- let
- fmt = cmmTypeFormat (cmmRegType dflags reg)
- format = fmt
- --
- let platform = targetPlatform dflags
- return (Fixed format
- (getRegisterReg platform reg)
- nilOL)
-
-
-getRegister' dflags is32Bit (CmmRegOff r n)
- = getRegister' dflags is32Bit $ mangleIndexTree dflags r n
-
-getRegister' dflags is32Bit (CmmMachOp (MO_AlignmentCheck align _) [e])
- = addAlignmentCheck align <$> getRegister' dflags is32Bit e
-
--- for 32-bit architectures, support some 64 -> 32 bit conversions:
--- TO_W_(x), TO_W_(x >> 32)
-
-getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32)
- [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
- | is32Bit = do
- ChildCode64 code rlo <- iselExpr64 x
- return $ Fixed II32 (getHiVRegFromLo rlo) code
-
-getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32)
- [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
- | is32Bit = do
- ChildCode64 code rlo <- iselExpr64 x
- return $ Fixed II32 (getHiVRegFromLo rlo) code
-
-getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32) [x])
- | is32Bit = do
- ChildCode64 code rlo <- iselExpr64 x
- return $ Fixed II32 rlo code
-
-getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x])
- | is32Bit = do
- ChildCode64 code rlo <- iselExpr64 x
- return $ Fixed II32 rlo code
-
-getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
- float_const_sse2 where
- float_const_sse2
- | f == 0.0 = do
- let
- format = floatFormat w
- code dst = unitOL (XOR format (OpReg dst) (OpReg dst))
- -- I don't know why there are xorpd, xorps, and pxor instructions.
- -- They all appear to do the same thing --SDM
- return (Any format code)
-
- | otherwise = do
- Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
- loadFloatAmode w addr code
-
--- catch simple cases of zero- or sign-extended load
-getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
- code <- intLoadCode (MOVZxL II8) addr
- return (Any II32 code)
-
-getRegister' _ _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
- code <- intLoadCode (MOVSxL II8) addr
- return (Any II32 code)
-
-getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
- code <- intLoadCode (MOVZxL II16) addr
- return (Any II32 code)
-
-getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
- code <- intLoadCode (MOVSxL II16) addr
- return (Any II32 code)
-
--- catch simple cases of zero- or sign-extended load
-getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _])
- | not is32Bit = do
- code <- intLoadCode (MOVZxL II8) addr
- return (Any II64 code)
-
-getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _])
- | not is32Bit = do
- code <- intLoadCode (MOVSxL II8) addr
- return (Any II64 code)
-
-getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _])
- | not is32Bit = do
- code <- intLoadCode (MOVZxL II16) addr
- return (Any II64 code)
-
-getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _])
- | not is32Bit = do
- code <- intLoadCode (MOVSxL II16) addr
- return (Any II64 code)
-
-getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _])
- | not is32Bit = do
- code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
- return (Any II64 code)
-
-getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _])
- | not is32Bit = do
- code <- intLoadCode (MOVSxL II32) addr
- return (Any II64 code)
-
-getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
- CmmLit displacement])
- | not is32Bit = do
- return $ Any II64 (\dst -> unitOL $
- LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
-
-getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
- case mop of
- MO_F_Neg w -> sse2NegCode w x
-
-
- MO_S_Neg w -> triv_ucode NEGI (intFormat w)
- MO_Not w -> triv_ucode NOT (intFormat w)
-
- -- Nop conversions
- MO_UU_Conv W32 W8 -> toI8Reg W32 x
- MO_SS_Conv W32 W8 -> toI8Reg W32 x
- MO_XX_Conv W32 W8 -> toI8Reg W32 x
- MO_UU_Conv W16 W8 -> toI8Reg W16 x
- MO_SS_Conv W16 W8 -> toI8Reg W16 x
- MO_XX_Conv W16 W8 -> toI8Reg W16 x
- MO_UU_Conv W32 W16 -> toI16Reg W32 x
- MO_SS_Conv W32 W16 -> toI16Reg W32 x
- MO_XX_Conv W32 W16 -> toI16Reg W32 x
-
- MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x
- MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x
- MO_XX_Conv W64 W32 | not is32Bit -> conversionNop II64 x
- MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
- MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
- MO_XX_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
- MO_UU_Conv W64 W8 | not is32Bit -> toI8Reg W64 x
- MO_SS_Conv W64 W8 | not is32Bit -> toI8Reg W64 x
- MO_XX_Conv W64 W8 | not is32Bit -> toI8Reg W64 x
-
- MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
- MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
- MO_XX_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intFormat rep1) x
-
- -- widenings
- MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x
- MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
- MO_UU_Conv W8 W16 -> integerExtend W8 W16 MOVZxL x
-
- MO_SS_Conv W8 W32 -> integerExtend W8 W32 MOVSxL x
- MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
- MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x
-
- -- We don't care about the upper bits for MO_XX_Conv, so MOV is enough. However, on 32-bit we
- -- have 8-bit registers only for a few registers (as opposed to x86-64 where every register
- -- has 8-bit version). So for 32-bit code, we'll just zero-extend.
- MO_XX_Conv W8 W32
- | is32Bit -> integerExtend W8 W32 MOVZxL x
- | otherwise -> integerExtend W8 W32 MOV x
- MO_XX_Conv W8 W16
- | is32Bit -> integerExtend W8 W16 MOVZxL x
- | otherwise -> integerExtend W8 W16 MOV x
- MO_XX_Conv W16 W32 -> integerExtend W16 W32 MOV x
-
- MO_UU_Conv W8 W64 | not is32Bit -> integerExtend W8 W64 MOVZxL x
- MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x
- MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x
- MO_SS_Conv W8 W64 | not is32Bit -> integerExtend W8 W64 MOVSxL x
- MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x
- MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x
- -- For 32-to-64 bit zero extension, amd64 uses an ordinary movl.
- -- However, we don't want the register allocator to throw it
- -- away as an unnecessary reg-to-reg move, so we keep it in
- -- the form of a movzl and print it as a movl later.
- -- This doesn't apply to MO_XX_Conv since in this case we don't care about
- -- the upper bits. So we can just use MOV.
- MO_XX_Conv W8 W64 | not is32Bit -> integerExtend W8 W64 MOV x
- MO_XX_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOV x
- MO_XX_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOV x
-
- MO_FF_Conv W32 W64 -> coerceFP2FP W64 x
-
-
- MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
-
- MO_FS_Conv from to -> coerceFP2Int from to x
- MO_SF_Conv from to -> coerceInt2FP from to x
-
- MO_V_Insert {} -> needLlvm
- MO_V_Extract {} -> needLlvm
- MO_V_Add {} -> needLlvm
- MO_V_Sub {} -> needLlvm
- MO_V_Mul {} -> needLlvm
- MO_VS_Quot {} -> needLlvm
- MO_VS_Rem {} -> needLlvm
- MO_VS_Neg {} -> needLlvm
- MO_VU_Quot {} -> needLlvm
- MO_VU_Rem {} -> needLlvm
- MO_VF_Insert {} -> needLlvm
- MO_VF_Extract {} -> needLlvm
- MO_VF_Add {} -> needLlvm
- MO_VF_Sub {} -> needLlvm
- MO_VF_Mul {} -> needLlvm
- MO_VF_Quot {} -> needLlvm
- MO_VF_Neg {} -> needLlvm
-
- _other -> pprPanic "getRegister" (pprMachOp mop)
- where
- triv_ucode :: (Format -> Operand -> Instr) -> Format -> NatM Register
- triv_ucode instr format = trivialUCode format (instr format) x
-
- -- signed or unsigned extension.
- integerExtend :: Width -> Width
- -> (Format -> Operand -> Operand -> Instr)
- -> CmmExpr -> NatM Register
- integerExtend from to instr expr = do
- (reg,e_code) <- if from == W8 then getByteReg expr
- else getSomeReg expr
- let
- code dst =
- e_code `snocOL`
- instr (intFormat from) (OpReg reg) (OpReg dst)
- return (Any (intFormat to) code)
-
- toI8Reg :: Width -> CmmExpr -> NatM Register
- toI8Reg new_rep expr
- = do codefn <- getAnyReg expr
- return (Any (intFormat new_rep) codefn)
- -- HACK: use getAnyReg to get a byte-addressable register.
- -- If the source was a Fixed register, this will add the
- -- mov instruction to put it into the desired destination.
- -- We're assuming that the destination won't be a fixed
- -- non-byte-addressable register; it won't be, because all
- -- fixed registers are word-sized.
-
- toI16Reg = toI8Reg -- for now
-
- conversionNop :: Format -> CmmExpr -> NatM Register
- conversionNop new_format expr
- = do e_code <- getRegister' dflags is32Bit expr
- return (swizzleRegisterRep e_code new_format)
-
-
-getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
- case mop of
- MO_F_Eq _ -> condFltReg is32Bit EQQ x y
- MO_F_Ne _ -> condFltReg is32Bit NE x y
- MO_F_Gt _ -> condFltReg is32Bit GTT x y
- MO_F_Ge _ -> condFltReg is32Bit GE x y
- -- Invert comparison condition and swap operands
- -- See Note [SSE Parity Checks]
- MO_F_Lt _ -> condFltReg is32Bit GTT y x
- MO_F_Le _ -> condFltReg is32Bit GE y x
-
- MO_Eq _ -> condIntReg EQQ x y
- MO_Ne _ -> condIntReg NE x y
-
- MO_S_Gt _ -> condIntReg GTT x y
- MO_S_Ge _ -> condIntReg GE x y
- MO_S_Lt _ -> condIntReg LTT x y
- MO_S_Le _ -> condIntReg LE x y
-
- MO_U_Gt _ -> condIntReg GU x y
- MO_U_Ge _ -> condIntReg GEU x y
- MO_U_Lt _ -> condIntReg LU x y
- MO_U_Le _ -> condIntReg LEU x y
-
- MO_F_Add w -> trivialFCode_sse2 w ADD x y
-
- MO_F_Sub w -> trivialFCode_sse2 w SUB x y
-
- MO_F_Quot w -> trivialFCode_sse2 w FDIV x y
-
- MO_F_Mul w -> trivialFCode_sse2 w MUL x y
-
-
- MO_Add rep -> add_code rep x y
- MO_Sub rep -> sub_code rep x y
-
- MO_S_Quot rep -> div_code rep True True x y
- MO_S_Rem rep -> div_code rep True False x y
- MO_U_Quot rep -> div_code rep False True x y
- MO_U_Rem rep -> div_code rep False False x y
-
- MO_S_MulMayOflo rep -> imulMayOflo rep x y
-
- MO_Mul W8 -> imulW8 x y
- MO_Mul rep -> triv_op rep IMUL
- MO_And rep -> triv_op rep AND
- MO_Or rep -> triv_op rep OR
- MO_Xor rep -> triv_op rep XOR
-
- {- Shift ops on x86s have constraints on their source, it
- either has to be Imm, CL or 1
- => trivialCode is not restrictive enough (sigh.)
- -}
- MO_Shl rep -> shift_code rep SHL x y {-False-}
- MO_U_Shr rep -> shift_code rep SHR x y {-False-}
- MO_S_Shr rep -> shift_code rep SAR x y {-False-}
-
- MO_V_Insert {} -> needLlvm
- MO_V_Extract {} -> needLlvm
- MO_V_Add {} -> needLlvm
- MO_V_Sub {} -> needLlvm
- MO_V_Mul {} -> needLlvm
- MO_VS_Quot {} -> needLlvm
- MO_VS_Rem {} -> needLlvm
- MO_VS_Neg {} -> needLlvm
- MO_VF_Insert {} -> needLlvm
- MO_VF_Extract {} -> needLlvm
- MO_VF_Add {} -> needLlvm
- MO_VF_Sub {} -> needLlvm
- MO_VF_Mul {} -> needLlvm
- MO_VF_Quot {} -> needLlvm
- MO_VF_Neg {} -> needLlvm
-
- _other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
- where
- --------------------
- triv_op width instr = trivialCode width op (Just op) x y
- where op = instr (intFormat width)
-
- -- Special case for IMUL for bytes, since the result of IMULB will be in
- -- %ax, the split to %dx/%edx/%rdx and %ax/%eax/%rax happens only for wider
- -- values.
- imulW8 :: CmmExpr -> CmmExpr -> NatM Register
- imulW8 arg_a arg_b = do
- (a_reg, a_code) <- getNonClobberedReg arg_a
- b_code <- getAnyReg arg_b
-
- let code = a_code `appOL` b_code eax `appOL`
- toOL [ IMUL2 format (OpReg a_reg) ]
- format = intFormat W8
-
- return (Fixed format eax code)
-
-
- imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
- imulMayOflo rep a b = do
- (a_reg, a_code) <- getNonClobberedReg a
- b_code <- getAnyReg b
- let
- shift_amt = case rep of
- W32 -> 31
- W64 -> 63
- _ -> panic "shift_amt"
-
- format = intFormat rep
- code = a_code `appOL` b_code eax `appOL`
- toOL [
- IMUL2 format (OpReg a_reg), -- result in %edx:%eax
- SAR format (OpImm (ImmInt shift_amt)) (OpReg eax),
- -- sign extend lower part
- SUB format (OpReg edx) (OpReg eax)
- -- compare against upper
- -- eax==0 if high part == sign extended low part
- ]
- return (Fixed format eax code)
-
- --------------------
- shift_code :: Width
- -> (Format -> Operand -> Operand -> Instr)
- -> CmmExpr
- -> CmmExpr
- -> NatM Register
-
- {- Case1: shift length as immediate -}
- shift_code width instr x (CmmLit lit) = do
- x_code <- getAnyReg x
- let
- format = intFormat width
- code dst
- = x_code dst `snocOL`
- instr format (OpImm (litToImm lit)) (OpReg dst)
- return (Any format code)
-
- {- Case2: shift length is complex (non-immediate)
- * y must go in %ecx.
- * we cannot do y first *and* put its result in %ecx, because
- %ecx might be clobbered by x.
- * if we do y second, then x cannot be
- in a clobbered reg. Also, we cannot clobber x's reg
- with the instruction itself.
- * so we can either:
- - do y first, put its result in a fresh tmp, then copy it to %ecx later
- - do y second and put its result into %ecx. x gets placed in a fresh
- tmp. This is likely to be better, because the reg alloc can
- eliminate this reg->reg move here (it won't eliminate the other one,
- because the move is into the fixed %ecx).
- -}
- shift_code width instr x y{-amount-} = do
- x_code <- getAnyReg x
- let format = intFormat width
- tmp <- getNewRegNat format
- y_code <- getAnyReg y
- let
- code = x_code tmp `appOL`
- y_code ecx `snocOL`
- instr format (OpReg ecx) (OpReg tmp)
- return (Fixed format tmp code)
-
- --------------------
- add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
- add_code rep x (CmmLit (CmmInt y _))
- | is32BitInteger y = add_int rep x y
- add_code rep x y = trivialCode rep (ADD format) (Just (ADD format)) x y
- where format = intFormat rep
- -- TODO: There are other interesting patterns we want to replace
- -- with a LEA, e.g. `(x + offset) + (y << shift)`.
-
- --------------------
- sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
- sub_code rep x (CmmLit (CmmInt y _))
- | is32BitInteger (-y) = add_int rep x (-y)
- sub_code rep x y = trivialCode rep (SUB (intFormat rep)) Nothing x y
-
- -- our three-operand add instruction:
- add_int width x y = do
- (x_reg, x_code) <- getSomeReg x
- let
- format = intFormat width
- imm = ImmInt (fromInteger y)
- code dst
- = x_code `snocOL`
- LEA format
- (OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
- (OpReg dst)
- --
- return (Any format code)
-
- ----------------------
-
- -- See Note [DIV/IDIV for bytes]
- div_code W8 signed quotient x y = do
- let widen | signed = MO_SS_Conv W8 W16
- | otherwise = MO_UU_Conv W8 W16
- div_code
- W16
- signed
- quotient
- (CmmMachOp widen [x])
- (CmmMachOp widen [y])
-
- div_code width signed quotient x y = do
- (y_op, y_code) <- getRegOrMem y -- cannot be clobbered
- x_code <- getAnyReg x
- let
- format = intFormat width
- widen | signed = CLTD format
- | otherwise = XOR format (OpReg edx) (OpReg edx)
-
- instr | signed = IDIV
- | otherwise = DIV
-
- code = y_code `appOL`
- x_code eax `appOL`
- toOL [widen, instr format y_op]
-
- result | quotient = eax
- | otherwise = edx
-
- return (Fixed format result code)
-
-
-getRegister' _ _ (CmmLoad mem pk)
- | isFloatType pk
- = do
- Amode addr mem_code <- getAmode mem
- loadFloatAmode (typeWidth pk) addr mem_code
-
-getRegister' _ is32Bit (CmmLoad mem pk)
- | is32Bit && not (isWord64 pk)
- = do
- code <- intLoadCode instr mem
- return (Any format code)
- where
- width = typeWidth pk
- format = intFormat width
- instr = case width of
- W8 -> MOVZxL II8
- _other -> MOV format
- -- We always zero-extend 8-bit loads, if we
- -- can't think of anything better. This is because
- -- we can't guarantee access to an 8-bit variant of every register
- -- (esi and edi don't have 8-bit variants), so to make things
- -- simpler we do our 8-bit arithmetic with full 32-bit registers.
-
--- Simpler memory load code on x86_64
-getRegister' _ is32Bit (CmmLoad mem pk)
- | not is32Bit
- = do
- code <- intLoadCode (MOV format) mem
- return (Any format code)
- where format = intFormat $ typeWidth pk
-
-getRegister' _ is32Bit (CmmLit (CmmInt 0 width))
- = let
- format = intFormat width
-
- -- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
- format1 = if is32Bit then format
- else case format of
- II64 -> II32
- _ -> format
- code dst
- = unitOL (XOR format1 (OpReg dst) (OpReg dst))
- in
- return (Any format code)
-
- -- optimisation for loading small literals on x86_64: take advantage
- -- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
- -- instruction forms are shorter.
-getRegister' dflags is32Bit (CmmLit lit)
- | not is32Bit, isWord64 (cmmLitType dflags lit), not (isBigLit lit)
- = let
- imm = litToImm lit
- code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
- in
- return (Any II64 code)
- where
- isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
- isBigLit _ = False
- -- note1: not the same as (not.is32BitLit), because that checks for
- -- signed literals that fit in 32 bits, but we want unsigned
- -- literals here.
- -- note2: all labels are small, because we're assuming the
- -- small memory model (see gcc docs, -mcmodel=small).
-
-getRegister' dflags _ (CmmLit lit)
- = do let format = cmmTypeFormat (cmmLitType dflags lit)
- imm = litToImm lit
- code dst = unitOL (MOV format (OpImm imm) (OpReg dst))
- return (Any format code)
-
-getRegister' _ _ other
- | isVecExpr other = needLlvm
- | otherwise = pprPanic "getRegister(x86)" (ppr other)
-
-
-intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
- -> NatM (Reg -> InstrBlock)
-intLoadCode instr mem = do
- Amode src mem_code <- getAmode mem
- return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
-
--- Compute an expression into *any* register, adding the appropriate
--- move instruction if necessary.
-getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
-getAnyReg expr = do
- r <- getRegister expr
- anyReg r
-
-anyReg :: Register -> NatM (Reg -> InstrBlock)
-anyReg (Any _ code) = return code
-anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
-
--- A bit like getSomeReg, but we want a reg that can be byte-addressed.
--- Fixed registers might not be byte-addressable, so we make sure we've
--- got a temporary, inserting an extra reg copy if necessary.
-getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
-getByteReg expr = do
- is32Bit <- is32BitPlatform
- if is32Bit
- then do r <- getRegister expr
- case r of
- Any rep code -> do
- tmp <- getNewRegNat rep
- return (tmp, code tmp)
- Fixed rep reg code
- | isVirtualReg reg -> return (reg,code)
- | otherwise -> do
- tmp <- getNewRegNat rep
- return (tmp, code `snocOL` reg2reg rep reg tmp)
- -- ToDo: could optimise slightly by checking for
- -- byte-addressable real registers, but that will
- -- happen very rarely if at all.
- else getSomeReg expr -- all regs are byte-addressable on x86_64
-
--- Another variant: this time we want the result in a register that cannot
--- be modified by code to evaluate an arbitrary expression.
-getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
-getNonClobberedReg expr = do
- dflags <- getDynFlags
- r <- getRegister expr
- case r of
- Any rep code -> do
- tmp <- getNewRegNat rep
- return (tmp, code tmp)
- Fixed rep reg code
- -- only certain regs can be clobbered
- | reg `elem` instrClobberedRegs (targetPlatform dflags)
- -> do
- tmp <- getNewRegNat rep
- return (tmp, code `snocOL` reg2reg rep reg tmp)
- | otherwise ->
- return (reg, code)
-
-reg2reg :: Format -> Reg -> Reg -> Instr
-reg2reg format src dst = MOV format (OpReg src) (OpReg dst)
-
-
---------------------------------------------------------------------------------
-getAmode :: CmmExpr -> NatM Amode
-getAmode e = do is32Bit <- is32BitPlatform
- getAmode' is32Bit e
-
-getAmode' :: Bool -> CmmExpr -> NatM Amode
-getAmode' _ (CmmRegOff r n) = do dflags <- getDynFlags
- getAmode $ mangleIndexTree dflags r n
-
-getAmode' is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
- CmmLit displacement])
- | not is32Bit
- = return $ Amode (ripRel (litToImm displacement)) nilOL
-
-
--- This is all just ridiculous, since it carefully undoes
--- what mangleIndexTree has just done.
-getAmode' is32Bit (CmmMachOp (MO_Sub _rep) [x, CmmLit lit@(CmmInt i _)])
- | is32BitLit is32Bit lit
- -- ASSERT(rep == II32)???
- = do (x_reg, x_code) <- getSomeReg x
- let off = ImmInt (-(fromInteger i))
- return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
-
-getAmode' is32Bit (CmmMachOp (MO_Add _rep) [x, CmmLit lit])
- | is32BitLit is32Bit lit
- -- ASSERT(rep == II32)???
- = do (x_reg, x_code) <- getSomeReg x
- let off = litToImm lit
- return (Amode (AddrBaseIndex (EABaseReg x_reg) EAIndexNone off) x_code)
-
--- Turn (lit1 << n + lit2) into (lit2 + lit1 << n) so it will be
--- recognised by the next rule.
-getAmode' is32Bit (CmmMachOp (MO_Add rep) [a@(CmmMachOp (MO_Shl _) _),
- b@(CmmLit _)])
- = getAmode' is32Bit (CmmMachOp (MO_Add rep) [b,a])
-
--- Matches: (x + offset) + (y << shift)
-getAmode' _ (CmmMachOp (MO_Add _) [CmmRegOff x offset,
- CmmMachOp (MO_Shl _)
- [y, CmmLit (CmmInt shift _)]])
- | shift == 0 || shift == 1 || shift == 2 || shift == 3
- = x86_complex_amode (CmmReg x) y shift (fromIntegral offset)
-
-getAmode' _ (CmmMachOp (MO_Add _) [x, CmmMachOp (MO_Shl _)
- [y, CmmLit (CmmInt shift _)]])
- | shift == 0 || shift == 1 || shift == 2 || shift == 3
- = x86_complex_amode x y shift 0
-
-getAmode' _ (CmmMachOp (MO_Add _)
- [x, CmmMachOp (MO_Add _)
- [CmmMachOp (MO_Shl _) [y, CmmLit (CmmInt shift _)],
- CmmLit (CmmInt offset _)]])
- | shift == 0 || shift == 1 || shift == 2 || shift == 3
- && is32BitInteger offset
- = x86_complex_amode x y shift offset
-
-getAmode' _ (CmmMachOp (MO_Add _) [x,y])
- = x86_complex_amode x y 0 0
-
-getAmode' is32Bit (CmmLit lit) | is32BitLit is32Bit lit
- = return (Amode (ImmAddr (litToImm lit) 0) nilOL)
-
-getAmode' _ expr = do
- (reg,code) <- getSomeReg expr
- return (Amode (AddrBaseIndex (EABaseReg reg) EAIndexNone (ImmInt 0)) code)
-
--- | Like 'getAmode', but on 32-bit use simple register addressing
--- (i.e. no index register). This stops us from running out of
--- registers on x86 when using instructions such as cmpxchg, which can
--- use up to three virtual registers and one fixed register.
-getSimpleAmode :: DynFlags -> Bool -> CmmExpr -> NatM Amode
-getSimpleAmode dflags is32Bit addr
- | is32Bit = do
- addr_code <- getAnyReg addr
- addr_r <- getNewRegNat (intFormat (wordWidth dflags))
- let amode = AddrBaseIndex (EABaseReg addr_r) EAIndexNone (ImmInt 0)
- return $! Amode amode (addr_code addr_r)
- | otherwise = getAmode addr
-
-x86_complex_amode :: CmmExpr -> CmmExpr -> Integer -> Integer -> NatM Amode
-x86_complex_amode base index shift offset
- = do (x_reg, x_code) <- getNonClobberedReg base
- -- x must be in a temp, because it has to stay live over y_code
- -- we could compare x_reg and y_reg and do something better here...
- (y_reg, y_code) <- getSomeReg index
- let
- code = x_code `appOL` y_code
- base = case shift of 0 -> 1; 1 -> 2; 2 -> 4; 3 -> 8;
- n -> panic $ "x86_complex_amode: unhandled shift! (" ++ show n ++ ")"
- return (Amode (AddrBaseIndex (EABaseReg x_reg) (EAIndex y_reg base) (ImmInt (fromIntegral offset)))
- code)
-
-
-
-
--- -----------------------------------------------------------------------------
--- getOperand: sometimes any operand will do.
-
--- getNonClobberedOperand: the value of the operand will remain valid across
--- the computation of an arbitrary expression, unless the expression
--- is computed directly into a register which the operand refers to
--- (see trivialCode where this function is used for an example).
-
-getNonClobberedOperand :: CmmExpr -> NatM (Operand, InstrBlock)
-getNonClobberedOperand (CmmLit lit) = do
- if isSuitableFloatingPointLit lit
- then do
- let CmmFloat _ w = lit
- Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
- return (OpAddr addr, code)
- else do
-
- is32Bit <- is32BitPlatform
- dflags <- getDynFlags
- if is32BitLit is32Bit lit && not (isFloatType (cmmLitType dflags lit))
- then return (OpImm (litToImm lit), nilOL)
- else getNonClobberedOperand_generic (CmmLit lit)
-
-getNonClobberedOperand (CmmLoad mem pk) = do
- is32Bit <- is32BitPlatform
- -- this logic could be simplified
- -- TODO FIXME
- if (if is32Bit then not (isWord64 pk) else True)
- -- if 32bit and pk is at float/double/simd value
- -- or if 64bit
- -- this could use some eyeballs or i'll need to stare at it more later
- then do
- dflags <- getDynFlags
- let platform = targetPlatform dflags
- Amode src mem_code <- getAmode mem
- (src',save_code) <-
- if (amodeCouldBeClobbered platform src)
- then do
- tmp <- getNewRegNat (archWordFormat is32Bit)
- return (AddrBaseIndex (EABaseReg tmp) EAIndexNone (ImmInt 0),
- unitOL (LEA (archWordFormat is32Bit)
- (OpAddr src)
- (OpReg tmp)))
- else
- return (src, nilOL)
- return (OpAddr src', mem_code `appOL` save_code)
- else do
- -- if its a word or gcptr on 32bit?
- getNonClobberedOperand_generic (CmmLoad mem pk)
-
-getNonClobberedOperand e = getNonClobberedOperand_generic e
-
-getNonClobberedOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
-getNonClobberedOperand_generic e = do
- (reg, code) <- getNonClobberedReg e
- return (OpReg reg, code)
-
-amodeCouldBeClobbered :: Platform -> AddrMode -> Bool
-amodeCouldBeClobbered platform amode = any (regClobbered platform) (addrModeRegs amode)
-
-regClobbered :: Platform -> Reg -> Bool
-regClobbered platform (RegReal (RealRegSingle rr)) = freeReg platform rr
-regClobbered _ _ = False
-
--- getOperand: the operand is not required to remain valid across the
--- computation of an arbitrary expression.
-getOperand :: CmmExpr -> NatM (Operand, InstrBlock)
-
-getOperand (CmmLit lit) = do
- use_sse2 <- sse2Enabled
- if (use_sse2 && isSuitableFloatingPointLit lit)
- then do
- let CmmFloat _ w = lit
- Amode addr code <- memConstant (mkAlignment $ widthInBytes w) lit
- return (OpAddr addr, code)
- else do
-
- is32Bit <- is32BitPlatform
- dflags <- getDynFlags
- if is32BitLit is32Bit lit && not (isFloatType (cmmLitType dflags lit))
- then return (OpImm (litToImm lit), nilOL)
- else getOperand_generic (CmmLit lit)
-
-getOperand (CmmLoad mem pk) = do
- is32Bit <- is32BitPlatform
- use_sse2 <- sse2Enabled
- if (not (isFloatType pk) || use_sse2) && (if is32Bit then not (isWord64 pk) else True)
- then do
- Amode src mem_code <- getAmode mem
- return (OpAddr src, mem_code)
- else
- getOperand_generic (CmmLoad mem pk)
-
-getOperand e = getOperand_generic e
-
-getOperand_generic :: CmmExpr -> NatM (Operand, InstrBlock)
-getOperand_generic e = do
- (reg, code) <- getSomeReg e
- return (OpReg reg, code)
-
-isOperand :: Bool -> CmmExpr -> Bool
-isOperand _ (CmmLoad _ _) = True
-isOperand is32Bit (CmmLit lit) = is32BitLit is32Bit lit
- || isSuitableFloatingPointLit lit
-isOperand _ _ = False
-
--- | Given a 'Register', produce a new 'Register' with an instruction block
--- which will check the value for alignment. Used for @-falignment-sanitisation@.
-addAlignmentCheck :: Int -> Register -> Register
-addAlignmentCheck align reg =
- case reg of
- Fixed fmt reg code -> Fixed fmt reg (code `appOL` check fmt reg)
- Any fmt f -> Any fmt (\reg -> f reg `appOL` check fmt reg)
- where
- check :: Format -> Reg -> InstrBlock
- check fmt reg =
- ASSERT(not $ isFloatFormat fmt)
- toOL [ TEST fmt (OpImm $ ImmInt $ align-1) (OpReg reg)
- , JXX_GBL NE $ ImmCLbl mkBadAlignmentLabel
- ]
-
-memConstant :: Alignment -> CmmLit -> NatM Amode
-memConstant align lit = do
- lbl <- getNewLabelNat
- let rosection = Section ReadOnlyData lbl
- dflags <- getDynFlags
- (addr, addr_code) <- if target32Bit (targetPlatform dflags)
- then do dynRef <- cmmMakeDynamicReference
- dflags
- DataReference
- lbl
- Amode addr addr_code <- getAmode dynRef
- return (addr, addr_code)
- else return (ripRel (ImmCLbl lbl), nilOL)
- let code =
- LDATA rosection (align, RawCmmStatics lbl [CmmStaticLit lit])
- `consOL` addr_code
- return (Amode addr code)
-
-
-loadFloatAmode :: Width -> AddrMode -> InstrBlock -> NatM Register
-loadFloatAmode w addr addr_code = do
- let format = floatFormat w
- code dst = addr_code `snocOL`
- MOV format (OpAddr addr) (OpReg dst)
-
- return (Any format code)
-
-
--- if we want a floating-point literal as an operand, we can
--- use it directly from memory. However, if the literal is
--- zero, we're better off generating it into a register using
--- xor.
-isSuitableFloatingPointLit :: CmmLit -> Bool
-isSuitableFloatingPointLit (CmmFloat f _) = f /= 0.0
-isSuitableFloatingPointLit _ = False
-
-getRegOrMem :: CmmExpr -> NatM (Operand, InstrBlock)
-getRegOrMem e@(CmmLoad mem pk) = do
- is32Bit <- is32BitPlatform
- use_sse2 <- sse2Enabled
- if (not (isFloatType pk) || use_sse2) && (if is32Bit then not (isWord64 pk) else True)
- then do
- Amode src mem_code <- getAmode mem
- return (OpAddr src, mem_code)
- else do
- (reg, code) <- getNonClobberedReg e
- return (OpReg reg, code)
-getRegOrMem e = do
- (reg, code) <- getNonClobberedReg e
- return (OpReg reg, code)
-
-is32BitLit :: Bool -> CmmLit -> Bool
-is32BitLit is32Bit (CmmInt i W64)
- | not is32Bit
- = -- assume that labels are in the range 0-2^31-1: this assumes the
- -- small memory model (see gcc docs, -mcmodel=small).
- is32BitInteger i
-is32BitLit _ _ = True
-
-
-
-
--- Set up a condition code for a conditional branch.
-
-getCondCode :: CmmExpr -> NatM CondCode
-
--- yes, they really do seem to want exactly the same!
-
-getCondCode (CmmMachOp mop [x, y])
- =
- case mop of
- MO_F_Eq W32 -> condFltCode EQQ x y
- MO_F_Ne W32 -> condFltCode NE x y
- MO_F_Gt W32 -> condFltCode GTT x y
- MO_F_Ge W32 -> condFltCode GE x y
- -- Invert comparison condition and swap operands
- -- See Note [SSE Parity Checks]
- MO_F_Lt W32 -> condFltCode GTT y x
- MO_F_Le W32 -> condFltCode GE y x
-
- MO_F_Eq W64 -> condFltCode EQQ x y
- MO_F_Ne W64 -> condFltCode NE x y
- MO_F_Gt W64 -> condFltCode GTT x y
- MO_F_Ge W64 -> condFltCode GE x y
- MO_F_Lt W64 -> condFltCode GTT y x
- MO_F_Le W64 -> condFltCode GE y x
-
- _ -> condIntCode (machOpToCond mop) x y
-
-getCondCode other = pprPanic "getCondCode(2)(x86,x86_64)" (ppr other)
-
-machOpToCond :: MachOp -> Cond
-machOpToCond mo = case mo of
- MO_Eq _ -> EQQ
- MO_Ne _ -> NE
- MO_S_Gt _ -> GTT
- MO_S_Ge _ -> GE
- MO_S_Lt _ -> LTT
- MO_S_Le _ -> LE
- MO_U_Gt _ -> GU
- MO_U_Ge _ -> GEU
- MO_U_Lt _ -> LU
- MO_U_Le _ -> LEU
- _other -> pprPanic "machOpToCond" (pprMachOp mo)
-
-
--- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be
--- passed back up the tree.
-
-condIntCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
-condIntCode cond x y = do is32Bit <- is32BitPlatform
- condIntCode' is32Bit cond x y
-
-condIntCode' :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM CondCode
-
--- memory vs immediate
-condIntCode' is32Bit cond (CmmLoad x pk) (CmmLit lit)
- | is32BitLit is32Bit lit = do
- Amode x_addr x_code <- getAmode x
- let
- imm = litToImm lit
- code = x_code `snocOL`
- CMP (cmmTypeFormat pk) (OpImm imm) (OpAddr x_addr)
- --
- return (CondCode False cond code)
-
--- anything vs zero, using a mask
--- TODO: Add some sanity checking!!!!
-condIntCode' is32Bit cond (CmmMachOp (MO_And _) [x,o2]) (CmmLit (CmmInt 0 pk))
- | (CmmLit lit@(CmmInt mask _)) <- o2, is32BitLit is32Bit lit
- = do
- (x_reg, x_code) <- getSomeReg x
- let
- code = x_code `snocOL`
- TEST (intFormat pk) (OpImm (ImmInteger mask)) (OpReg x_reg)
- --
- return (CondCode False cond code)
-
--- anything vs zero
-condIntCode' _ cond x (CmmLit (CmmInt 0 pk)) = do
- (x_reg, x_code) <- getSomeReg x
- let
- code = x_code `snocOL`
- TEST (intFormat pk) (OpReg x_reg) (OpReg x_reg)
- --
- return (CondCode False cond code)
-
--- anything vs operand
-condIntCode' is32Bit cond x y
- | isOperand is32Bit y = do
- dflags <- getDynFlags
- (x_reg, x_code) <- getNonClobberedReg x
- (y_op, y_code) <- getOperand y
- let
- code = x_code `appOL` y_code `snocOL`
- CMP (cmmTypeFormat (cmmExprType dflags x)) y_op (OpReg x_reg)
- return (CondCode False cond code)
--- operand vs. anything: invert the comparison so that we can use a
--- single comparison instruction.
- | isOperand is32Bit x
- , Just revcond <- maybeFlipCond cond = do
- dflags <- getDynFlags
- (y_reg, y_code) <- getNonClobberedReg y
- (x_op, x_code) <- getOperand x
- let
- code = y_code `appOL` x_code `snocOL`
- CMP (cmmTypeFormat (cmmExprType dflags x)) x_op (OpReg y_reg)
- return (CondCode False revcond code)
-
--- anything vs anything
-condIntCode' _ cond x y = do
- dflags <- getDynFlags
- (y_reg, y_code) <- getNonClobberedReg y
- (x_op, x_code) <- getRegOrMem x
- let
- code = y_code `appOL`
- x_code `snocOL`
- CMP (cmmTypeFormat (cmmExprType dflags x)) (OpReg y_reg) x_op
- return (CondCode False cond code)
-
-
-
---------------------------------------------------------------------------------
-condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode
-
-condFltCode cond x y
- = condFltCode_sse2
- where
-
-
- -- in the SSE2 comparison ops (ucomiss, ucomisd) the left arg may be
- -- an operand, but the right must be a reg. We can probably do better
- -- than this general case...
- condFltCode_sse2 = do
- dflags <- getDynFlags
- (x_reg, x_code) <- getNonClobberedReg x
- (y_op, y_code) <- getOperand y
- let
- code = x_code `appOL`
- y_code `snocOL`
- CMP (floatFormat $ cmmExprWidth dflags x) y_op (OpReg x_reg)
- -- NB(1): we need to use the unsigned comparison operators on the
- -- result of this comparison.
- return (CondCode True (condToUnsigned cond) code)
-
--- -----------------------------------------------------------------------------
--- Generating assignments
-
--- Assignments are really at the heart of the whole code generation
--- business. Almost all top-level nodes of any real importance are
--- assignments, which correspond to loads, stores, or register
--- transfers. If we're really lucky, some of the register transfers
--- will go away, because we can use the destination register to
--- complete the code generation for the right hand side. This only
--- fails when the right hand side is forced into a fixed register
--- (e.g. the result of a call).
-
-assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
-assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
-
-assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
-assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
-
-
--- integer assignment to memory
-
--- specific case of adding/subtracting an integer to a particular address.
--- ToDo: catch other cases where we can use an operation directly on a memory
--- address.
-assignMem_IntCode pk addr (CmmMachOp op [CmmLoad addr2 _,
- CmmLit (CmmInt i _)])
- | addr == addr2, pk /= II64 || is32BitInteger i,
- Just instr <- check op
- = do Amode amode code_addr <- getAmode addr
- let code = code_addr `snocOL`
- instr pk (OpImm (ImmInt (fromIntegral i))) (OpAddr amode)
- return code
- where
- check (MO_Add _) = Just ADD
- check (MO_Sub _) = Just SUB
- check _ = Nothing
- -- ToDo: more?
-
--- general case
-assignMem_IntCode pk addr src = do
- is32Bit <- is32BitPlatform
- Amode addr code_addr <- getAmode addr
- (code_src, op_src) <- get_op_RI is32Bit src
- let
- code = code_src `appOL`
- code_addr `snocOL`
- MOV pk op_src (OpAddr addr)
- -- NOTE: op_src is stable, so it will still be valid
- -- after code_addr. This may involve the introduction
- -- of an extra MOV to a temporary register, but we hope
- -- the register allocator will get rid of it.
- --
- return code
- where
- get_op_RI :: Bool -> CmmExpr -> NatM (InstrBlock,Operand) -- code, operator
- get_op_RI is32Bit (CmmLit lit) | is32BitLit is32Bit lit
- = return (nilOL, OpImm (litToImm lit))
- get_op_RI _ op
- = do (reg,code) <- getNonClobberedReg op
- return (code, OpReg reg)
-
-
--- Assign; dst is a reg, rhs is mem
-assignReg_IntCode pk reg (CmmLoad src _) = do
- load_code <- intLoadCode (MOV pk) src
- dflags <- getDynFlags
- let platform = targetPlatform dflags
- return (load_code (getRegisterReg platform reg))
-
--- dst is a reg, but src could be anything
-assignReg_IntCode _ reg src = do
- dflags <- getDynFlags
- let platform = targetPlatform dflags
- code <- getAnyReg src
- return (code (getRegisterReg platform reg))
-
-
--- Floating point assignment to memory
-assignMem_FltCode pk addr src = do
- (src_reg, src_code) <- getNonClobberedReg src
- Amode addr addr_code <- getAmode addr
- let
- code = src_code `appOL`
- addr_code `snocOL`
- MOV pk (OpReg src_reg) (OpAddr addr)
-
- return code
-
--- Floating point assignment to a register/temporary
-assignReg_FltCode _ reg src = do
- src_code <- getAnyReg src
- dflags <- getDynFlags
- let platform = targetPlatform dflags
- return (src_code (getRegisterReg platform reg))
-
-
-genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock
-
-genJump (CmmLoad mem _) regs = do
- Amode target code <- getAmode mem
- return (code `snocOL` JMP (OpAddr target) regs)
-
-genJump (CmmLit lit) regs = do
- return (unitOL (JMP (OpImm (litToImm lit)) regs))
-
-genJump expr regs = do
- (reg,code) <- getSomeReg expr
- return (code `snocOL` JMP (OpReg reg) regs)
-
-
--- -----------------------------------------------------------------------------
--- Unconditional branches
-
-genBranch :: BlockId -> InstrBlock
-genBranch = toOL . mkJumpInstr
-
-
-
--- -----------------------------------------------------------------------------
--- Conditional jumps/branches
-
-{-
-Conditional jumps are always to local labels, so we can use branch
-instructions. We peek at the arguments to decide what kind of
-comparison to do.
-
-I386: First, we have to ensure that the condition
-codes are set according to the supplied comparison operation.
--}
-
-
-genCondBranch
- :: BlockId -- the source of the jump
- -> BlockId -- the true branch target
- -> BlockId -- the false branch target
- -> CmmExpr -- the condition on which to branch
- -> NatM InstrBlock -- Instructions
-
-genCondBranch bid id false expr = do
- is32Bit <- is32BitPlatform
- genCondBranch' is32Bit bid id false expr
-
--- | We return the instructions generated.
-genCondBranch' :: Bool -> BlockId -> BlockId -> BlockId -> CmmExpr
- -> NatM InstrBlock
-
--- 64-bit integer comparisons on 32-bit
-genCondBranch' is32Bit _bid true false (CmmMachOp mop [e1,e2])
- | is32Bit, Just W64 <- maybeIntComparison mop = do
- ChildCode64 code1 r1_lo <- iselExpr64 e1
- ChildCode64 code2 r2_lo <- iselExpr64 e2
- let r1_hi = getHiVRegFromLo r1_lo
- r2_hi = getHiVRegFromLo r2_lo
- cond = machOpToCond mop
- Just cond' = maybeFlipCond cond
- --TODO: Update CFG for x86
- let code = code1 `appOL` code2 `appOL` toOL [
- CMP II32 (OpReg r2_hi) (OpReg r1_hi),
- JXX cond true,
- JXX cond' false,
- CMP II32 (OpReg r2_lo) (OpReg r1_lo),
- JXX cond true] `appOL` genBranch false
- return code
-
-genCondBranch' _ bid id false bool = do
- CondCode is_float cond cond_code <- getCondCode bool
- use_sse2 <- sse2Enabled
- if not is_float || not use_sse2
- then
- return (cond_code `snocOL` JXX cond id `appOL` genBranch false)
- else do
- -- See Note [SSE Parity Checks]
- let jmpFalse = genBranch false
- code
- = case cond of
- NE -> or_unordered
- GU -> plain_test
- GEU -> plain_test
- -- Use ASSERT so we don't break releases if
- -- LTT/LE creep in somehow.
- LTT ->
- ASSERT2(False, ppr "Should have been turned into >")
- and_ordered
- LE ->
- ASSERT2(False, ppr "Should have been turned into >=")
- and_ordered
- _ -> and_ordered
-
- plain_test = unitOL (
- JXX cond id
- ) `appOL` jmpFalse
- or_unordered = toOL [
- JXX cond id,
- JXX PARITY id
- ] `appOL` jmpFalse
- and_ordered = toOL [
- JXX PARITY false,
- JXX cond id,
- JXX ALWAYS false
- ]
- updateCfgNat (\cfg -> adjustEdgeWeight cfg (+3) bid false)
- return (cond_code `appOL` code)
-
-{- Note [Introducing cfg edges inside basic blocks]
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
- During instruction selection a statement `s`
- in a block B with control of the sort: B -> C
- will sometimes result in control
- flow of the sort:
-
- ┌ < ┐
- v ^
- B -> B1 ┴ -> C
-
- as is the case for some atomic operations.
-
- Now to keep the CFG in sync when introducing B1 we clearly
- want to insert it between B and C. However there is
- a catch when we have to deal with self loops.
-
- We might start with code and a CFG of these forms:
-
- loop:
- stmt1 ┌ < ┐
- .... v ^
- stmtX loop ┘
- stmtY
- ....
- goto loop:
-
- Now we introduce B1:
- ┌ ─ ─ ─ ─ ─┐
- loop: │ ┌ < ┐ │
- instrs v │ │ ^
- .... loop ┴ B1 ┴ ┘
- instrsFromX
- stmtY
- goto loop:
-
- This is simple, all outgoing edges from loop now simply
- start from B1 instead and the code generator knows which
- new edges it introduced for the self loop of B1.
-
- Disaster strikes if the statement Y follows the same pattern.
- If we apply the same rule that all outgoing edges change then
- we end up with:
-
- loop ─> B1 ─> B2 ┬─┐
- │ │ └─<┤ │
- │ └───<───┘ │
- └───────<────────┘
-
- This is problematic. The edge B1->B1 is modified as expected.
- However the modification is wrong!
-
- The assembly in this case looked like this:
-
- _loop:
- <instrs>
- _B1:
- ...
- cmpxchgq ...
- jne _B1
- <instrs>
- <end _B1>
- _B2:
- ...
- cmpxchgq ...
- jne _B2
- <instrs>
- jmp loop
-
- There is no edge _B2 -> _B1 here. It's still a self loop onto _B1.
-
- The problem here is that really B1 should be two basic blocks.
- Otherwise we have control flow in the *middle* of a basic block.
- A contradiction!
-
- So to account for this we add yet another basic block marker:
-
- _B:
- <instrs>
- _B1:
- ...
- cmpxchgq ...
- jne _B1
- jmp _B1'
- _B1':
- <instrs>
- <end _B1>
- _B2:
- ...
-
- Now when inserting B2 we will only look at the outgoing edges of B1' and
- everything will work out nicely.
-
- You might also wonder why we don't insert jumps at the end of _B1'. There is
- no way another block ends up jumping to the labels _B1 or _B2 since they are
- essentially invisible to other blocks. View them as control flow labels local
- to the basic block if you'd like.
-
- Not doing this ultimately caused (part 2 of) #17334.
--}
-
-
--- -----------------------------------------------------------------------------
--- Generating C calls
-
--- Now the biggest nightmare---calls. Most of the nastiness is buried in
--- @get_arg@, which moves the arguments to the correct registers/stack
--- locations. Apart from that, the code is easy.
---
--- (If applicable) Do not fill the delay slots here; you will confuse the
--- register allocator.
---
--- See Note [Keeping track of the current block] for information why we need
--- to take/return a block id.
-
-genCCall
- :: DynFlags
- -> Bool -- 32 bit platform?
- -> ForeignTarget -- function to call
- -> [CmmFormal] -- where to put the result
- -> [CmmActual] -- arguments (of mixed type)
- -> BlockId -- The block we are in
- -> NatM (InstrBlock, Maybe BlockId)
-
--- First we deal with cases which might introduce new blocks in the stream.
-
-genCCall dflags is32Bit (PrimTarget (MO_AtomicRMW width amop))
- [dst] [addr, n] bid = do
- Amode amode addr_code <-
- if amop `elem` [AMO_Add, AMO_Sub]
- then getAmode addr
- else getSimpleAmode dflags is32Bit addr -- See genCCall for MO_Cmpxchg
- arg <- getNewRegNat format
- arg_code <- getAnyReg n
- let platform = targetPlatform dflags
- dst_r = getRegisterReg platform (CmmLocal dst)
- (code, lbl) <- op_code dst_r arg amode
- return (addr_code `appOL` arg_code arg `appOL` code, Just lbl)
- where
- -- Code for the operation
- op_code :: Reg -- Destination reg
- -> Reg -- Register containing argument
- -> AddrMode -- Address of location to mutate
- -> NatM (OrdList Instr,BlockId) -- TODO: Return Maybe BlockId
- op_code dst_r arg amode = case amop of
- -- In the common case where dst_r is a virtual register the
- -- final move should go away, because it's the last use of arg
- -- and the first use of dst_r.
- AMO_Add -> return $ (toOL [ LOCK (XADD format (OpReg arg) (OpAddr amode))
- , MOV format (OpReg arg) (OpReg dst_r)
- ], bid)
- AMO_Sub -> return $ (toOL [ NEGI format (OpReg arg)
- , LOCK (XADD format (OpReg arg) (OpAddr amode))
- , MOV format (OpReg arg) (OpReg dst_r)
- ], bid)
- -- In these cases we need a new block id, and have to return it so
- -- that later instruction selection can reference it.
- AMO_And -> cmpxchg_code (\ src dst -> unitOL $ AND format src dst)
- AMO_Nand -> cmpxchg_code (\ src dst -> toOL [ AND format src dst
- , NOT format dst
- ])
- AMO_Or -> cmpxchg_code (\ src dst -> unitOL $ OR format src dst)
- AMO_Xor -> cmpxchg_code (\ src dst -> unitOL $ XOR format src dst)
- where
- -- Simulate operation that lacks a dedicated instruction using
- -- cmpxchg.
- cmpxchg_code :: (Operand -> Operand -> OrdList Instr)
- -> NatM (OrdList Instr, BlockId)
- cmpxchg_code instrs = do
- lbl1 <- getBlockIdNat
- lbl2 <- getBlockIdNat
- tmp <- getNewRegNat format
-
- --Record inserted blocks
- -- We turn A -> B into A -> A' -> A'' -> B
- -- with a self loop on A'.
- addImmediateSuccessorNat bid lbl1
- addImmediateSuccessorNat lbl1 lbl2
- updateCfgNat (addWeightEdge lbl1 lbl1 0)
-
- return $ (toOL
- [ MOV format (OpAddr amode) (OpReg eax)
- , JXX ALWAYS lbl1
- , NEWBLOCK lbl1
- -- Keep old value so we can return it:
- , MOV format (OpReg eax) (OpReg dst_r)
- , MOV format (OpReg eax) (OpReg tmp)
- ]
- `appOL` instrs (OpReg arg) (OpReg tmp) `appOL` toOL
- [ LOCK (CMPXCHG format (OpReg tmp) (OpAddr amode))
- , JXX NE lbl1
- -- See Note [Introducing cfg edges inside basic blocks]
- -- why this basic block is required.
- , JXX ALWAYS lbl2
- , NEWBLOCK lbl2
- ],
- lbl2)
- format = intFormat width
-
-genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src] bid
- | is32Bit, width == W64 = do
- ChildCode64 vcode rlo <- iselExpr64 src
- let rhi = getHiVRegFromLo rlo
- dst_r = getRegisterReg platform (CmmLocal dst)
- lbl1 <- getBlockIdNat
- lbl2 <- getBlockIdNat
- let format = if width == W8 then II16 else intFormat width
- tmp_r <- getNewRegNat format
-
- -- New CFG Edges:
- -- bid -> lbl2
- -- bid -> lbl1 -> lbl2
- -- We also changes edges originating at bid to start at lbl2 instead.
- updateCfgNat (addWeightEdge bid lbl1 110 .
- addWeightEdge lbl1 lbl2 110 .
- addImmediateSuccessor bid lbl2)
-
- -- The following instruction sequence corresponds to the pseudo-code
- --
- -- if (src) {
- -- dst = src.lo32 ? BSF(src.lo32) : (BSF(src.hi32) + 32);
- -- } else {
- -- dst = 64;
- -- }
- let !instrs = vcode `appOL` toOL
- ([ MOV II32 (OpReg rhi) (OpReg tmp_r)
- , OR II32 (OpReg rlo) (OpReg tmp_r)
- , MOV II32 (OpImm (ImmInt 64)) (OpReg dst_r)
- , JXX EQQ lbl2
- , JXX ALWAYS lbl1
-
- , NEWBLOCK lbl1
- , BSF II32 (OpReg rhi) dst_r
- , ADD II32 (OpImm (ImmInt 32)) (OpReg dst_r)
- , BSF II32 (OpReg rlo) tmp_r
- , CMOV NE II32 (OpReg tmp_r) dst_r
- , JXX ALWAYS lbl2
-
- , NEWBLOCK lbl2
- ])
- return (instrs, Just lbl2)
-
- | otherwise = do
- code_src <- getAnyReg src
- let dst_r = getRegisterReg platform (CmmLocal dst)
-
- if isBmi2Enabled dflags
- then do
- src_r <- getNewRegNat (intFormat width)
- let instrs = appOL (code_src src_r) $ case width of
- W8 -> toOL
- [ OR II32 (OpImm (ImmInteger 0xFFFFFF00)) (OpReg src_r)
- , TZCNT II32 (OpReg src_r) dst_r
- ]
- W16 -> toOL
- [ TZCNT II16 (OpReg src_r) dst_r
- , MOVZxL II16 (OpReg dst_r) (OpReg dst_r)
- ]
- _ -> unitOL $ TZCNT (intFormat width) (OpReg src_r) dst_r
- return (instrs, Nothing)
- else do
- -- The following insn sequence makes sure 'ctz 0' has a defined value.
- -- starting with Haswell, one could use the TZCNT insn instead.
- let format = if width == W8 then II16 else intFormat width
- src_r <- getNewRegNat format
- tmp_r <- getNewRegNat format
- let !instrs = code_src src_r `appOL` toOL
- ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++
- [ BSF format (OpReg src_r) tmp_r
- , MOV II32 (OpImm (ImmInt bw)) (OpReg dst_r)
- , CMOV NE format (OpReg tmp_r) dst_r
- ]) -- NB: We don't need to zero-extend the result for the
- -- W8/W16 cases because the 'MOV' insn already
- -- took care of implicitly clearing the upper bits
- return (instrs, Nothing)
- where
- bw = widthInBits width
- platform = targetPlatform dflags
-
-genCCall dflags bits mop dst args bid = do
- instr <- genCCall' dflags bits mop dst args bid
- return (instr, Nothing)
-
--- genCCall' handles cases not introducing new code blocks.
-genCCall'
- :: DynFlags
- -> Bool -- 32 bit platform?
- -> ForeignTarget -- function to call
- -> [CmmFormal] -- where to put the result
- -> [CmmActual] -- arguments (of mixed type)
- -> BlockId -- The block we are in
- -> NatM InstrBlock
-
--- Unroll memcpy calls if the number of bytes to copy isn't too
--- large. Otherwise, call C's memcpy.
-genCCall' dflags _ (PrimTarget (MO_Memcpy align)) _
- [dst, src, CmmLit (CmmInt n _)] _
- | fromInteger insns <= maxInlineMemcpyInsns dflags = do
- code_dst <- getAnyReg dst
- dst_r <- getNewRegNat format
- code_src <- getAnyReg src
- src_r <- getNewRegNat format
- tmp_r <- getNewRegNat format
- return $ code_dst dst_r `appOL` code_src src_r `appOL`
- go dst_r src_r tmp_r (fromInteger n)
- where
- -- The number of instructions we will generate (approx). We need 2
- -- instructions per move.
- insns = 2 * ((n + sizeBytes - 1) `div` sizeBytes)
-
- maxAlignment = wordAlignment dflags -- only machine word wide MOVs are supported
- effectiveAlignment = min (alignmentOf align) maxAlignment
- format = intFormat . widthFromBytes $ alignmentBytes effectiveAlignment
-
- -- The size of each move, in bytes.
- sizeBytes :: Integer
- sizeBytes = fromIntegral (formatInBytes format)
-
- go :: Reg -> Reg -> Reg -> Integer -> OrdList Instr
- go dst src tmp i
- | i >= sizeBytes =
- unitOL (MOV format (OpAddr src_addr) (OpReg tmp)) `appOL`
- unitOL (MOV format (OpReg tmp) (OpAddr dst_addr)) `appOL`
- go dst src tmp (i - sizeBytes)
- -- Deal with remaining bytes.
- | i >= 4 = -- Will never happen on 32-bit
- unitOL (MOV II32 (OpAddr src_addr) (OpReg tmp)) `appOL`
- unitOL (MOV II32 (OpReg tmp) (OpAddr dst_addr)) `appOL`
- go dst src tmp (i - 4)
- | i >= 2 =
- unitOL (MOVZxL II16 (OpAddr src_addr) (OpReg tmp)) `appOL`
- unitOL (MOV II16 (OpReg tmp) (OpAddr dst_addr)) `appOL`
- go dst src tmp (i - 2)
- | i >= 1 =
- unitOL (MOVZxL II8 (OpAddr src_addr) (OpReg tmp)) `appOL`
- unitOL (MOV II8 (OpReg tmp) (OpAddr dst_addr)) `appOL`
- go dst src tmp (i - 1)
- | otherwise = nilOL
- where
- src_addr = AddrBaseIndex (EABaseReg src) EAIndexNone
- (ImmInteger (n - i))
- dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone
- (ImmInteger (n - i))
-
-genCCall' dflags _ (PrimTarget (MO_Memset align)) _
- [dst,
- CmmLit (CmmInt c _),
- CmmLit (CmmInt n _)]
- _
- | fromInteger insns <= maxInlineMemsetInsns dflags = do
- code_dst <- getAnyReg dst
- dst_r <- getNewRegNat format
- if format == II64 && n >= 8 then do
- code_imm8byte <- getAnyReg (CmmLit (CmmInt c8 W64))
- imm8byte_r <- getNewRegNat II64
- return $ code_dst dst_r `appOL`
- code_imm8byte imm8byte_r `appOL`
- go8 dst_r imm8byte_r (fromInteger n)
- else
- return $ code_dst dst_r `appOL`
- go4 dst_r (fromInteger n)
- where
- maxAlignment = wordAlignment dflags -- only machine word wide MOVs are supported
- effectiveAlignment = min (alignmentOf align) maxAlignment
- format = intFormat . widthFromBytes $ alignmentBytes effectiveAlignment
- c2 = c `shiftL` 8 .|. c
- c4 = c2 `shiftL` 16 .|. c2
- c8 = c4 `shiftL` 32 .|. c4
-
- -- The number of instructions we will generate (approx). We need 1
- -- instructions per move.
- insns = (n + sizeBytes - 1) `div` sizeBytes
-
- -- The size of each move, in bytes.
- sizeBytes :: Integer
- sizeBytes = fromIntegral (formatInBytes format)
-
- -- Depending on size returns the widest MOV instruction and its
- -- width.
- gen4 :: AddrMode -> Integer -> (InstrBlock, Integer)
- gen4 addr size
- | size >= 4 =
- (unitOL (MOV II32 (OpImm (ImmInteger c4)) (OpAddr addr)), 4)
- | size >= 2 =
- (unitOL (MOV II16 (OpImm (ImmInteger c2)) (OpAddr addr)), 2)
- | size >= 1 =
- (unitOL (MOV II8 (OpImm (ImmInteger c)) (OpAddr addr)), 1)
- | otherwise = (nilOL, 0)
-
- -- Generates a 64-bit wide MOV instruction from REG to MEM.
- gen8 :: AddrMode -> Reg -> InstrBlock
- gen8 addr reg8byte =
- unitOL (MOV format (OpReg reg8byte) (OpAddr addr))
-
- -- Unrolls memset when the widest MOV is <= 4 bytes.
- go4 :: Reg -> Integer -> InstrBlock
- go4 dst left =
- if left <= 0 then nilOL
- else curMov `appOL` go4 dst (left - curWidth)
- where
- possibleWidth = minimum [left, sizeBytes]
- dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone (ImmInteger (n - left))
- (curMov, curWidth) = gen4 dst_addr possibleWidth
-
- -- Unrolls memset when the widest MOV is 8 bytes (thus another Reg
- -- argument). Falls back to go4 when all 8 byte moves are
- -- exhausted.
- go8 :: Reg -> Reg -> Integer -> InstrBlock
- go8 dst reg8byte left =
- if possibleWidth >= 8 then
- let curMov = gen8 dst_addr reg8byte
- in curMov `appOL` go8 dst reg8byte (left - 8)
- else go4 dst left
- where
- possibleWidth = minimum [left, sizeBytes]
- dst_addr = AddrBaseIndex (EABaseReg dst) EAIndexNone (ImmInteger (n - left))
-
-genCCall' _ _ (PrimTarget MO_ReadBarrier) _ _ _ = return nilOL
-genCCall' _ _ (PrimTarget MO_WriteBarrier) _ _ _ = return nilOL
- -- barriers compile to no code on x86/x86-64;
- -- we keep it this long in order to prevent earlier optimisations.
-
-genCCall' _ _ (PrimTarget MO_Touch) _ _ _ = return nilOL
-
-genCCall' _ is32bit (PrimTarget (MO_Prefetch_Data n )) _ [src] _ =
- case n of
- 0 -> genPrefetch src $ PREFETCH NTA format
- 1 -> genPrefetch src $ PREFETCH Lvl2 format
- 2 -> genPrefetch src $ PREFETCH Lvl1 format
- 3 -> genPrefetch src $ PREFETCH Lvl0 format
- l -> panic $ "unexpected prefetch level in genCCall MO_Prefetch_Data: " ++ (show l)
- -- the c / llvm prefetch convention is 0, 1, 2, and 3
- -- the x86 corresponding names are : NTA, 2 , 1, and 0
- where
- format = archWordFormat is32bit
- -- need to know what register width for pointers!
- genPrefetch inRegSrc prefetchCTor =
- do
- code_src <- getAnyReg inRegSrc
- src_r <- getNewRegNat format
- return $ code_src src_r `appOL`
- (unitOL (prefetchCTor (OpAddr
- ((AddrBaseIndex (EABaseReg src_r ) EAIndexNone (ImmInt 0)))) ))
- -- prefetch always takes an address
-
-genCCall' dflags is32Bit (PrimTarget (MO_BSwap width)) [dst] [src] _ = do
- let platform = targetPlatform dflags
- let dst_r = getRegisterReg platform (CmmLocal dst)
- case width of
- W64 | is32Bit -> do
- ChildCode64 vcode rlo <- iselExpr64 src
- let dst_rhi = getHiVRegFromLo dst_r
- rhi = getHiVRegFromLo rlo
- return $ vcode `appOL`
- toOL [ MOV II32 (OpReg rlo) (OpReg dst_rhi),
- MOV II32 (OpReg rhi) (OpReg dst_r),
- BSWAP II32 dst_rhi,
- BSWAP II32 dst_r ]
- W16 -> do code_src <- getAnyReg src
- return $ code_src dst_r `appOL`
- unitOL (BSWAP II32 dst_r) `appOL`
- unitOL (SHR II32 (OpImm $ ImmInt 16) (OpReg dst_r))
- _ -> do code_src <- getAnyReg src
- return $ code_src dst_r `appOL` unitOL (BSWAP format dst_r)
- where
- format = intFormat width
-
-genCCall' dflags is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst]
- args@[src] bid = do
- sse4_2 <- sse4_2Enabled
- let platform = targetPlatform dflags
- if sse4_2
- then do code_src <- getAnyReg src
- src_r <- getNewRegNat format
- let dst_r = getRegisterReg platform (CmmLocal dst)
- return $ code_src src_r `appOL`
- (if width == W8 then
- -- The POPCNT instruction doesn't take a r/m8
- unitOL (MOVZxL II8 (OpReg src_r) (OpReg src_r)) `appOL`
- unitOL (POPCNT II16 (OpReg src_r) dst_r)
- else
- unitOL (POPCNT format (OpReg src_r) dst_r)) `appOL`
- (if width == W8 || width == W16 then
- -- We used a 16-bit destination register above,
- -- so zero-extend
- unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r))
- else nilOL)
- else do
- targetExpr <- cmmMakeDynamicReference dflags
- CallReference lbl
- let target = ForeignTarget targetExpr (ForeignConvention CCallConv
- [NoHint] [NoHint]
- CmmMayReturn)
- genCCall' dflags is32Bit target dest_regs args bid
- where
- format = intFormat width
- lbl = mkCmmCodeLabel primUnitId (fsLit (popCntLabel width))
-
-genCCall' dflags is32Bit (PrimTarget (MO_Pdep width)) dest_regs@[dst]
- args@[src, mask] bid = do
- let platform = targetPlatform dflags
- if isBmi2Enabled dflags
- then do code_src <- getAnyReg src
- code_mask <- getAnyReg mask
- src_r <- getNewRegNat format
- mask_r <- getNewRegNat format
- let dst_r = getRegisterReg platform (CmmLocal dst)
- return $ code_src src_r `appOL` code_mask mask_r `appOL`
- (if width == W8 then
- -- The PDEP instruction doesn't take a r/m8
- unitOL (MOVZxL II8 (OpReg src_r ) (OpReg src_r )) `appOL`
- unitOL (MOVZxL II8 (OpReg mask_r) (OpReg mask_r)) `appOL`
- unitOL (PDEP II16 (OpReg mask_r) (OpReg src_r ) dst_r)
- else
- unitOL (PDEP format (OpReg mask_r) (OpReg src_r) dst_r)) `appOL`
- (if width == W8 || width == W16 then
- -- We used a 16-bit destination register above,
- -- so zero-extend
- unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r))
- else nilOL)
- else do
- targetExpr <- cmmMakeDynamicReference dflags
- CallReference lbl
- let target = ForeignTarget targetExpr (ForeignConvention CCallConv
- [NoHint] [NoHint]
- CmmMayReturn)
- genCCall' dflags is32Bit target dest_regs args bid
- where
- format = intFormat width
- lbl = mkCmmCodeLabel primUnitId (fsLit (pdepLabel width))
-
-genCCall' dflags is32Bit (PrimTarget (MO_Pext width)) dest_regs@[dst]
- args@[src, mask] bid = do
- let platform = targetPlatform dflags
- if isBmi2Enabled dflags
- then do code_src <- getAnyReg src
- code_mask <- getAnyReg mask
- src_r <- getNewRegNat format
- mask_r <- getNewRegNat format
- let dst_r = getRegisterReg platform (CmmLocal dst)
- return $ code_src src_r `appOL` code_mask mask_r `appOL`
- (if width == W8 then
- -- The PEXT instruction doesn't take a r/m8
- unitOL (MOVZxL II8 (OpReg src_r ) (OpReg src_r )) `appOL`
- unitOL (MOVZxL II8 (OpReg mask_r) (OpReg mask_r)) `appOL`
- unitOL (PEXT II16 (OpReg mask_r) (OpReg src_r) dst_r)
- else
- unitOL (PEXT format (OpReg mask_r) (OpReg src_r) dst_r)) `appOL`
- (if width == W8 || width == W16 then
- -- We used a 16-bit destination register above,
- -- so zero-extend
- unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r))
- else nilOL)
- else do
- targetExpr <- cmmMakeDynamicReference dflags
- CallReference lbl
- let target = ForeignTarget targetExpr (ForeignConvention CCallConv
- [NoHint] [NoHint]
- CmmMayReturn)
- genCCall' dflags is32Bit target dest_regs args bid
- where
- format = intFormat width
- lbl = mkCmmCodeLabel primUnitId (fsLit (pextLabel width))
-
-genCCall' dflags is32Bit (PrimTarget (MO_Clz width)) dest_regs@[dst] args@[src] bid
- | is32Bit && width == W64 = do
- -- Fallback to `hs_clz64` on i386
- targetExpr <- cmmMakeDynamicReference dflags CallReference lbl
- let target = ForeignTarget targetExpr (ForeignConvention CCallConv
- [NoHint] [NoHint]
- CmmMayReturn)
- genCCall' dflags is32Bit target dest_regs args bid
-
- | otherwise = do
- code_src <- getAnyReg src
- let dst_r = getRegisterReg platform (CmmLocal dst)
- if isBmi2Enabled dflags
- then do
- src_r <- getNewRegNat (intFormat width)
- return $ appOL (code_src src_r) $ case width of
- W8 -> toOL
- [ MOVZxL II8 (OpReg src_r) (OpReg src_r) -- zero-extend to 32 bit
- , LZCNT II32 (OpReg src_r) dst_r -- lzcnt with extra 24 zeros
- , SUB II32 (OpImm (ImmInt 24)) (OpReg dst_r) -- compensate for extra zeros
- ]
- W16 -> toOL
- [ LZCNT II16 (OpReg src_r) dst_r
- , MOVZxL II16 (OpReg dst_r) (OpReg dst_r) -- zero-extend from 16 bit
- ]
- _ -> unitOL (LZCNT (intFormat width) (OpReg src_r) dst_r)
- else do
- let format = if width == W8 then II16 else intFormat width
- src_r <- getNewRegNat format
- tmp_r <- getNewRegNat format
- return $ code_src src_r `appOL` toOL
- ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++
- [ BSR format (OpReg src_r) tmp_r
- , MOV II32 (OpImm (ImmInt (2*bw-1))) (OpReg dst_r)
- , CMOV NE format (OpReg tmp_r) dst_r
- , XOR format (OpImm (ImmInt (bw-1))) (OpReg dst_r)
- ]) -- NB: We don't need to zero-extend the result for the
- -- W8/W16 cases because the 'MOV' insn already
- -- took care of implicitly clearing the upper bits
- where
- bw = widthInBits width
- platform = targetPlatform dflags
- lbl = mkCmmCodeLabel primUnitId (fsLit (clzLabel width))
-
-genCCall' dflags is32Bit (PrimTarget (MO_UF_Conv width)) dest_regs args bid = do
- targetExpr <- cmmMakeDynamicReference dflags
- CallReference lbl
- let target = ForeignTarget targetExpr (ForeignConvention CCallConv
- [NoHint] [NoHint]
- CmmMayReturn)
- genCCall' dflags is32Bit target dest_regs args bid
- where
- lbl = mkCmmCodeLabel primUnitId (fsLit (word2FloatLabel width))
-
-genCCall' dflags _ (PrimTarget (MO_AtomicRead width)) [dst] [addr] _ = do
- load_code <- intLoadCode (MOV (intFormat width)) addr
- let platform = targetPlatform dflags
-
- return (load_code (getRegisterReg platform (CmmLocal dst)))
-
-genCCall' _ _ (PrimTarget (MO_AtomicWrite width)) [] [addr, val] _ = do
- code <- assignMem_IntCode (intFormat width) addr val
- return $ code `snocOL` MFENCE
-
-genCCall' dflags is32Bit (PrimTarget (MO_Cmpxchg width)) [dst] [addr, old, new] _ = do
- -- On x86 we don't have enough registers to use cmpxchg with a
- -- complicated addressing mode, so on that architecture we
- -- pre-compute the address first.
- Amode amode addr_code <- getSimpleAmode dflags is32Bit addr
- newval <- getNewRegNat format
- newval_code <- getAnyReg new
- oldval <- getNewRegNat format
- oldval_code <- getAnyReg old
- let platform = targetPlatform dflags
- dst_r = getRegisterReg platform (CmmLocal dst)
- code = toOL
- [ MOV format (OpReg oldval) (OpReg eax)
- , LOCK (CMPXCHG format (OpReg newval) (OpAddr amode))
- , MOV format (OpReg eax) (OpReg dst_r)
- ]
- return $ addr_code `appOL` newval_code newval `appOL` oldval_code oldval
- `appOL` code
- where
- format = intFormat width
-
-genCCall' _ is32Bit target dest_regs args bid = do
- dflags <- getDynFlags
- let platform = targetPlatform dflags
- case (target, dest_regs) of
- -- void return type prim op
- (PrimTarget op, []) ->
- outOfLineCmmOp bid op Nothing args
- -- we only cope with a single result for foreign calls
- (PrimTarget op, [r]) -> case op of
- MO_F32_Fabs -> case args of
- [x] -> sse2FabsCode W32 x
- _ -> panic "genCCall: Wrong number of arguments for fabs"
- MO_F64_Fabs -> case args of
- [x] -> sse2FabsCode W64 x
- _ -> panic "genCCall: Wrong number of arguments for fabs"
-
- MO_F32_Sqrt -> actuallyInlineSSE2Op (\fmt r -> SQRT fmt (OpReg r)) FF32 args
- MO_F64_Sqrt -> actuallyInlineSSE2Op (\fmt r -> SQRT fmt (OpReg r)) FF64 args
- _other_op -> outOfLineCmmOp bid op (Just r) args
-
- where
- actuallyInlineSSE2Op = actuallyInlineFloatOp'
-
- actuallyInlineFloatOp' instr format [x]
- = do res <- trivialUFCode format (instr format) x
- any <- anyReg res
- return (any (getRegisterReg platform (CmmLocal r)))
-
- actuallyInlineFloatOp' _ _ args
- = panic $ "genCCall.actuallyInlineFloatOp': bad number of arguments! ("
- ++ show (length args) ++ ")"
-
- sse2FabsCode :: Width -> CmmExpr -> NatM InstrBlock
- sse2FabsCode w x = do
- let fmt = floatFormat w
- x_code <- getAnyReg x
- let
- const | FF32 <- fmt = CmmInt 0x7fffffff W32
- | otherwise = CmmInt 0x7fffffffffffffff W64
- Amode amode amode_code <- memConstant (mkAlignment $ widthInBytes w) const
- tmp <- getNewRegNat fmt
- let
- code dst = x_code dst `appOL` amode_code `appOL` toOL [
- MOV fmt (OpAddr amode) (OpReg tmp),
- AND fmt (OpReg tmp) (OpReg dst)
- ]
-
- return $ code (getRegisterReg platform (CmmLocal r))
-
- (PrimTarget (MO_S_QuotRem width), _) -> divOp1 platform True width dest_regs args
- (PrimTarget (MO_U_QuotRem width), _) -> divOp1 platform False width dest_regs args
- (PrimTarget (MO_U_QuotRem2 width), _) -> divOp2 platform False width dest_regs args
- (PrimTarget (MO_Add2 width), [res_h, res_l]) ->
- case args of
- [arg_x, arg_y] ->
- do hCode <- getAnyReg (CmmLit (CmmInt 0 width))
- let format = intFormat width
- lCode <- anyReg =<< trivialCode width (ADD_CC format)
- (Just (ADD_CC format)) arg_x arg_y
- let reg_l = getRegisterReg platform (CmmLocal res_l)
- reg_h = getRegisterReg platform (CmmLocal res_h)
- code = hCode reg_h `appOL`
- lCode reg_l `snocOL`
- ADC format (OpImm (ImmInteger 0)) (OpReg reg_h)
- return code
- _ -> panic "genCCall: Wrong number of arguments/results for add2"
- (PrimTarget (MO_AddWordC width), [res_r, res_c]) ->
- addSubIntC platform ADD_CC (const Nothing) CARRY width res_r res_c args
- (PrimTarget (MO_SubWordC width), [res_r, res_c]) ->
- addSubIntC platform SUB_CC (const Nothing) CARRY width res_r res_c args
- (PrimTarget (MO_AddIntC width), [res_r, res_c]) ->
- addSubIntC platform ADD_CC (Just . ADD_CC) OFLO width res_r res_c args
- (PrimTarget (MO_SubIntC width), [res_r, res_c]) ->
- addSubIntC platform SUB_CC (const Nothing) OFLO width res_r res_c args
- (PrimTarget (MO_U_Mul2 width), [res_h, res_l]) ->
- case args of
- [arg_x, arg_y] ->
- do (y_reg, y_code) <- getRegOrMem arg_y
- x_code <- getAnyReg arg_x
- let format = intFormat width
- reg_h = getRegisterReg platform (CmmLocal res_h)
- reg_l = getRegisterReg platform (CmmLocal res_l)
- code = y_code `appOL`
- x_code rax `appOL`
- toOL [MUL2 format y_reg,
- MOV format (OpReg rdx) (OpReg reg_h),
- MOV format (OpReg rax) (OpReg reg_l)]
- return code
- _ -> panic "genCCall: Wrong number of arguments/results for mul2"
- (PrimTarget (MO_S_Mul2 width), [res_c, res_h, res_l]) ->
- case args of
- [arg_x, arg_y] ->
- do (y_reg, y_code) <- getRegOrMem arg_y
- x_code <- getAnyReg arg_x
- reg_tmp <- getNewRegNat II8
- let format = intFormat width
- reg_h = getRegisterReg platform (CmmLocal res_h)
- reg_l = getRegisterReg platform (CmmLocal res_l)
- reg_c = getRegisterReg platform (CmmLocal res_c)
- code = y_code `appOL`
- x_code rax `appOL`
- toOL [ IMUL2 format y_reg
- , MOV format (OpReg rdx) (OpReg reg_h)
- , MOV format (OpReg rax) (OpReg reg_l)
- , SETCC CARRY (OpReg reg_tmp)
- , MOVZxL II8 (OpReg reg_tmp) (OpReg reg_c)
- ]
- return code
- _ -> panic "genCCall: Wrong number of arguments/results for imul2"
-
- _ -> if is32Bit
- then genCCall32' dflags target dest_regs args
- else genCCall64' dflags target dest_regs args
-
- where divOp1 platform signed width results [arg_x, arg_y]
- = divOp platform signed width results Nothing arg_x arg_y
- divOp1 _ _ _ _ _
- = panic "genCCall: Wrong number of arguments for divOp1"
- divOp2 platform signed width results [arg_x_high, arg_x_low, arg_y]
- = divOp platform signed width results (Just arg_x_high) arg_x_low arg_y
- divOp2 _ _ _ _ _
- = panic "genCCall: Wrong number of arguments for divOp2"
-
- -- See Note [DIV/IDIV for bytes]
- divOp platform signed W8 [res_q, res_r] m_arg_x_high arg_x_low arg_y =
- let widen | signed = MO_SS_Conv W8 W16
- | otherwise = MO_UU_Conv W8 W16
- arg_x_low_16 = CmmMachOp widen [arg_x_low]
- arg_y_16 = CmmMachOp widen [arg_y]
- m_arg_x_high_16 = (\p -> CmmMachOp widen [p]) <$> m_arg_x_high
- in divOp
- platform signed W16 [res_q, res_r]
- m_arg_x_high_16 arg_x_low_16 arg_y_16
-
- divOp platform signed width [res_q, res_r]
- m_arg_x_high arg_x_low arg_y
- = do let format = intFormat width
- reg_q = getRegisterReg platform (CmmLocal res_q)
- reg_r = getRegisterReg platform (CmmLocal res_r)
- widen | signed = CLTD format
- | otherwise = XOR format (OpReg rdx) (OpReg rdx)
- instr | signed = IDIV
- | otherwise = DIV
- (y_reg, y_code) <- getRegOrMem arg_y
- x_low_code <- getAnyReg arg_x_low
- x_high_code <- case m_arg_x_high of
- Just arg_x_high ->
- getAnyReg arg_x_high
- Nothing ->
- return $ const $ unitOL widen
- return $ y_code `appOL`
- x_low_code rax `appOL`
- x_high_code rdx `appOL`
- toOL [instr format y_reg,
- MOV format (OpReg rax) (OpReg reg_q),
- MOV format (OpReg rdx) (OpReg reg_r)]
- divOp _ _ _ _ _ _ _
- = panic "genCCall: Wrong number of results for divOp"
-
- addSubIntC platform instr mrevinstr cond width
- res_r res_c [arg_x, arg_y]
- = do let format = intFormat width
- rCode <- anyReg =<< trivialCode width (instr format)
- (mrevinstr format) arg_x arg_y
- reg_tmp <- getNewRegNat II8
- let reg_c = getRegisterReg platform (CmmLocal res_c)
- reg_r = getRegisterReg platform (CmmLocal res_r)
- code = rCode reg_r `snocOL`
- SETCC cond (OpReg reg_tmp) `snocOL`
- MOVZxL II8 (OpReg reg_tmp) (OpReg reg_c)
-
- return code
- addSubIntC _ _ _ _ _ _ _ _
- = panic "genCCall: Wrong number of arguments/results for addSubIntC"
-
--- Note [DIV/IDIV for bytes]
---
--- IDIV reminder:
--- Size Dividend Divisor Quotient Remainder
--- byte %ax r/m8 %al %ah
--- word %dx:%ax r/m16 %ax %dx
--- dword %edx:%eax r/m32 %eax %edx
--- qword %rdx:%rax r/m64 %rax %rdx
---
--- We do a special case for the byte division because the current
--- codegen doesn't deal well with accessing %ah register (also,
--- accessing %ah in 64-bit mode is complicated because it cannot be an
--- operand of many instructions). So we just widen operands to 16 bits
--- and get the results from %al, %dl. This is not optimal, but a few
--- register moves are probably not a huge deal when doing division.
-
-genCCall32' :: DynFlags
- -> ForeignTarget -- function to call
- -> [CmmFormal] -- where to put the result
- -> [CmmActual] -- arguments (of mixed type)
- -> NatM InstrBlock
-genCCall32' dflags target dest_regs args = do
- let
- prom_args = map (maybePromoteCArg dflags W32) args
-
- -- Align stack to 16n for calls, assuming a starting stack
- -- alignment of 16n - word_size on procedure entry. Which we
- -- maintiain. See Note [rts/StgCRun.c : Stack Alignment on X86]
- sizes = map (arg_size_bytes . cmmExprType dflags) (reverse args)
- raw_arg_size = sum sizes + wORD_SIZE dflags
- arg_pad_size = (roundTo 16 $ raw_arg_size) - raw_arg_size
- tot_arg_size = raw_arg_size + arg_pad_size - wORD_SIZE dflags
- delta0 <- getDeltaNat
- setDeltaNat (delta0 - arg_pad_size)
-
- push_codes <- mapM push_arg (reverse prom_args)
- delta <- getDeltaNat
- MASSERT(delta == delta0 - tot_arg_size)
-
- -- deal with static vs dynamic call targets
- (callinsns,cconv) <-
- case target of
- ForeignTarget (CmmLit (CmmLabel lbl)) conv
- -> -- ToDo: stdcall arg sizes
- return (unitOL (CALL (Left fn_imm) []), conv)
- where fn_imm = ImmCLbl lbl
- ForeignTarget expr conv
- -> do { (dyn_r, dyn_c) <- getSomeReg expr
- ; ASSERT( isWord32 (cmmExprType dflags expr) )
- return (dyn_c `snocOL` CALL (Right dyn_r) [], conv) }
- PrimTarget _
- -> panic $ "genCCall: Can't handle PrimTarget call type here, error "
- ++ "probably because too many return values."
-
- let push_code
- | arg_pad_size /= 0
- = toOL [SUB II32 (OpImm (ImmInt arg_pad_size)) (OpReg esp),
- DELTA (delta0 - arg_pad_size)]
- `appOL` concatOL push_codes
- | otherwise
- = concatOL push_codes
-
- -- Deallocate parameters after call for ccall;
- -- but not for stdcall (callee does it)
- --
- -- We have to pop any stack padding we added
- -- even if we are doing stdcall, though (#5052)
- pop_size
- | ForeignConvention StdCallConv _ _ _ <- cconv = arg_pad_size
- | otherwise = tot_arg_size
-
- call = callinsns `appOL`
- toOL (
- (if pop_size==0 then [] else
- [ADD II32 (OpImm (ImmInt pop_size)) (OpReg esp)])
- ++
- [DELTA delta0]
- )
- setDeltaNat delta0
-
- dflags <- getDynFlags
- let platform = targetPlatform dflags
-
- let
- -- assign the results, if necessary
- assign_code [] = nilOL
- assign_code [dest]
- | isFloatType ty =
- -- we assume SSE2
- let tmp_amode = AddrBaseIndex (EABaseReg esp)
- EAIndexNone
- (ImmInt 0)
- fmt = floatFormat w
- in toOL [ SUB II32 (OpImm (ImmInt b)) (OpReg esp),
- DELTA (delta0 - b),
- X87Store fmt tmp_amode,
- -- X87Store only supported for the CDECL ABI
- -- NB: This code will need to be
- -- revisted once GHC does more work around
- -- SIGFPE f
- MOV fmt (OpAddr tmp_amode) (OpReg r_dest),
- ADD II32 (OpImm (ImmInt b)) (OpReg esp),
- DELTA delta0]
- | isWord64 ty = toOL [MOV II32 (OpReg eax) (OpReg r_dest),
- MOV II32 (OpReg edx) (OpReg r_dest_hi)]
- | otherwise = unitOL (MOV (intFormat w)
- (OpReg eax)
- (OpReg r_dest))
- where
- ty = localRegType dest
- w = typeWidth ty
- b = widthInBytes w
- r_dest_hi = getHiVRegFromLo r_dest
- r_dest = getRegisterReg platform (CmmLocal dest)
- assign_code many = pprPanic "genCCall.assign_code - too many return values:" (ppr many)
-
- return (push_code `appOL`
- call `appOL`
- assign_code dest_regs)
-
- where
- -- If the size is smaller than the word, we widen things (see maybePromoteCArg)
- arg_size_bytes :: CmmType -> Int
- arg_size_bytes ty = max (widthInBytes (typeWidth ty)) (widthInBytes (wordWidth dflags))
-
- roundTo a x | x `mod` a == 0 = x
- | otherwise = x + a - (x `mod` a)
-
- push_arg :: CmmActual {-current argument-}
- -> NatM InstrBlock -- code
-
- push_arg arg -- we don't need the hints on x86
- | isWord64 arg_ty = do
- ChildCode64 code r_lo <- iselExpr64 arg
- delta <- getDeltaNat
- setDeltaNat (delta - 8)
- let r_hi = getHiVRegFromLo r_lo
- return ( code `appOL`
- toOL [PUSH II32 (OpReg r_hi), DELTA (delta - 4),
- PUSH II32 (OpReg r_lo), DELTA (delta - 8),
- DELTA (delta-8)]
- )
-
- | isFloatType arg_ty = do
- (reg, code) <- getSomeReg arg
- delta <- getDeltaNat
- setDeltaNat (delta-size)
- return (code `appOL`
- toOL [SUB II32 (OpImm (ImmInt size)) (OpReg esp),
- DELTA (delta-size),
- let addr = AddrBaseIndex (EABaseReg esp)
- EAIndexNone
- (ImmInt 0)
- format = floatFormat (typeWidth arg_ty)
- in
-
- -- assume SSE2
- MOV format (OpReg reg) (OpAddr addr)
-
- ]
- )
-
- | otherwise = do
- -- Arguments can be smaller than 32-bit, but we still use @PUSH
- -- II32@ - the usual calling conventions expect integers to be
- -- 4-byte aligned.
- ASSERT((typeWidth arg_ty) <= W32) return ()
- (operand, code) <- getOperand arg
- delta <- getDeltaNat
- setDeltaNat (delta-size)
- return (code `snocOL`
- PUSH II32 operand `snocOL`
- DELTA (delta-size))
-
- where
- arg_ty = cmmExprType dflags arg
- size = arg_size_bytes arg_ty -- Byte size
-
-genCCall64' :: DynFlags
- -> ForeignTarget -- function to call
- -> [CmmFormal] -- where to put the result
- -> [CmmActual] -- arguments (of mixed type)
- -> NatM InstrBlock
-genCCall64' dflags target dest_regs args = do
- -- load up the register arguments
- let prom_args = map (maybePromoteCArg dflags W32) args
-
- (stack_args, int_regs_used, fp_regs_used, load_args_code, assign_args_code)
- <-
- if platformOS platform == OSMinGW32
- then load_args_win prom_args [] [] (allArgRegs platform) nilOL
- else do
- (stack_args, aregs, fregs, load_args_code, assign_args_code)
- <- load_args prom_args (allIntArgRegs platform)
- (allFPArgRegs platform)
- nilOL nilOL
- let used_regs rs as = reverse (drop (length rs) (reverse as))
- fregs_used = used_regs fregs (allFPArgRegs platform)
- aregs_used = used_regs aregs (allIntArgRegs platform)
- return (stack_args, aregs_used, fregs_used, load_args_code
- , assign_args_code)
-
- let
- arg_regs_used = int_regs_used ++ fp_regs_used
- arg_regs = [eax] ++ arg_regs_used
- -- for annotating the call instruction with
- sse_regs = length fp_regs_used
- arg_stack_slots = if platformOS platform == OSMinGW32
- then length stack_args + length (allArgRegs platform)
- else length stack_args
- tot_arg_size = arg_size * arg_stack_slots
-
-
- -- Align stack to 16n for calls, assuming a starting stack
- -- alignment of 16n - word_size on procedure entry. Which we
- -- maintain. See Note [rts/StgCRun.c : Stack Alignment on X86]
- (real_size, adjust_rsp) <-
- if (tot_arg_size + wORD_SIZE dflags) `rem` 16 == 0
- then return (tot_arg_size, nilOL)
- else do -- we need to adjust...
- delta <- getDeltaNat
- setDeltaNat (delta - wORD_SIZE dflags)
- return (tot_arg_size + wORD_SIZE dflags, toOL [
- SUB II64 (OpImm (ImmInt (wORD_SIZE dflags))) (OpReg rsp),
- DELTA (delta - wORD_SIZE dflags) ])
-
- -- push the stack args, right to left
- push_code <- push_args (reverse stack_args) nilOL
- -- On Win64, we also have to leave stack space for the arguments
- -- that we are passing in registers
- lss_code <- if platformOS platform == OSMinGW32
- then leaveStackSpace (length (allArgRegs platform))
- else return nilOL
- delta <- getDeltaNat
-
- -- deal with static vs dynamic call targets
- (callinsns,_cconv) <-
- case target of
- ForeignTarget (CmmLit (CmmLabel lbl)) conv
- -> -- ToDo: stdcall arg sizes
- return (unitOL (CALL (Left fn_imm) arg_regs), conv)
- where fn_imm = ImmCLbl lbl
- ForeignTarget expr conv
- -> do (dyn_r, dyn_c) <- getSomeReg expr
- return (dyn_c `snocOL` CALL (Right dyn_r) arg_regs, conv)
- PrimTarget _
- -> panic $ "genCCall: Can't handle PrimTarget call type here, error "
- ++ "probably because too many return values."
-
- let
- -- The x86_64 ABI requires us to set %al to the number of SSE2
- -- registers that contain arguments, if the called routine
- -- is a varargs function. We don't know whether it's a
- -- varargs function or not, so we have to assume it is.
- --
- -- It's not safe to omit this assignment, even if the number
- -- of SSE2 regs in use is zero. If %al is larger than 8
- -- on entry to a varargs function, seg faults ensue.
- assign_eax n = unitOL (MOV II32 (OpImm (ImmInt n)) (OpReg eax))
-
- let call = callinsns `appOL`
- toOL (
- -- Deallocate parameters after call for ccall;
- -- stdcall has callee do it, but is not supported on
- -- x86_64 target (see #3336)
- (if real_size==0 then [] else
- [ADD (intFormat (wordWidth dflags)) (OpImm (ImmInt real_size)) (OpReg esp)])
- ++
- [DELTA (delta + real_size)]
- )
- setDeltaNat (delta + real_size)
-
- let
- -- assign the results, if necessary
- assign_code [] = nilOL
- assign_code [dest] =
- case typeWidth rep of
- W32 | isFloatType rep -> unitOL (MOV (floatFormat W32)
- (OpReg xmm0)
- (OpReg r_dest))
- W64 | isFloatType rep -> unitOL (MOV (floatFormat W64)
- (OpReg xmm0)
- (OpReg r_dest))
- _ -> unitOL (MOV (cmmTypeFormat rep) (OpReg rax) (OpReg r_dest))
- where
- rep = localRegType dest
- r_dest = getRegisterReg platform (CmmLocal dest)
- assign_code _many = panic "genCCall.assign_code many"
-
- return (adjust_rsp `appOL`
- push_code `appOL`
- load_args_code `appOL`
- assign_args_code `appOL`
- lss_code `appOL`
- assign_eax sse_regs `appOL`
- call `appOL`
- assign_code dest_regs)
-
- where platform = targetPlatform dflags
- arg_size = 8 -- always, at the mo
-
-
- load_args :: [CmmExpr]
- -> [Reg] -- int regs avail for args
- -> [Reg] -- FP regs avail for args
- -> InstrBlock -- code computing args
- -> InstrBlock -- code assigning args to ABI regs
- -> NatM ([CmmExpr],[Reg],[Reg],InstrBlock,InstrBlock)
- -- no more regs to use
- load_args args [] [] code acode =
- return (args, [], [], code, acode)
-
- -- no more args to push
- load_args [] aregs fregs code acode =
- return ([], aregs, fregs, code, acode)
-
- load_args (arg : rest) aregs fregs code acode
- | isFloatType arg_rep = case fregs of
- [] -> push_this_arg
- (r:rs) -> do
- (code',acode') <- reg_this_arg r
- load_args rest aregs rs code' acode'
- | otherwise = case aregs of
- [] -> push_this_arg
- (r:rs) -> do
- (code',acode') <- reg_this_arg r
- load_args rest rs fregs code' acode'
- where
-
- -- put arg into the list of stack pushed args
- push_this_arg = do
- (args',ars,frs,code',acode')
- <- load_args rest aregs fregs code acode
- return (arg:args', ars, frs, code', acode')
-
- -- pass the arg into the given register
- reg_this_arg r
- -- "operand" args can be directly assigned into r
- | isOperand False arg = do
- arg_code <- getAnyReg arg
- return (code, (acode `appOL` arg_code r))
- -- The last non-operand arg can be directly assigned after its
- -- computation without going into a temporary register
- | all (isOperand False) rest = do
- arg_code <- getAnyReg arg
- return (code `appOL` arg_code r,acode)
-
- -- other args need to be computed beforehand to avoid clobbering
- -- previously assigned registers used to pass parameters (see
- -- #11792, #12614). They are assigned into temporary registers
- -- and get assigned to proper call ABI registers after they all
- -- have been computed.
- | otherwise = do
- arg_code <- getAnyReg arg
- tmp <- getNewRegNat arg_fmt
- let
- code' = code `appOL` arg_code tmp
- acode' = acode `snocOL` reg2reg arg_fmt tmp r
- return (code',acode')
-
- arg_rep = cmmExprType dflags arg
- arg_fmt = cmmTypeFormat arg_rep
-
- load_args_win :: [CmmExpr]
- -> [Reg] -- used int regs
- -> [Reg] -- used FP regs
- -> [(Reg, Reg)] -- (int, FP) regs avail for args
- -> InstrBlock
- -> NatM ([CmmExpr],[Reg],[Reg],InstrBlock,InstrBlock)
- load_args_win args usedInt usedFP [] code
- = return (args, usedInt, usedFP, code, nilOL)
- -- no more regs to use
- load_args_win [] usedInt usedFP _ code
- = return ([], usedInt, usedFP, code, nilOL)
- -- no more args to push
- load_args_win (arg : rest) usedInt usedFP
- ((ireg, freg) : regs) code
- | isFloatType arg_rep = do
- arg_code <- getAnyReg arg
- load_args_win rest (ireg : usedInt) (freg : usedFP) regs
- (code `appOL`
- arg_code freg `snocOL`
- -- If we are calling a varargs function
- -- then we need to define ireg as well
- -- as freg
- MOV II64 (OpReg freg) (OpReg ireg))
- | otherwise = do
- arg_code <- getAnyReg arg
- load_args_win rest (ireg : usedInt) usedFP regs
- (code `appOL` arg_code ireg)
- where
- arg_rep = cmmExprType dflags arg
-
- push_args [] code = return code
- push_args (arg:rest) code
- | isFloatType arg_rep = do
- (arg_reg, arg_code) <- getSomeReg arg
- delta <- getDeltaNat
- setDeltaNat (delta-arg_size)
- let code' = code `appOL` arg_code `appOL` toOL [
- SUB (intFormat (wordWidth dflags)) (OpImm (ImmInt arg_size)) (OpReg rsp),
- DELTA (delta-arg_size),
- MOV (floatFormat width) (OpReg arg_reg) (OpAddr (spRel dflags 0))]
- push_args rest code'
-
- | otherwise = do
- -- Arguments can be smaller than 64-bit, but we still use @PUSH
- -- II64@ - the usual calling conventions expect integers to be
- -- 8-byte aligned.
- ASSERT(width <= W64) return ()
- (arg_op, arg_code) <- getOperand arg
- delta <- getDeltaNat
- setDeltaNat (delta-arg_size)
- let code' = code `appOL` arg_code `appOL` toOL [
- PUSH II64 arg_op,
- DELTA (delta-arg_size)]
- push_args rest code'
- where
- arg_rep = cmmExprType dflags arg
- width = typeWidth arg_rep
-
- leaveStackSpace n = do
- delta <- getDeltaNat
- setDeltaNat (delta - n * arg_size)
- return $ toOL [
- SUB II64 (OpImm (ImmInt (n * wORD_SIZE dflags))) (OpReg rsp),
- DELTA (delta - n * arg_size)]
-
-maybePromoteCArg :: DynFlags -> Width -> CmmExpr -> CmmExpr
-maybePromoteCArg dflags wto arg
- | wfrom < wto = CmmMachOp (MO_UU_Conv wfrom wto) [arg]
- | otherwise = arg
- where
- wfrom = cmmExprWidth dflags arg
-
-outOfLineCmmOp :: BlockId -> CallishMachOp -> Maybe CmmFormal -> [CmmActual]
- -> NatM InstrBlock
-outOfLineCmmOp bid mop res args
- = do
- dflags <- getDynFlags
- targetExpr <- cmmMakeDynamicReference dflags CallReference lbl
- let target = ForeignTarget targetExpr
- (ForeignConvention CCallConv [] [] CmmMayReturn)
-
- -- We know foreign calls results in no new basic blocks, so we can ignore
- -- the returned block id.
- (instrs, _) <- stmtToInstrs bid (CmmUnsafeForeignCall target (catMaybes [res]) args)
- return instrs
- where
- -- Assume we can call these functions directly, and that they're not in a dynamic library.
- -- TODO: Why is this ok? Under linux this code will be in libm.so
- -- Is it because they're really implemented as a primitive instruction by the assembler?? -- BL 2009/12/31
- lbl = mkForeignLabel fn Nothing ForeignLabelInThisPackage IsFunction
-
- fn = case mop of
- MO_F32_Sqrt -> fsLit "sqrtf"
- MO_F32_Fabs -> fsLit "fabsf"
- MO_F32_Sin -> fsLit "sinf"
- MO_F32_Cos -> fsLit "cosf"
- MO_F32_Tan -> fsLit "tanf"
- MO_F32_Exp -> fsLit "expf"
- MO_F32_ExpM1 -> fsLit "expm1f"
- MO_F32_Log -> fsLit "logf"
- MO_F32_Log1P -> fsLit "log1pf"
-
- MO_F32_Asin -> fsLit "asinf"
- MO_F32_Acos -> fsLit "acosf"
- MO_F32_Atan -> fsLit "atanf"
-
- MO_F32_Sinh -> fsLit "sinhf"
- MO_F32_Cosh -> fsLit "coshf"
- MO_F32_Tanh -> fsLit "tanhf"
- MO_F32_Pwr -> fsLit "powf"
-
- MO_F32_Asinh -> fsLit "asinhf"
- MO_F32_Acosh -> fsLit "acoshf"
- MO_F32_Atanh -> fsLit "atanhf"
-
- MO_F64_Sqrt -> fsLit "sqrt"
- MO_F64_Fabs -> fsLit "fabs"
- MO_F64_Sin -> fsLit "sin"
- MO_F64_Cos -> fsLit "cos"
- MO_F64_Tan -> fsLit "tan"
- MO_F64_Exp -> fsLit "exp"
- MO_F64_ExpM1 -> fsLit "expm1"
- MO_F64_Log -> fsLit "log"
- MO_F64_Log1P -> fsLit "log1p"
-
- MO_F64_Asin -> fsLit "asin"
- MO_F64_Acos -> fsLit "acos"
- MO_F64_Atan -> fsLit "atan"
-
- MO_F64_Sinh -> fsLit "sinh"
- MO_F64_Cosh -> fsLit "cosh"
- MO_F64_Tanh -> fsLit "tanh"
- MO_F64_Pwr -> fsLit "pow"
-
- MO_F64_Asinh -> fsLit "asinh"
- MO_F64_Acosh -> fsLit "acosh"
- MO_F64_Atanh -> fsLit "atanh"
-
- MO_Memcpy _ -> fsLit "memcpy"
- MO_Memset _ -> fsLit "memset"
- MO_Memmove _ -> fsLit "memmove"
- MO_Memcmp _ -> fsLit "memcmp"
-
- MO_PopCnt _ -> fsLit "popcnt"
- MO_BSwap _ -> fsLit "bswap"
- {- Here the C implementation is used as there is no x86
- instruction to reverse a word's bit order.
- -}
- MO_BRev w -> fsLit $ bRevLabel w
- MO_Clz w -> fsLit $ clzLabel w
- MO_Ctz _ -> unsupported
-
- MO_Pdep w -> fsLit $ pdepLabel w
- MO_Pext w -> fsLit $ pextLabel w
-
- MO_AtomicRMW _ _ -> fsLit "atomicrmw"
- MO_AtomicRead _ -> fsLit "atomicread"
- MO_AtomicWrite _ -> fsLit "atomicwrite"
- MO_Cmpxchg _ -> fsLit "cmpxchg"
-
- MO_UF_Conv _ -> unsupported
-
- MO_S_Mul2 {} -> unsupported
- MO_S_QuotRem {} -> unsupported
- MO_U_QuotRem {} -> unsupported
- MO_U_QuotRem2 {} -> unsupported
- MO_Add2 {} -> unsupported
- MO_AddIntC {} -> unsupported
- MO_SubIntC {} -> unsupported
- MO_AddWordC {} -> unsupported
- MO_SubWordC {} -> unsupported
- MO_U_Mul2 {} -> unsupported
- MO_ReadBarrier -> unsupported
- MO_WriteBarrier -> unsupported
- MO_Touch -> unsupported
- (MO_Prefetch_Data _ ) -> unsupported
- unsupported = panic ("outOfLineCmmOp: " ++ show mop
- ++ " not supported here")
-
--- -----------------------------------------------------------------------------
--- Generating a table-branch
-
-genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock
-
-genSwitch dflags expr targets
- | positionIndependent dflags
- = do
- (reg,e_code) <- getNonClobberedReg (cmmOffset dflags expr offset)
- -- getNonClobberedReg because it needs to survive across t_code
- lbl <- getNewLabelNat
- dflags <- getDynFlags
- let is32bit = target32Bit (targetPlatform dflags)
- os = platformOS (targetPlatform dflags)
- -- Might want to use .rodata.<function we're in> instead, but as
- -- long as it's something unique it'll work out since the
- -- references to the jump table are in the appropriate section.
- rosection = case os of
- -- on Mac OS X/x86_64, put the jump table in the text section to
- -- work around a limitation of the linker.
- -- ld64 is unable to handle the relocations for
- -- .quad L1 - L0
- -- if L0 is not preceded by a non-anonymous label in its section.
- OSDarwin | not is32bit -> Section Text lbl
- _ -> Section ReadOnlyData lbl
- dynRef <- cmmMakeDynamicReference dflags DataReference lbl
- (tableReg,t_code) <- getSomeReg $ dynRef
- let op = OpAddr (AddrBaseIndex (EABaseReg tableReg)
- (EAIndex reg (wORD_SIZE dflags)) (ImmInt 0))
-
- offsetReg <- getNewRegNat (intFormat (wordWidth dflags))
- return $ if is32bit || os == OSDarwin
- then e_code `appOL` t_code `appOL` toOL [
- ADD (intFormat (wordWidth dflags)) op (OpReg tableReg),
- JMP_TBL (OpReg tableReg) ids rosection lbl
- ]
- else -- HACK: On x86_64 binutils<2.17 is only able to generate
- -- PC32 relocations, hence we only get 32-bit offsets in
- -- the jump table. As these offsets are always negative
- -- we need to properly sign extend them to 64-bit. This
- -- hack should be removed in conjunction with the hack in
- -- PprMach.hs/pprDataItem once binutils 2.17 is standard.
- e_code `appOL` t_code `appOL` toOL [
- MOVSxL II32 op (OpReg offsetReg),
- ADD (intFormat (wordWidth dflags))
- (OpReg offsetReg)
- (OpReg tableReg),
- JMP_TBL (OpReg tableReg) ids rosection lbl
- ]
- | otherwise
- = do
- (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
- lbl <- getNewLabelNat
- let op = OpAddr (AddrBaseIndex EABaseNone (EAIndex reg (wORD_SIZE dflags)) (ImmCLbl lbl))
- code = e_code `appOL` toOL [
- JMP_TBL op ids (Section ReadOnlyData lbl) lbl
- ]
- return code
- where
- (offset, blockIds) = switchTargetsToTable targets
- ids = map (fmap DestBlockId) blockIds
-
-generateJumpTableForInstr :: DynFlags -> Instr -> Maybe (NatCmmDecl (Alignment, RawCmmStatics) Instr)
-generateJumpTableForInstr dflags (JMP_TBL _ ids section lbl)
- = let getBlockId (DestBlockId id) = id
- getBlockId _ = panic "Non-Label target in Jump Table"
- blockIds = map (fmap getBlockId) ids
- in Just (createJumpTable dflags blockIds section lbl)
-generateJumpTableForInstr _ _ = Nothing
-
-createJumpTable :: DynFlags -> [Maybe BlockId] -> Section -> CLabel
- -> GenCmmDecl (Alignment, RawCmmStatics) h g
-createJumpTable dflags ids section lbl
- = let jumpTable
- | positionIndependent dflags =
- let ww = wordWidth dflags
- jumpTableEntryRel Nothing
- = CmmStaticLit (CmmInt 0 ww)
- jumpTableEntryRel (Just blockid)
- = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0 ww)
- where blockLabel = blockLbl blockid
- in map jumpTableEntryRel ids
- | otherwise = map (jumpTableEntry dflags) ids
- in CmmData section (mkAlignment 1, RawCmmStatics lbl jumpTable)
-
-extractUnwindPoints :: [Instr] -> [UnwindPoint]
-extractUnwindPoints instrs =
- [ UnwindPoint lbl unwinds | UNWIND lbl unwinds <- instrs]
-
--- -----------------------------------------------------------------------------
--- 'condIntReg' and 'condFltReg': condition codes into registers
-
--- Turn those condition codes into integers now (when they appear on
--- the right hand side of an assignment).
---
--- (If applicable) Do not fill the delay slots here; you will confuse the
--- register allocator.
-
-condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
-
-condIntReg cond x y = do
- CondCode _ cond cond_code <- condIntCode cond x y
- tmp <- getNewRegNat II8
- let
- code dst = cond_code `appOL` toOL [
- SETCC cond (OpReg tmp),
- MOVZxL II8 (OpReg tmp) (OpReg dst)
- ]
- return (Any II32 code)
-
-
------------------------------------------------------------
---- Note [SSE Parity Checks] ---
------------------------------------------------------------
-
--- We have to worry about unordered operands (eg. comparisons
--- against NaN). If the operands are unordered, the comparison
--- sets the parity flag, carry flag and zero flag.
--- All comparisons are supposed to return false for unordered
--- operands except for !=, which returns true.
---
--- Optimisation: we don't have to test the parity flag if we
--- know the test has already excluded the unordered case: eg >
--- and >= test for a zero carry flag, which can only occur for
--- ordered operands.
---
--- By reversing comparisons we can avoid testing the parity
--- for < and <= as well. If any of the arguments is an NaN we
--- return false either way. If both arguments are valid then
--- x <= y <-> y >= x holds. So it's safe to swap these.
---
--- We invert the condition inside getRegister'and getCondCode
--- which should cover all invertable cases.
--- All other functions translating FP comparisons to assembly
--- use these to two generate the comparison code.
---
--- As an example consider a simple check:
---
--- func :: Float -> Float -> Int
--- func x y = if x < y then 1 else 0
---
--- Which in Cmm gives the floating point comparison.
---
--- if (%MO_F_Lt_W32(F1, F2)) goto c2gg; else goto c2gf;
---
--- We used to compile this to an assembly code block like this:
--- _c2gh:
--- ucomiss %xmm2,%xmm1
--- jp _c2gf
--- jb _c2gg
--- jmp _c2gf
---
--- Where we have to introduce an explicit
--- check for unordered results (using jmp parity):
---
--- We can avoid this by exchanging the arguments and inverting the direction
--- of the comparison. This results in the sequence of:
---
--- ucomiss %xmm1,%xmm2
--- ja _c2g2
--- jmp _c2g1
---
--- Removing the jump reduces the pressure on the branch predidiction system
--- and plays better with the uOP cache.
-
-condFltReg :: Bool -> Cond -> CmmExpr -> CmmExpr -> NatM Register
-condFltReg is32Bit cond x y = condFltReg_sse2
- where
-
-
- condFltReg_sse2 = do
- CondCode _ cond cond_code <- condFltCode cond x y
- tmp1 <- getNewRegNat (archWordFormat is32Bit)
- tmp2 <- getNewRegNat (archWordFormat is32Bit)
- let -- See Note [SSE Parity Checks]
- code dst =
- cond_code `appOL`
- (case cond of
- NE -> or_unordered dst
- GU -> plain_test dst
- GEU -> plain_test dst
- -- Use ASSERT so we don't break releases if these creep in.
- LTT -> ASSERT2(False, ppr "Should have been turned into >")
- and_ordered dst
- LE -> ASSERT2(False, ppr "Should have been turned into >=")
- and_ordered dst
- _ -> and_ordered dst)
-
- plain_test dst = toOL [
- SETCC cond (OpReg tmp1),
- MOVZxL II8 (OpReg tmp1) (OpReg dst)
- ]
- or_unordered dst = toOL [
- SETCC cond (OpReg tmp1),
- SETCC PARITY (OpReg tmp2),
- OR II8 (OpReg tmp1) (OpReg tmp2),
- MOVZxL II8 (OpReg tmp2) (OpReg dst)
- ]
- and_ordered dst = toOL [
- SETCC cond (OpReg tmp1),
- SETCC NOTPARITY (OpReg tmp2),
- AND II8 (OpReg tmp1) (OpReg tmp2),
- MOVZxL II8 (OpReg tmp2) (OpReg dst)
- ]
- return (Any II32 code)
-
-
--- -----------------------------------------------------------------------------
--- 'trivial*Code': deal with trivial instructions
-
--- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
--- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
--- Only look for constants on the right hand side, because that's
--- where the generic optimizer will have put them.
-
--- Similarly, for unary instructions, we don't have to worry about
--- matching an StInt as the argument, because genericOpt will already
--- have handled the constant-folding.
-
-
-{-
-The Rules of the Game are:
-
-* You cannot assume anything about the destination register dst;
- it may be anything, including a fixed reg.
-
-* You may compute an operand into a fixed reg, but you may not
- subsequently change the contents of that fixed reg. If you
- want to do so, first copy the value either to a temporary
- or into dst. You are free to modify dst even if it happens
- to be a fixed reg -- that's not your problem.
-
-* You cannot assume that a fixed reg will stay live over an
- arbitrary computation. The same applies to the dst reg.
-
-* Temporary regs obtained from getNewRegNat are distinct from
- each other and from all other regs, and stay live over
- arbitrary computations.
-
---------------------
-
-SDM's version of The Rules:
-
-* If getRegister returns Any, that means it can generate correct
- code which places the result in any register, period. Even if that
- register happens to be read during the computation.
-
- Corollary #1: this means that if you are generating code for an
- operation with two arbitrary operands, you cannot assign the result
- of the first operand into the destination register before computing
- the second operand. The second operand might require the old value
- of the destination register.
-
- Corollary #2: A function might be able to generate more efficient
- code if it knows the destination register is a new temporary (and
- therefore not read by any of the sub-computations).
-
-* If getRegister returns Any, then the code it generates may modify only:
- (a) fresh temporaries
- (b) the destination register
- (c) known registers (eg. %ecx is used by shifts)
- In particular, it may *not* modify global registers, unless the global
- register happens to be the destination register.
--}
-
-trivialCode :: Width -> (Operand -> Operand -> Instr)
- -> Maybe (Operand -> Operand -> Instr)
- -> CmmExpr -> CmmExpr -> NatM Register
-trivialCode width instr m a b
- = do is32Bit <- is32BitPlatform
- trivialCode' is32Bit width instr m a b
-
-trivialCode' :: Bool -> Width -> (Operand -> Operand -> Instr)
- -> Maybe (Operand -> Operand -> Instr)
- -> CmmExpr -> CmmExpr -> NatM Register
-trivialCode' is32Bit width _ (Just revinstr) (CmmLit lit_a) b
- | is32BitLit is32Bit lit_a = do
- b_code <- getAnyReg b
- let
- code dst
- = b_code dst `snocOL`
- revinstr (OpImm (litToImm lit_a)) (OpReg dst)
- return (Any (intFormat width) code)
-
-trivialCode' _ width instr _ a b
- = genTrivialCode (intFormat width) instr a b
-
--- This is re-used for floating pt instructions too.
-genTrivialCode :: Format -> (Operand -> Operand -> Instr)
- -> CmmExpr -> CmmExpr -> NatM Register
-genTrivialCode rep instr a b = do
- (b_op, b_code) <- getNonClobberedOperand b
- a_code <- getAnyReg a
- tmp <- getNewRegNat rep
- let
- -- We want the value of b to stay alive across the computation of a.
- -- But, we want to calculate a straight into the destination register,
- -- because the instruction only has two operands (dst := dst `op` src).
- -- The troublesome case is when the result of b is in the same register
- -- as the destination reg. In this case, we have to save b in a
- -- new temporary across the computation of a.
- code dst
- | dst `regClashesWithOp` b_op =
- b_code `appOL`
- unitOL (MOV rep b_op (OpReg tmp)) `appOL`
- a_code dst `snocOL`
- instr (OpReg tmp) (OpReg dst)
- | otherwise =
- b_code `appOL`
- a_code dst `snocOL`
- instr b_op (OpReg dst)
- return (Any rep code)
-
-regClashesWithOp :: Reg -> Operand -> Bool
-reg `regClashesWithOp` OpReg reg2 = reg == reg2
-reg `regClashesWithOp` OpAddr amode = any (==reg) (addrModeRegs amode)
-_ `regClashesWithOp` _ = False
-
------------
-
-trivialUCode :: Format -> (Operand -> Instr)
- -> CmmExpr -> NatM Register
-trivialUCode rep instr x = do
- x_code <- getAnyReg x
- let
- code dst =
- x_code dst `snocOL`
- instr (OpReg dst)
- return (Any rep code)
-
------------
-
-
-trivialFCode_sse2 :: Width -> (Format -> Operand -> Operand -> Instr)
- -> CmmExpr -> CmmExpr -> NatM Register
-trivialFCode_sse2 pk instr x y
- = genTrivialCode format (instr format) x y
- where format = floatFormat pk
-
-
-trivialUFCode :: Format -> (Reg -> Reg -> Instr) -> CmmExpr -> NatM Register
-trivialUFCode format instr x = do
- (x_reg, x_code) <- getSomeReg x
- let
- code dst =
- x_code `snocOL`
- instr x_reg dst
- return (Any format code)
-
-
---------------------------------------------------------------------------------
-coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
-coerceInt2FP from to x = coerce_sse2
- where
-
- coerce_sse2 = do
- (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
- let
- opc = case to of W32 -> CVTSI2SS; W64 -> CVTSI2SD
- n -> panic $ "coerceInt2FP.sse: unhandled width ("
- ++ show n ++ ")"
- code dst = x_code `snocOL` opc (intFormat from) x_op dst
- return (Any (floatFormat to) code)
- -- works even if the destination rep is <II32
-
---------------------------------------------------------------------------------
-coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
-coerceFP2Int from to x = coerceFP2Int_sse2
- where
- coerceFP2Int_sse2 = do
- (x_op, x_code) <- getOperand x -- ToDo: could be a safe operand
- let
- opc = case from of W32 -> CVTTSS2SIQ; W64 -> CVTTSD2SIQ;
- n -> panic $ "coerceFP2Init.sse: unhandled width ("
- ++ show n ++ ")"
- code dst = x_code `snocOL` opc (intFormat to) x_op dst
- return (Any (intFormat to) code)
- -- works even if the destination rep is <II32
-
-
---------------------------------------------------------------------------------
-coerceFP2FP :: Width -> CmmExpr -> NatM Register
-coerceFP2FP to x = do
- (x_reg, x_code) <- getSomeReg x
- let
- opc = case to of W32 -> CVTSD2SS; W64 -> CVTSS2SD;
- n -> panic $ "coerceFP2FP: unhandled width ("
- ++ show n ++ ")"
- code dst = x_code `snocOL` opc x_reg dst
- return (Any ( floatFormat to) code)
-
---------------------------------------------------------------------------------
-
-sse2NegCode :: Width -> CmmExpr -> NatM Register
-sse2NegCode w x = do
- let fmt = floatFormat w
- x_code <- getAnyReg x
- -- This is how gcc does it, so it can't be that bad:
- let
- const = case fmt of
- FF32 -> CmmInt 0x80000000 W32
- FF64 -> CmmInt 0x8000000000000000 W64
- x@II8 -> wrongFmt x
- x@II16 -> wrongFmt x
- x@II32 -> wrongFmt x
- x@II64 -> wrongFmt x
-
- where
- wrongFmt x = panic $ "sse2NegCode: " ++ show x
- Amode amode amode_code <- memConstant (mkAlignment $ widthInBytes w) const
- tmp <- getNewRegNat fmt
- let
- code dst = x_code dst `appOL` amode_code `appOL` toOL [
- MOV fmt (OpAddr amode) (OpReg tmp),
- XOR fmt (OpReg tmp) (OpReg dst)
- ]
- --
- return (Any fmt code)
-
-isVecExpr :: CmmExpr -> Bool
-isVecExpr (CmmMachOp (MO_V_Insert {}) _) = True
-isVecExpr (CmmMachOp (MO_V_Extract {}) _) = True
-isVecExpr (CmmMachOp (MO_V_Add {}) _) = True
-isVecExpr (CmmMachOp (MO_V_Sub {}) _) = True
-isVecExpr (CmmMachOp (MO_V_Mul {}) _) = True
-isVecExpr (CmmMachOp (MO_VS_Quot {}) _) = True
-isVecExpr (CmmMachOp (MO_VS_Rem {}) _) = True
-isVecExpr (CmmMachOp (MO_VS_Neg {}) _) = True
-isVecExpr (CmmMachOp (MO_VF_Insert {}) _) = True
-isVecExpr (CmmMachOp (MO_VF_Extract {}) _) = True
-isVecExpr (CmmMachOp (MO_VF_Add {}) _) = True
-isVecExpr (CmmMachOp (MO_VF_Sub {}) _) = True
-isVecExpr (CmmMachOp (MO_VF_Mul {}) _) = True
-isVecExpr (CmmMachOp (MO_VF_Quot {}) _) = True
-isVecExpr (CmmMachOp (MO_VF_Neg {}) _) = True
-isVecExpr (CmmMachOp _ [e]) = isVecExpr e
-isVecExpr _ = False
-
-needLlvm :: NatM a
-needLlvm =
- sorry $ unlines ["The native code generator does not support vector"
- ,"instructions. Please use -fllvm."]
-
--- | This works on the invariant that all jumps in the given blocks are required.
--- Starting from there we try to make a few more jumps redundant by reordering
--- them.
--- We depend on the information in the CFG to do so so without a given CFG
--- we do nothing.
-invertCondBranches :: Maybe CFG -- ^ CFG if present
- -> LabelMap a -- ^ Blocks with info tables
- -> [NatBasicBlock Instr] -- ^ List of basic blocks
- -> [NatBasicBlock Instr]
-invertCondBranches Nothing _ bs = bs
-invertCondBranches (Just cfg) keep bs =
- invert bs
- where
- invert :: [NatBasicBlock Instr] -> [NatBasicBlock Instr]
- invert ((BasicBlock lbl1 ins@(_:_:_xs)):b2@(BasicBlock lbl2 _):bs)
- | --pprTrace "Block" (ppr lbl1) True,
- (jmp1,jmp2) <- last2 ins
- , JXX cond1 target1 <- jmp1
- , target1 == lbl2
- --, pprTrace "CutChance" (ppr b1) True
- , JXX ALWAYS target2 <- jmp2
- -- We have enough information to check if we can perform the inversion
- -- TODO: We could also check for the last asm instruction which sets
- -- status flags instead. Which I suspect is worse in terms of compiler
- -- performance, but might be applicable to more cases
- , Just edgeInfo1 <- getEdgeInfo lbl1 target1 cfg
- , Just edgeInfo2 <- getEdgeInfo lbl1 target2 cfg
- -- Both jumps come from the same cmm statement
- , transitionSource edgeInfo1 == transitionSource edgeInfo2
- , CmmSource {trans_cmmNode = cmmCondBranch} <- transitionSource edgeInfo1
-
- --Int comparisons are invertable
- , CmmCondBranch (CmmMachOp op _args) _ _ _ <- cmmCondBranch
- , Just _ <- maybeIntComparison op
- , Just invCond <- maybeInvertCond cond1
-
- --Swap the last two jumps, invert the conditional jumps condition.
- = let jumps =
- case () of
- -- We are free the eliminate the jmp. So we do so.
- _ | not (mapMember target1 keep)
- -> [JXX invCond target2]
- -- If the conditional target is unlikely we put the other
- -- target at the front.
- | edgeWeight edgeInfo2 > edgeWeight edgeInfo1
- -> [JXX invCond target2, JXX ALWAYS target1]
- -- Keep things as-is otherwise
- | otherwise
- -> [jmp1, jmp2]
- in --pprTrace "Cutable" (ppr [jmp1,jmp2] <+> text "=>" <+> ppr jumps) $
- (BasicBlock lbl1
- (dropTail 2 ins ++ jumps))
- : invert (b2:bs)
- invert (b:bs) = b : invert bs
- invert [] = []
diff --git a/compiler/nativeGen/X86/Cond.hs b/compiler/nativeGen/X86/Cond.hs
deleted file mode 100644
index 49e3ce9254..0000000000
--- a/compiler/nativeGen/X86/Cond.hs
+++ /dev/null
@@ -1,109 +0,0 @@
-module X86.Cond (
- Cond(..),
- condUnsigned,
- condToSigned,
- condToUnsigned,
- maybeFlipCond,
- maybeInvertCond
-)
-
-where
-
-import GhcPrelude
-
-data Cond
- = ALWAYS -- What's really used? ToDo
- | EQQ
- | GE
- | GEU
- | GTT
- | GU
- | LE
- | LEU
- | LTT
- | LU
- | NE
- | NEG
- | POS
- | CARRY
- | OFLO
- | PARITY
- | NOTPARITY
- deriving Eq
-
-condUnsigned :: Cond -> Bool
-condUnsigned GU = True
-condUnsigned LU = True
-condUnsigned GEU = True
-condUnsigned LEU = True
-condUnsigned _ = False
-
-
-condToSigned :: Cond -> Cond
-condToSigned GU = GTT
-condToSigned LU = LTT
-condToSigned GEU = GE
-condToSigned LEU = LE
-condToSigned x = x
-
-
-condToUnsigned :: Cond -> Cond
-condToUnsigned GTT = GU
-condToUnsigned LTT = LU
-condToUnsigned GE = GEU
-condToUnsigned LE = LEU
-condToUnsigned x = x
-
--- | @maybeFlipCond c@ returns @Just c'@ if it is possible to flip the
--- arguments to the conditional @c@, and the new condition should be @c'@.
-maybeFlipCond :: Cond -> Maybe Cond
-maybeFlipCond cond = case cond of
- EQQ -> Just EQQ
- NE -> Just NE
- LU -> Just GU
- GU -> Just LU
- LEU -> Just GEU
- GEU -> Just LEU
- LTT -> Just GTT
- GTT -> Just LTT
- LE -> Just GE
- GE -> Just LE
- _other -> Nothing
-
--- | If we apply @maybeInvertCond@ to the condition of a jump we turn
--- jumps taken into jumps not taken and vice versa.
---
--- Careful! If the used comparison and the conditional jump
--- don't match the above behaviour will NOT hold.
--- When used for FP comparisons this does not consider unordered
--- numbers.
--- Also inverting twice might return a synonym for the original condition.
-maybeInvertCond :: Cond -> Maybe Cond
-maybeInvertCond cond = case cond of
- ALWAYS -> Nothing
- EQQ -> Just NE
- NE -> Just EQQ
-
- NEG -> Just POS
- POS -> Just NEG
-
- GEU -> Just LU
- LU -> Just GEU
-
- GE -> Just LTT
- LTT -> Just GE
-
- GTT -> Just LE
- LE -> Just GTT
-
- GU -> Just LEU
- LEU -> Just GU
-
- --GEU "==" NOTCARRY, they are synonyms
- --at the assembly level
- CARRY -> Just GEU
-
- OFLO -> Nothing
-
- PARITY -> Just NOTPARITY
- NOTPARITY -> Just PARITY
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
deleted file mode 100644
index 099437265c..0000000000
--- a/compiler/nativeGen/X86/Instr.hs
+++ /dev/null
@@ -1,1054 +0,0 @@
-{-# LANGUAGE CPP, TypeFamilies #-}
-
------------------------------------------------------------------------------
---
--- Machine-dependent assembly language
---
--- (c) The University of Glasgow 1993-2004
---
------------------------------------------------------------------------------
-
-module X86.Instr (Instr(..), Operand(..), PrefetchVariant(..), JumpDest(..),
- getJumpDestBlockId, canShortcut, shortcutStatics,
- shortcutJump, allocMoreStack,
- maxSpillSlots, archWordFormat )
-where
-
-#include "HsVersions.h"
-
-import GhcPrelude
-
-import X86.Cond
-import X86.Regs
-import Instruction
-import Format
-import RegClass
-import Reg
-import TargetReg
-
-import GHC.Cmm.BlockId
-import GHC.Cmm.Dataflow.Collections
-import GHC.Cmm.Dataflow.Label
-import GHC.Platform.Regs
-import GHC.Cmm
-import FastString
-import Outputable
-import GHC.Platform
-
-import BasicTypes (Alignment)
-import GHC.Cmm.CLabel
-import GHC.Driver.Session
-import UniqSet
-import Unique
-import UniqSupply
-import GHC.Cmm.DebugBlock (UnwindTable)
-
-import Control.Monad
-import Data.Maybe (fromMaybe)
-
--- Format of an x86/x86_64 memory address, in bytes.
---
-archWordFormat :: Bool -> Format
-archWordFormat is32Bit
- | is32Bit = II32
- | otherwise = II64
-
--- | Instruction instance for x86 instruction set.
-instance Instruction Instr where
- regUsageOfInstr = x86_regUsageOfInstr
- patchRegsOfInstr = x86_patchRegsOfInstr
- isJumpishInstr = x86_isJumpishInstr
- jumpDestsOfInstr = x86_jumpDestsOfInstr
- patchJumpInstr = x86_patchJumpInstr
- mkSpillInstr = x86_mkSpillInstr
- mkLoadInstr = x86_mkLoadInstr
- takeDeltaInstr = x86_takeDeltaInstr
- isMetaInstr = x86_isMetaInstr
- mkRegRegMoveInstr = x86_mkRegRegMoveInstr
- takeRegRegMoveInstr = x86_takeRegRegMoveInstr
- mkJumpInstr = x86_mkJumpInstr
- mkStackAllocInstr = x86_mkStackAllocInstr
- mkStackDeallocInstr = x86_mkStackDeallocInstr
-
-
--- -----------------------------------------------------------------------------
--- Intel x86 instructions
-
-{-
-Intel, in their infinite wisdom, selected a stack model for floating
-point registers on x86. That might have made sense back in 1979 --
-nowadays we can see it for the nonsense it really is. A stack model
-fits poorly with the existing nativeGen infrastructure, which assumes
-flat integer and FP register sets. Prior to this commit, nativeGen
-could not generate correct x86 FP code -- to do so would have meant
-somehow working the register-stack paradigm into the register
-allocator and spiller, which sounds very difficult.
-
-We have decided to cheat, and go for a simple fix which requires no
-infrastructure modifications, at the expense of generating ropey but
-correct FP code. All notions of the x86 FP stack and its insns have
-been removed. Instead, we pretend (to the instruction selector and
-register allocator) that x86 has six floating point registers, %fake0
-.. %fake5, which can be used in the usual flat manner. We further
-claim that x86 has floating point instructions very similar to SPARC
-and Alpha, that is, a simple 3-operand register-register arrangement.
-Code generation and register allocation proceed on this basis.
-
-When we come to print out the final assembly, our convenient fiction
-is converted to dismal reality. Each fake instruction is
-independently converted to a series of real x86 instructions.
-%fake0 .. %fake5 are mapped to %st(0) .. %st(5). To do reg-reg
-arithmetic operations, the two operands are pushed onto the top of the
-FP stack, the operation done, and the result copied back into the
-relevant register. There are only six %fake registers because 2 are
-needed for the translation, and x86 has 8 in total.
-
-The translation is inefficient but is simple and it works. A cleverer
-translation would handle a sequence of insns, simulating the FP stack
-contents, would not impose a fixed mapping from %fake to %st regs, and
-hopefully could avoid most of the redundant reg-reg moves of the
-current translation.
-
-We might as well make use of whatever unique FP facilities Intel have
-chosen to bless us with (let's not be churlish, after all).
-Hence GLDZ and GLD1. Bwahahahahahahaha!
--}
-
-{-
-Note [x86 Floating point precision]
-
-Intel's internal floating point registers are by default 80 bit
-extended precision. This means that all operations done on values in
-registers are done at 80 bits, and unless the intermediate values are
-truncated to the appropriate size (32 or 64 bits) by storing in
-memory, calculations in registers will give different results from
-calculations which pass intermediate values in memory (eg. via
-function calls).
-
-One solution is to set the FPU into 64 bit precision mode. Some OSs
-do this (eg. FreeBSD) and some don't (eg. Linux). The problem here is
-that this will only affect 64-bit precision arithmetic; 32-bit
-calculations will still be done at 64-bit precision in registers. So
-it doesn't solve the whole problem.
-
-There's also the issue of what the C library is expecting in terms of
-precision. It seems to be the case that glibc on Linux expects the
-FPU to be set to 80 bit precision, so setting it to 64 bit could have
-unexpected effects. Changing the default could have undesirable
-effects on other 3rd-party library code too, so the right thing would
-be to save/restore the FPU control word across Haskell code if we were
-to do this.
-
-gcc's -ffloat-store gives consistent results by always storing the
-results of floating-point calculations in memory, which works for both
-32 and 64-bit precision. However, it only affects the values of
-user-declared floating point variables in C, not intermediate results.
-GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
-flag).
-
-Another problem is how to spill floating point registers in the
-register allocator. Should we spill the whole 80 bits, or just 64?
-On an OS which is set to 64 bit precision, spilling 64 is fine. On
-Linux, spilling 64 bits will round the results of some operations.
-This is what gcc does. Spilling at 80 bits requires taking up a full
-128 bit slot (so we get alignment). We spill at 80-bits and ignore
-the alignment problems.
-
-In the future [edit: now available in GHC 7.0.1, with the -msse2
-flag], we'll use the SSE registers for floating point. This requires
-a CPU that supports SSE2 (ordinary SSE only supports 32 bit precision
-float ops), which means P4 or Xeon and above. Using SSE will solve
-all these problems, because the SSE registers use fixed 32 bit or 64
-bit precision.
-
---SDM 1/2003
--}
-
-data Instr
- -- comment pseudo-op
- = COMMENT FastString
-
- -- location pseudo-op (file, line, col, name)
- | LOCATION Int Int Int String
-
- -- some static data spat out during code
- -- generation. Will be extracted before
- -- pretty-printing.
- | LDATA Section (Alignment, RawCmmStatics)
-
- -- start a new basic block. Useful during
- -- codegen, removed later. Preceding
- -- instruction should be a jump, as per the
- -- invariants for a BasicBlock (see Cmm).
- | NEWBLOCK BlockId
-
- -- unwinding information
- -- See Note [Unwinding information in the NCG].
- | UNWIND CLabel UnwindTable
-
- -- specify current stack offset for benefit of subsequent passes.
- -- This carries a BlockId so it can be used in unwinding information.
- | DELTA Int
-
- -- Moves.
- | MOV Format Operand Operand
- | CMOV Cond Format Operand Reg
- | MOVZxL Format Operand Operand -- format is the size of operand 1
- | MOVSxL Format Operand Operand -- format is the size of operand 1
- -- x86_64 note: plain mov into a 32-bit register always zero-extends
- -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
- -- don't affect the high bits of the register.
-
- -- Load effective address (also a very useful three-operand add instruction :-)
- | LEA Format Operand Operand
-
- -- Int Arithmetic.
- | ADD Format Operand Operand
- | ADC Format Operand Operand
- | SUB Format Operand Operand
- | SBB Format Operand Operand
-
- | MUL Format Operand Operand
- | MUL2 Format Operand -- %edx:%eax = operand * %rax
- | IMUL Format Operand Operand -- signed int mul
- | IMUL2 Format Operand -- %edx:%eax = operand * %eax
-
- | DIV Format Operand -- eax := eax:edx/op, edx := eax:edx%op
- | IDIV Format Operand -- ditto, but signed
-
- -- Int Arithmetic, where the effects on the condition register
- -- are important. Used in specialized sequences such as MO_Add2.
- -- Do not rewrite these instructions to "equivalent" ones that
- -- have different effect on the condition register! (See #9013.)
- | ADD_CC Format Operand Operand
- | SUB_CC Format Operand Operand
-
- -- Simple bit-twiddling.
- | AND Format Operand Operand
- | OR Format Operand Operand
- | XOR Format Operand Operand
- | NOT Format Operand
- | NEGI Format Operand -- NEG instruction (name clash with Cond)
- | BSWAP Format Reg
-
- -- Shifts (amount may be immediate or %cl only)
- | SHL Format Operand{-amount-} Operand
- | SAR Format Operand{-amount-} Operand
- | SHR Format Operand{-amount-} Operand
-
- | BT Format Imm Operand
- | NOP
-
-
- -- We need to support the FSTP (x87 store and pop) instruction
- -- so that we can correctly read off the return value of an
- -- x86 CDECL C function call when its floating point.
- -- so we dont include a register argument, and just use st(0)
- -- this instruction is used ONLY for return values of C ffi calls
- -- in x86_32 abi
- | X87Store Format AddrMode -- st(0), dst
-
-
- -- SSE2 floating point: we use a restricted set of the available SSE2
- -- instructions for floating-point.
- -- use MOV for moving (either movss or movsd (movlpd better?))
- | CVTSS2SD Reg Reg -- F32 to F64
- | CVTSD2SS Reg Reg -- F64 to F32
- | CVTTSS2SIQ Format Operand Reg -- F32 to I32/I64 (with truncation)
- | CVTTSD2SIQ Format Operand Reg -- F64 to I32/I64 (with truncation)
- | CVTSI2SS Format Operand Reg -- I32/I64 to F32
- | CVTSI2SD Format Operand Reg -- I32/I64 to F64
-
- -- use ADD, SUB, and SQRT for arithmetic. In both cases, operands
- -- are Operand Reg.
-
- -- SSE2 floating-point division:
- | FDIV Format Operand Operand -- divisor, dividend(dst)
-
- -- use CMP for comparisons. ucomiss and ucomisd instructions
- -- compare single/double prec floating point respectively.
-
- | SQRT Format Operand Reg -- src, dst
-
-
- -- Comparison
- | TEST Format Operand Operand
- | CMP Format Operand Operand
- | SETCC Cond Operand
-
- -- Stack Operations.
- | PUSH Format Operand
- | POP Format Operand
- -- both unused (SDM):
- -- | PUSHA
- -- | POPA
-
- -- Jumping around.
- | JMP Operand [Reg] -- including live Regs at the call
- | JXX Cond BlockId -- includes unconditional branches
- | JXX_GBL Cond Imm -- non-local version of JXX
- -- Table jump
- | JMP_TBL Operand -- Address to jump to
- [Maybe JumpDest] -- Targets of the jump table
- Section -- Data section jump table should be put in
- CLabel -- Label of jump table
- -- | X86 call instruction
- | CALL (Either Imm Reg) -- ^ Jump target
- [Reg] -- ^ Arguments (required for register allocation)
-
- -- Other things.
- | CLTD Format -- sign extend %eax into %edx:%eax
-
- | FETCHGOT Reg -- pseudo-insn for ELF position-independent code
- -- pretty-prints as
- -- call 1f
- -- 1: popl %reg
- -- addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
- | FETCHPC Reg -- pseudo-insn for Darwin position-independent code
- -- pretty-prints as
- -- call 1f
- -- 1: popl %reg
-
- -- bit counting instructions
- | POPCNT Format Operand Reg -- [SSE4.2] count number of bits set to 1
- | LZCNT Format Operand Reg -- [BMI2] count number of leading zeros
- | TZCNT Format Operand Reg -- [BMI2] count number of trailing zeros
- | BSF Format Operand Reg -- bit scan forward
- | BSR Format Operand Reg -- bit scan reverse
-
- -- bit manipulation instructions
- | PDEP Format Operand Operand Reg -- [BMI2] deposit bits to the specified mask
- | PEXT Format Operand Operand Reg -- [BMI2] extract bits from the specified mask
-
- -- prefetch
- | PREFETCH PrefetchVariant Format Operand -- prefetch Variant, addr size, address to prefetch
- -- variant can be NTA, Lvl0, Lvl1, or Lvl2
-
- | LOCK Instr -- lock prefix
- | XADD Format Operand Operand -- src (r), dst (r/m)
- | CMPXCHG Format Operand Operand -- src (r), dst (r/m), eax implicit
- | MFENCE
-
-data PrefetchVariant = NTA | Lvl0 | Lvl1 | Lvl2
-
-
-data Operand
- = OpReg Reg -- register
- | OpImm Imm -- immediate value
- | OpAddr AddrMode -- memory reference
-
-
-
--- | Returns which registers are read and written as a (read, written)
--- pair.
-x86_regUsageOfInstr :: Platform -> Instr -> RegUsage
-x86_regUsageOfInstr platform instr
- = case instr of
- MOV _ src dst -> usageRW src dst
- CMOV _ _ src dst -> mkRU (use_R src [dst]) [dst]
- MOVZxL _ src dst -> usageRW src dst
- MOVSxL _ src dst -> usageRW src dst
- LEA _ src dst -> usageRW src dst
- ADD _ src dst -> usageRM src dst
- ADC _ src dst -> usageRM src dst
- SUB _ src dst -> usageRM src dst
- SBB _ src dst -> usageRM src dst
- IMUL _ src dst -> usageRM src dst
-
- -- Result of IMULB will be in just in %ax
- IMUL2 II8 src -> mkRU (eax:use_R src []) [eax]
- -- Result of IMUL for wider values, will be split between %dx/%edx/%rdx and
- -- %ax/%eax/%rax.
- IMUL2 _ src -> mkRU (eax:use_R src []) [eax,edx]
-
- MUL _ src dst -> usageRM src dst
- MUL2 _ src -> mkRU (eax:use_R src []) [eax,edx]
- DIV _ op -> mkRU (eax:edx:use_R op []) [eax,edx]
- IDIV _ op -> mkRU (eax:edx:use_R op []) [eax,edx]
- ADD_CC _ src dst -> usageRM src dst
- SUB_CC _ src dst -> usageRM src dst
- AND _ src dst -> usageRM src dst
- OR _ src dst -> usageRM src dst
-
- XOR _ (OpReg src) (OpReg dst)
- | src == dst -> mkRU [] [dst]
-
- XOR _ src dst -> usageRM src dst
- NOT _ op -> usageM op
- BSWAP _ reg -> mkRU [reg] [reg]
- NEGI _ op -> usageM op
- SHL _ imm dst -> usageRM imm dst
- SAR _ imm dst -> usageRM imm dst
- SHR _ imm dst -> usageRM imm dst
- BT _ _ src -> mkRUR (use_R src [])
-
- PUSH _ op -> mkRUR (use_R op [])
- POP _ op -> mkRU [] (def_W op)
- TEST _ src dst -> mkRUR (use_R src $! use_R dst [])
- CMP _ src dst -> mkRUR (use_R src $! use_R dst [])
- SETCC _ op -> mkRU [] (def_W op)
- JXX _ _ -> mkRU [] []
- JXX_GBL _ _ -> mkRU [] []
- JMP op regs -> mkRUR (use_R op regs)
- JMP_TBL op _ _ _ -> mkRUR (use_R op [])
- CALL (Left _) params -> mkRU params (callClobberedRegs platform)
- CALL (Right reg) params -> mkRU (reg:params) (callClobberedRegs platform)
- CLTD _ -> mkRU [eax] [edx]
- NOP -> mkRU [] []
-
- X87Store _ dst -> mkRUR ( use_EA dst [])
-
- CVTSS2SD src dst -> mkRU [src] [dst]
- CVTSD2SS src dst -> mkRU [src] [dst]
- CVTTSS2SIQ _ src dst -> mkRU (use_R src []) [dst]
- CVTTSD2SIQ _ src dst -> mkRU (use_R src []) [dst]
- CVTSI2SS _ src dst -> mkRU (use_R src []) [dst]
- CVTSI2SD _ src dst -> mkRU (use_R src []) [dst]
- FDIV _ src dst -> usageRM src dst
- SQRT _ src dst -> mkRU (use_R src []) [dst]
-
- FETCHGOT reg -> mkRU [] [reg]
- FETCHPC reg -> mkRU [] [reg]
-
- COMMENT _ -> noUsage
- LOCATION{} -> noUsage
- UNWIND{} -> noUsage
- DELTA _ -> noUsage
-
- POPCNT _ src dst -> mkRU (use_R src []) [dst]
- LZCNT _ src dst -> mkRU (use_R src []) [dst]
- TZCNT _ src dst -> mkRU (use_R src []) [dst]
- BSF _ src dst -> mkRU (use_R src []) [dst]
- BSR _ src dst -> mkRU (use_R src []) [dst]
-
- PDEP _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst]
- PEXT _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst]
-
- -- note: might be a better way to do this
- PREFETCH _ _ src -> mkRU (use_R src []) []
- LOCK i -> x86_regUsageOfInstr platform i
- XADD _ src dst -> usageMM src dst
- CMPXCHG _ src dst -> usageRMM src dst (OpReg eax)
- MFENCE -> noUsage
-
- _other -> panic "regUsage: unrecognised instr"
- where
- -- # Definitions
- --
- -- Written: If the operand is a register, it's written. If it's an
- -- address, registers mentioned in the address are read.
- --
- -- Modified: If the operand is a register, it's both read and
- -- written. If it's an address, registers mentioned in the address
- -- are read.
-
- -- 2 operand form; first operand Read; second Written
- usageRW :: Operand -> Operand -> RegUsage
- usageRW op (OpReg reg) = mkRU (use_R op []) [reg]
- usageRW op (OpAddr ea) = mkRUR (use_R op $! use_EA ea [])
- usageRW _ _ = panic "X86.RegInfo.usageRW: no match"
-
- -- 2 operand form; first operand Read; second Modified
- usageRM :: Operand -> Operand -> RegUsage
- usageRM op (OpReg reg) = mkRU (use_R op [reg]) [reg]
- usageRM op (OpAddr ea) = mkRUR (use_R op $! use_EA ea [])
- usageRM _ _ = panic "X86.RegInfo.usageRM: no match"
-
- -- 2 operand form; first operand Modified; second Modified
- usageMM :: Operand -> Operand -> RegUsage
- usageMM (OpReg src) (OpReg dst) = mkRU [src, dst] [src, dst]
- usageMM (OpReg src) (OpAddr ea) = mkRU (use_EA ea [src]) [src]
- usageMM _ _ = panic "X86.RegInfo.usageMM: no match"
-
- -- 3 operand form; first operand Read; second Modified; third Modified
- usageRMM :: Operand -> Operand -> Operand -> RegUsage
- usageRMM (OpReg src) (OpReg dst) (OpReg reg) = mkRU [src, dst, reg] [dst, reg]
- usageRMM (OpReg src) (OpAddr ea) (OpReg reg) = mkRU (use_EA ea [src, reg]) [reg]
- usageRMM _ _ _ = panic "X86.RegInfo.usageRMM: no match"
-
- -- 1 operand form; operand Modified
- usageM :: Operand -> RegUsage
- usageM (OpReg reg) = mkRU [reg] [reg]
- usageM (OpAddr ea) = mkRUR (use_EA ea [])
- usageM _ = panic "X86.RegInfo.usageM: no match"
-
- -- Registers defd when an operand is written.
- def_W (OpReg reg) = [reg]
- def_W (OpAddr _ ) = []
- def_W _ = panic "X86.RegInfo.def_W: no match"
-
- -- Registers used when an operand is read.
- use_R (OpReg reg) tl = reg : tl
- use_R (OpImm _) tl = tl
- use_R (OpAddr ea) tl = use_EA ea tl
-
- -- Registers used to compute an effective address.
- use_EA (ImmAddr _ _) tl = tl
- use_EA (AddrBaseIndex base index _) tl =
- use_base base $! use_index index tl
- where use_base (EABaseReg r) tl = r : tl
- use_base _ tl = tl
- use_index EAIndexNone tl = tl
- use_index (EAIndex i _) tl = i : tl
-
- mkRUR src = src' `seq` RU src' []
- where src' = filter (interesting platform) src
-
- mkRU src dst = src' `seq` dst' `seq` RU src' dst'
- where src' = filter (interesting platform) src
- dst' = filter (interesting platform) dst
-
--- | Is this register interesting for the register allocator?
-interesting :: Platform -> Reg -> Bool
-interesting _ (RegVirtual _) = True
-interesting platform (RegReal (RealRegSingle i)) = freeReg platform i
-interesting _ (RegReal (RealRegPair{})) = panic "X86.interesting: no reg pairs on this arch"
-
-
-
--- | Applies the supplied function to all registers in instructions.
--- Typically used to change virtual registers to real registers.
-x86_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
-x86_patchRegsOfInstr instr env
- = case instr of
- MOV fmt src dst -> patch2 (MOV fmt) src dst
- CMOV cc fmt src dst -> CMOV cc fmt (patchOp src) (env dst)
- MOVZxL fmt src dst -> patch2 (MOVZxL fmt) src dst
- MOVSxL fmt src dst -> patch2 (MOVSxL fmt) src dst
- LEA fmt src dst -> patch2 (LEA fmt) src dst
- ADD fmt src dst -> patch2 (ADD fmt) src dst
- ADC fmt src dst -> patch2 (ADC fmt) src dst
- SUB fmt src dst -> patch2 (SUB fmt) src dst
- SBB fmt src dst -> patch2 (SBB fmt) src dst
- IMUL fmt src dst -> patch2 (IMUL fmt) src dst
- IMUL2 fmt src -> patch1 (IMUL2 fmt) src
- MUL fmt src dst -> patch2 (MUL fmt) src dst
- MUL2 fmt src -> patch1 (MUL2 fmt) src
- IDIV fmt op -> patch1 (IDIV fmt) op
- DIV fmt op -> patch1 (DIV fmt) op
- ADD_CC fmt src dst -> patch2 (ADD_CC fmt) src dst
- SUB_CC fmt src dst -> patch2 (SUB_CC fmt) src dst
- AND fmt src dst -> patch2 (AND fmt) src dst
- OR fmt src dst -> patch2 (OR fmt) src dst
- XOR fmt src dst -> patch2 (XOR fmt) src dst
- NOT fmt op -> patch1 (NOT fmt) op
- BSWAP fmt reg -> BSWAP fmt (env reg)
- NEGI fmt op -> patch1 (NEGI fmt) op
- SHL fmt imm dst -> patch1 (SHL fmt imm) dst
- SAR fmt imm dst -> patch1 (SAR fmt imm) dst
- SHR fmt imm dst -> patch1 (SHR fmt imm) dst
- BT fmt imm src -> patch1 (BT fmt imm) src
- TEST fmt src dst -> patch2 (TEST fmt) src dst
- CMP fmt src dst -> patch2 (CMP fmt) src dst
- PUSH fmt op -> patch1 (PUSH fmt) op
- POP fmt op -> patch1 (POP fmt) op
- SETCC cond op -> patch1 (SETCC cond) op
- JMP op regs -> JMP (patchOp op) regs
- JMP_TBL op ids s lbl -> JMP_TBL (patchOp op) ids s lbl
-
- -- literally only support storing the top x87 stack value st(0)
- X87Store fmt dst -> X87Store fmt (lookupAddr dst)
-
- CVTSS2SD src dst -> CVTSS2SD (env src) (env dst)
- CVTSD2SS src dst -> CVTSD2SS (env src) (env dst)
- CVTTSS2SIQ fmt src dst -> CVTTSS2SIQ fmt (patchOp src) (env dst)
- CVTTSD2SIQ fmt src dst -> CVTTSD2SIQ fmt (patchOp src) (env dst)
- CVTSI2SS fmt src dst -> CVTSI2SS fmt (patchOp src) (env dst)
- CVTSI2SD fmt src dst -> CVTSI2SD fmt (patchOp src) (env dst)
- FDIV fmt src dst -> FDIV fmt (patchOp src) (patchOp dst)
- SQRT fmt src dst -> SQRT fmt (patchOp src) (env dst)
-
- CALL (Left _) _ -> instr
- CALL (Right reg) p -> CALL (Right (env reg)) p
-
- FETCHGOT reg -> FETCHGOT (env reg)
- FETCHPC reg -> FETCHPC (env reg)
-
- NOP -> instr
- COMMENT _ -> instr
- LOCATION {} -> instr
- UNWIND {} -> instr
- DELTA _ -> instr
-
- JXX _ _ -> instr
- JXX_GBL _ _ -> instr
- CLTD _ -> instr
-
- POPCNT fmt src dst -> POPCNT fmt (patchOp src) (env dst)
- LZCNT fmt src dst -> LZCNT fmt (patchOp src) (env dst)
- TZCNT fmt src dst -> TZCNT fmt (patchOp src) (env dst)
- PDEP fmt src mask dst -> PDEP fmt (patchOp src) (patchOp mask) (env dst)
- PEXT fmt src mask dst -> PEXT fmt (patchOp src) (patchOp mask) (env dst)
- BSF fmt src dst -> BSF fmt (patchOp src) (env dst)
- BSR fmt src dst -> BSR fmt (patchOp src) (env dst)
-
- PREFETCH lvl format src -> PREFETCH lvl format (patchOp src)
-
- LOCK i -> LOCK (x86_patchRegsOfInstr i env)
- XADD fmt src dst -> patch2 (XADD fmt) src dst
- CMPXCHG fmt src dst -> patch2 (CMPXCHG fmt) src dst
- MFENCE -> instr
-
- _other -> panic "patchRegs: unrecognised instr"
-
- where
- patch1 :: (Operand -> a) -> Operand -> a
- patch1 insn op = insn $! patchOp op
- patch2 :: (Operand -> Operand -> a) -> Operand -> Operand -> a
- patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
-
- patchOp (OpReg reg) = OpReg $! env reg
- patchOp (OpImm imm) = OpImm imm
- patchOp (OpAddr ea) = OpAddr $! lookupAddr ea
-
- lookupAddr (ImmAddr imm off) = ImmAddr imm off
- lookupAddr (AddrBaseIndex base index disp)
- = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
- where
- lookupBase EABaseNone = EABaseNone
- lookupBase EABaseRip = EABaseRip
- lookupBase (EABaseReg r) = EABaseReg $! env r
-
- lookupIndex EAIndexNone = EAIndexNone
- lookupIndex (EAIndex r i) = (EAIndex $! env r) i
-
-
---------------------------------------------------------------------------------
-x86_isJumpishInstr
- :: Instr -> Bool
-
-x86_isJumpishInstr instr
- = case instr of
- JMP{} -> True
- JXX{} -> True
- JXX_GBL{} -> True
- JMP_TBL{} -> True
- CALL{} -> True
- _ -> False
-
-
-x86_jumpDestsOfInstr
- :: Instr
- -> [BlockId]
-
-x86_jumpDestsOfInstr insn
- = case insn of
- JXX _ id -> [id]
- JMP_TBL _ ids _ _ -> [id | Just (DestBlockId id) <- ids]
- _ -> []
-
-
-x86_patchJumpInstr
- :: Instr -> (BlockId -> BlockId) -> Instr
-
-x86_patchJumpInstr insn patchF
- = case insn of
- JXX cc id -> JXX cc (patchF id)
- JMP_TBL op ids section lbl
- -> JMP_TBL op (map (fmap (patchJumpDest patchF)) ids) section lbl
- _ -> insn
- where
- patchJumpDest f (DestBlockId id) = DestBlockId (f id)
- patchJumpDest _ dest = dest
-
-
-
-
-
--- -----------------------------------------------------------------------------
--- | Make a spill instruction.
-x86_mkSpillInstr
- :: DynFlags
- -> Reg -- register to spill
- -> Int -- current stack delta
- -> Int -- spill slot to use
- -> Instr
-
-x86_mkSpillInstr dflags reg delta slot
- = let off = spillSlotToOffset platform slot - delta
- in
- case targetClassOfReg platform reg of
- RcInteger -> MOV (archWordFormat is32Bit)
- (OpReg reg) (OpAddr (spRel dflags off))
- RcDouble -> MOV FF64 (OpReg reg) (OpAddr (spRel dflags off))
- _ -> panic "X86.mkSpillInstr: no match"
- where platform = targetPlatform dflags
- is32Bit = target32Bit platform
-
--- | Make a spill reload instruction.
-x86_mkLoadInstr
- :: DynFlags
- -> Reg -- register to load
- -> Int -- current stack delta
- -> Int -- spill slot to use
- -> Instr
-
-x86_mkLoadInstr dflags reg delta slot
- = let off = spillSlotToOffset platform slot - delta
- in
- case targetClassOfReg platform reg of
- RcInteger -> MOV (archWordFormat is32Bit)
- (OpAddr (spRel dflags off)) (OpReg reg)
- RcDouble -> MOV FF64 (OpAddr (spRel dflags off)) (OpReg reg)
- _ -> panic "X86.x86_mkLoadInstr"
- where platform = targetPlatform dflags
- is32Bit = target32Bit platform
-
-spillSlotSize :: Platform -> Int
-spillSlotSize dflags = if is32Bit then 12 else 8
- where is32Bit = target32Bit dflags
-
-maxSpillSlots :: DynFlags -> Int
-maxSpillSlots dflags
- = ((rESERVED_C_STACK_BYTES dflags - 64) `div` spillSlotSize (targetPlatform dflags)) - 1
--- = 0 -- useful for testing allocMoreStack
-
--- number of bytes that the stack pointer should be aligned to
-stackAlign :: Int
-stackAlign = 16
-
--- convert a spill slot number to a *byte* offset, with no sign:
--- decide on a per arch basis whether you are spilling above or below
--- the C stack pointer.
-spillSlotToOffset :: Platform -> Int -> Int
-spillSlotToOffset platform slot
- = 64 + spillSlotSize platform * slot
-
---------------------------------------------------------------------------------
-
--- | See if this instruction is telling us the current C stack delta
-x86_takeDeltaInstr
- :: Instr
- -> Maybe Int
-
-x86_takeDeltaInstr instr
- = case instr of
- DELTA i -> Just i
- _ -> Nothing
-
-
-x86_isMetaInstr
- :: Instr
- -> Bool
-
-x86_isMetaInstr instr
- = case instr of
- COMMENT{} -> True
- LOCATION{} -> True
- LDATA{} -> True
- NEWBLOCK{} -> True
- UNWIND{} -> True
- DELTA{} -> True
- _ -> False
-
-
-
---- TODO: why is there
--- | Make a reg-reg move instruction.
--- On SPARC v8 there are no instructions to move directly between
--- floating point and integer regs. If we need to do that then we
--- have to go via memory.
---
-x86_mkRegRegMoveInstr
- :: Platform
- -> Reg
- -> Reg
- -> Instr
-
-x86_mkRegRegMoveInstr platform src dst
- = case targetClassOfReg platform src of
- RcInteger -> case platformArch platform of
- ArchX86 -> MOV II32 (OpReg src) (OpReg dst)
- ArchX86_64 -> MOV II64 (OpReg src) (OpReg dst)
- _ -> panic "x86_mkRegRegMoveInstr: Bad arch"
- RcDouble -> MOV FF64 (OpReg src) (OpReg dst)
- -- this code is the lie we tell ourselves because both float and double
- -- use the same register class.on x86_64 and x86 32bit with SSE2,
- -- more plainly, both use the XMM registers
- _ -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
-
--- | Check whether an instruction represents a reg-reg move.
--- The register allocator attempts to eliminate reg->reg moves whenever it can,
--- by assigning the src and dest temporaries to the same real register.
---
-x86_takeRegRegMoveInstr
- :: Instr
- -> Maybe (Reg,Reg)
-
-x86_takeRegRegMoveInstr (MOV _ (OpReg r1) (OpReg r2))
- = Just (r1,r2)
-
-x86_takeRegRegMoveInstr _ = Nothing
-
-
--- | Make an unconditional branch instruction.
-x86_mkJumpInstr
- :: BlockId
- -> [Instr]
-
-x86_mkJumpInstr id
- = [JXX ALWAYS id]
-
--- Note [Windows stack layout]
--- | On most OSes the kernel will place a guard page after the current stack
--- page. If you allocate larger than a page worth you may jump over this
--- guard page. Not only is this a security issue, but on certain OSes such
--- as Windows a new page won't be allocated if you don't hit the guard. This
--- will cause a segfault or access fault.
---
--- This function defines if the current allocation amount requires a probe.
--- On Windows (for now) we emit a call to _chkstk for this. For other OSes
--- this is not yet implemented.
--- See https://docs.microsoft.com/en-us/windows/desktop/DevNotes/-win32-chkstk
--- The Windows stack looks like this:
---
--- +-------------------+
--- | SP |
--- +-------------------+
--- | |
--- | GUARD PAGE |
--- | |
--- +-------------------+
--- | |
--- | |
--- | UNMAPPED |
--- | |
--- | |
--- +-------------------+
---
--- In essence each allocation larger than a page size needs to be chunked and
--- a probe emitted after each page allocation. You have to hit the guard
--- page so the kernel can map in the next page, otherwise you'll segfault.
---
-needs_probe_call :: Platform -> Int -> Bool
-needs_probe_call platform amount
- = case platformOS platform of
- OSMinGW32 -> case platformArch platform of
- ArchX86 -> amount > (4 * 1024)
- ArchX86_64 -> amount > (8 * 1024)
- _ -> False
- _ -> False
-
-x86_mkStackAllocInstr
- :: Platform
- -> Int
- -> [Instr]
-x86_mkStackAllocInstr platform amount
- = case platformOS platform of
- OSMinGW32 ->
- -- These will clobber AX but this should be ok because
- --
- -- 1. It is the first thing we do when entering the closure and AX is
- -- a caller saved registers on Windows both on x86_64 and x86.
- --
- -- 2. The closures are only entered via a call or longjmp in which case
- -- there are no expectations for volatile registers.
- --
- -- 3. When the target is a local branch point it is re-targeted
- -- after the dealloc, preserving #2. See note [extra spill slots].
- --
- -- We emit a call because the stack probes are quite involved and
- -- would bloat code size a lot. GHC doesn't really have an -Os.
- -- __chkstk is guaranteed to leave all nonvolatile registers and AX
- -- untouched. It's part of the standard prologue code for any Windows
- -- function dropping the stack more than a page.
- -- See Note [Windows stack layout]
- case platformArch platform of
- ArchX86 | needs_probe_call platform amount ->
- [ MOV II32 (OpImm (ImmInt amount)) (OpReg eax)
- , CALL (Left $ strImmLit "___chkstk_ms") [eax]
- , SUB II32 (OpReg eax) (OpReg esp)
- ]
- | otherwise ->
- [ SUB II32 (OpImm (ImmInt amount)) (OpReg esp)
- , TEST II32 (OpReg esp) (OpReg esp)
- ]
- ArchX86_64 | needs_probe_call platform amount ->
- [ MOV II64 (OpImm (ImmInt amount)) (OpReg rax)
- , CALL (Left $ strImmLit "___chkstk_ms") [rax]
- , SUB II64 (OpReg rax) (OpReg rsp)
- ]
- | otherwise ->
- [ SUB II64 (OpImm (ImmInt amount)) (OpReg rsp)
- , TEST II64 (OpReg rsp) (OpReg rsp)
- ]
- _ -> panic "x86_mkStackAllocInstr"
- _ ->
- case platformArch platform of
- ArchX86 -> [ SUB II32 (OpImm (ImmInt amount)) (OpReg esp) ]
- ArchX86_64 -> [ SUB II64 (OpImm (ImmInt amount)) (OpReg rsp) ]
- _ -> panic "x86_mkStackAllocInstr"
-
-x86_mkStackDeallocInstr
- :: Platform
- -> Int
- -> [Instr]
-x86_mkStackDeallocInstr platform amount
- = case platformArch platform of
- ArchX86 -> [ADD II32 (OpImm (ImmInt amount)) (OpReg esp)]
- ArchX86_64 -> [ADD II64 (OpImm (ImmInt amount)) (OpReg rsp)]
- _ -> panic "x86_mkStackDeallocInstr"
-
-
---
--- Note [extra spill slots]
---
--- If the register allocator used more spill slots than we have
--- pre-allocated (rESERVED_C_STACK_BYTES), then we must allocate more
--- C stack space on entry and exit from this proc. Therefore we
--- insert a "sub $N, %rsp" at every entry point, and an "add $N, %rsp"
--- before every non-local jump.
---
--- This became necessary when the new codegen started bundling entire
--- functions together into one proc, because the register allocator
--- assigns a different stack slot to each virtual reg within a proc.
--- To avoid using so many slots we could also:
---
--- - split up the proc into connected components before code generator
---
--- - rename the virtual regs, so that we re-use vreg names and hence
--- stack slots for non-overlapping vregs.
---
--- Note that when a block is both a non-local entry point (with an
--- info table) and a local branch target, we have to split it into
--- two, like so:
---
--- <info table>
--- L:
--- <code>
---
--- becomes
---
--- <info table>
--- L:
--- subl $rsp, N
--- jmp Lnew
--- Lnew:
--- <code>
---
--- and all branches pointing to L are retargetted to point to Lnew.
--- Otherwise, we would repeat the $rsp adjustment for each branch to
--- L.
---
--- Returns a list of (L,Lnew) pairs.
---
-allocMoreStack
- :: Platform
- -> Int
- -> NatCmmDecl statics X86.Instr.Instr
- -> UniqSM (NatCmmDecl statics X86.Instr.Instr, [(BlockId,BlockId)])
-
-allocMoreStack _ _ top@(CmmData _ _) = return (top,[])
-allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do
- let entries = entryBlocks proc
-
- uniqs <- replicateM (length entries) getUniqueM
-
- let
- delta = ((x + stackAlign - 1) `quot` stackAlign) * stackAlign -- round up
- where x = slots * spillSlotSize platform -- sp delta
-
- alloc = mkStackAllocInstr platform delta
- dealloc = mkStackDeallocInstr platform delta
-
- retargetList = (zip entries (map mkBlockId uniqs))
-
- new_blockmap :: LabelMap BlockId
- new_blockmap = mapFromList retargetList
-
- insert_stack_insns (BasicBlock id insns)
- | Just new_blockid <- mapLookup id new_blockmap
- = [ BasicBlock id $ alloc ++ [JXX ALWAYS new_blockid]
- , BasicBlock new_blockid block' ]
- | otherwise
- = [ BasicBlock id block' ]
- where
- block' = foldr insert_dealloc [] insns
-
- insert_dealloc insn r = case insn of
- JMP _ _ -> dealloc ++ (insn : r)
- JXX_GBL _ _ -> panic "insert_dealloc: cannot handle JXX_GBL"
- _other -> x86_patchJumpInstr insn retarget : r
- where retarget b = fromMaybe b (mapLookup b new_blockmap)
-
- new_code = concatMap insert_stack_insns code
- -- in
- return (CmmProc info lbl live (ListGraph new_code), retargetList)
-
-data JumpDest = DestBlockId BlockId | DestImm Imm
-
--- Debug Instance
-instance Outputable JumpDest where
- ppr (DestBlockId bid) = text "jd<blk>:" <> ppr bid
- ppr (DestImm _imm) = text "jd<imm>:noShow"
-
-
-getJumpDestBlockId :: JumpDest -> Maybe BlockId
-getJumpDestBlockId (DestBlockId bid) = Just bid
-getJumpDestBlockId _ = Nothing
-
-canShortcut :: Instr -> Maybe JumpDest
-canShortcut (JXX ALWAYS id) = Just (DestBlockId id)
-canShortcut (JMP (OpImm imm) _) = Just (DestImm imm)
-canShortcut _ = Nothing
-
-
--- This helper shortcuts a sequence of branches.
--- The blockset helps avoid following cycles.
-shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
-shortcutJump fn insn = shortcutJump' fn (setEmpty :: LabelSet) insn
- where
- shortcutJump' :: (BlockId -> Maybe JumpDest) -> LabelSet -> Instr -> Instr
- shortcutJump' fn seen insn@(JXX cc id) =
- if setMember id seen then insn
- else case fn id of
- Nothing -> insn
- Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id')
- Just (DestImm imm) -> shortcutJump' fn seen' (JXX_GBL cc imm)
- where seen' = setInsert id seen
- shortcutJump' fn _ (JMP_TBL addr blocks section tblId) =
- let updateBlock (Just (DestBlockId bid)) =
- case fn bid of
- Nothing -> Just (DestBlockId bid )
- Just dest -> Just dest
- updateBlock dest = dest
- blocks' = map updateBlock blocks
- in JMP_TBL addr blocks' section tblId
- shortcutJump' _ _ other = other
-
--- Here because it knows about JumpDest
-shortcutStatics :: (BlockId -> Maybe JumpDest) -> (Alignment, RawCmmStatics) -> (Alignment, RawCmmStatics)
-shortcutStatics fn (align, RawCmmStatics lbl statics)
- = (align, RawCmmStatics lbl $ map (shortcutStatic fn) statics)
- -- we need to get the jump tables, so apply the mapping to the entries
- -- of a CmmData too.
-
-shortcutLabel :: (BlockId -> Maybe JumpDest) -> CLabel -> CLabel
-shortcutLabel fn lab
- | Just blkId <- maybeLocalBlockLabel lab = shortBlockId fn emptyUniqSet blkId
- | otherwise = lab
-
-shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
-shortcutStatic fn (CmmStaticLit (CmmLabel lab))
- = CmmStaticLit (CmmLabel (shortcutLabel fn lab))
-shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off w))
- = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off w)
- -- slightly dodgy, we're ignoring the second label, but this
- -- works with the way we use CmmLabelDiffOff for jump tables now.
-shortcutStatic _ other_static
- = other_static
-
-shortBlockId
- :: (BlockId -> Maybe JumpDest)
- -> UniqSet Unique
- -> BlockId
- -> CLabel
-
-shortBlockId fn seen blockid =
- case (elementOfUniqSet uq seen, fn blockid) of
- (True, _) -> blockLbl blockid
- (_, Nothing) -> blockLbl blockid
- (_, Just (DestBlockId blockid')) -> shortBlockId fn (addOneToUniqSet seen uq) blockid'
- (_, Just (DestImm (ImmCLbl lbl))) -> lbl
- (_, _other) -> panic "shortBlockId"
- where uq = getUnique blockid
diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs
deleted file mode 100644
index 4abc15cedd..0000000000
--- a/compiler/nativeGen/X86/Ppr.hs
+++ /dev/null
@@ -1,1014 +0,0 @@
-{-# LANGUAGE CPP #-}
-
------------------------------------------------------------------------------
---
--- Pretty-printing assembly language
---
--- (c) The University of Glasgow 1993-2005
---
------------------------------------------------------------------------------
-
-{-# OPTIONS_GHC -fno-warn-orphans #-}
-module X86.Ppr (
- pprNatCmmDecl,
- pprData,
- pprInstr,
- pprFormat,
- pprImm,
- pprDataItem,
-)
-
-where
-
-#include "HsVersions.h"
-
-import GhcPrelude
-
-import X86.Regs
-import X86.Instr
-import X86.Cond
-import Instruction
-import Format
-import Reg
-import PprBase
-
-
-import GHC.Cmm.Dataflow.Collections
-import GHC.Cmm.Dataflow.Label
-import BasicTypes (Alignment, mkAlignment, alignmentBytes)
-import GHC.Driver.Session
-import GHC.Cmm hiding (topInfoTable)
-import GHC.Cmm.BlockId
-import GHC.Cmm.CLabel
-import Unique ( pprUniqueAlways )
-import GHC.Platform
-import FastString
-import Outputable
-
-import Data.Word
-import Data.Bits
-
--- -----------------------------------------------------------------------------
--- Printing this stuff out
---
---
--- Note [Subsections Via Symbols]
---
--- If we are using the .subsections_via_symbols directive
--- (available on recent versions of Darwin),
--- we have to make sure that there is some kind of reference
--- from the entry code to a label on the _top_ of of the info table,
--- so that the linker will not think it is unreferenced and dead-strip
--- it. That's why the label is called a DeadStripPreventer (_dsp).
---
--- The LLVM code gen already creates `iTableSuf` symbols, where
--- the X86 would generate the DeadStripPreventer (_dsp) symbol.
--- Therefore all that is left for llvm code gen, is to ensure
--- that all the `iTableSuf` symbols are marked as used.
--- As of this writing the documentation regarding the
--- .subsections_via_symbols and -dead_strip can be found at
--- <https://developer.apple.com/library/mac/documentation/DeveloperTools/Reference/Assembler/040-Assembler_Directives/asm_directives.html#//apple_ref/doc/uid/TP30000823-TPXREF101>
-
-pprProcAlignment :: SDoc
-pprProcAlignment = sdocWithDynFlags $ \dflags ->
- (maybe empty (pprAlign . mkAlignment) (cmmProcAlignment dflags))
-
-pprNatCmmDecl :: NatCmmDecl (Alignment, RawCmmStatics) Instr -> SDoc
-pprNatCmmDecl (CmmData section dats) =
- pprSectionAlign section $$ pprDatas dats
-
-pprNatCmmDecl proc@(CmmProc top_info lbl _ (ListGraph blocks)) =
- sdocWithDynFlags $ \dflags ->
- pprProcAlignment $$
- case topInfoTable proc of
- Nothing ->
- -- special case for code without info table:
- pprSectionAlign (Section Text lbl) $$
- pprProcAlignment $$
- pprLabel lbl $$ -- blocks guaranteed not null, so label needed
- vcat (map (pprBasicBlock top_info) blocks) $$
- (if debugLevel dflags > 0
- then ppr (mkAsmTempEndLabel lbl) <> char ':' else empty) $$
- pprSizeDecl lbl
-
- Just (RawCmmStatics info_lbl _) ->
- sdocWithPlatform $ \platform ->
- pprSectionAlign (Section Text info_lbl) $$
- pprProcAlignment $$
- (if platformHasSubsectionsViaSymbols platform
- then ppr (mkDeadStripPreventer info_lbl) <> char ':'
- else empty) $$
- vcat (map (pprBasicBlock top_info) blocks) $$
- -- above: Even the first block gets a label, because with branch-chain
- -- elimination, it might be the target of a goto.
- (if platformHasSubsectionsViaSymbols platform
- then -- See Note [Subsections Via Symbols]
- text "\t.long "
- <+> ppr info_lbl
- <+> char '-'
- <+> ppr (mkDeadStripPreventer info_lbl)
- else empty) $$
- pprSizeDecl info_lbl
-
--- | Output the ELF .size directive.
-pprSizeDecl :: CLabel -> SDoc
-pprSizeDecl lbl
- = sdocWithPlatform $ \platform ->
- if osElfTarget (platformOS platform)
- then text "\t.size" <+> ppr lbl <> ptext (sLit ", .-") <> ppr lbl
- else empty
-
-pprBasicBlock :: LabelMap RawCmmStatics -> NatBasicBlock Instr -> SDoc
-pprBasicBlock info_env (BasicBlock blockid instrs)
- = maybe_infotable $
- pprLabel asmLbl $$
- vcat (map pprInstr instrs) $$
- (sdocOption sdocDebugLevel $ \level ->
- if level > 0
- then ppr (mkAsmTempEndLabel asmLbl) <> char ':'
- else empty
- )
- where
- asmLbl = blockLbl blockid
- maybe_infotable c = case mapLookup blockid info_env of
- Nothing -> c
- Just (RawCmmStatics infoLbl info) ->
- pprAlignForSection Text $$
- infoTableLoc $$
- vcat (map pprData info) $$
- pprLabel infoLbl $$
- c $$
- (sdocOption sdocDebugLevel $ \level ->
- if level > 0
- then ppr (mkAsmTempEndLabel infoLbl) <> char ':'
- else empty
- )
- -- Make sure the info table has the right .loc for the block
- -- coming right after it. See [Note: Info Offset]
- infoTableLoc = case instrs of
- (l@LOCATION{} : _) -> pprInstr l
- _other -> empty
-
-
-pprDatas :: (Alignment, RawCmmStatics) -> SDoc
--- See note [emit-time elimination of static indirections] in CLabel.
-pprDatas (_, RawCmmStatics alias [CmmStaticLit (CmmLabel lbl), CmmStaticLit ind, _, _])
- | lbl == mkIndStaticInfoLabel
- , let labelInd (CmmLabelOff l _) = Just l
- labelInd (CmmLabel l) = Just l
- labelInd _ = Nothing
- , Just ind' <- labelInd ind
- , alias `mayRedirectTo` ind'
- = pprGloblDecl alias
- $$ text ".equiv" <+> ppr alias <> comma <> ppr (CmmLabel ind')
-
-pprDatas (align, (RawCmmStatics lbl dats))
- = vcat (pprAlign align : pprLabel lbl : map pprData dats)
-
-pprData :: CmmStatic -> SDoc
-pprData (CmmString str) = pprBytes str
-
-pprData (CmmUninitialised bytes)
- = sdocWithPlatform $ \platform ->
- if platformOS platform == OSDarwin then text ".space " <> int bytes
- else text ".skip " <> int bytes
-
-pprData (CmmStaticLit lit) = pprDataItem lit
-
-pprGloblDecl :: CLabel -> SDoc
-pprGloblDecl lbl
- | not (externallyVisibleCLabel lbl) = empty
- | otherwise = text ".globl " <> ppr lbl
-
-pprLabelType' :: DynFlags -> CLabel -> SDoc
-pprLabelType' dflags lbl =
- if isCFunctionLabel lbl || functionOkInfoTable then
- text "@function"
- else
- text "@object"
- where
- {-
- NOTE: This is a bit hacky.
-
- With the `tablesNextToCode` info tables look like this:
- ```
- <info table data>
- label_info:
- <info table code>
- ```
- So actually info table label points exactly to the code and we can mark
- the label as @function. (This is required to make perf and potentially other
- tools to work on Haskell binaries).
- This usually works well but it can cause issues with a linker.
- A linker uses different algorithms for the relocation depending on
- the symbol type.For some reason, a linker will generate JUMP_SLOT relocation
- when constructor info table is referenced from a data section.
- This only happens with static constructor call so
- we mark _con_info symbols as `@object` to avoid the issue with relocations.
-
- @SimonMarlow hack explanation:
- "The reasoning goes like this:
-
- * The danger when we mark a symbol as `@function` is that the linker will
- redirect it to point to the PLT and use a `JUMP_SLOT` relocation when
- the symbol refers to something outside the current shared object.
- A PLT / JUMP_SLOT reference only works for symbols that we jump to, not
- for symbols representing data,, nor for info table symbol references which
- we expect to point directly to the info table.
- * GHC generates code that might refer to any info table symbol from the text
- segment, but that's OK, because those will be explicit GOT references
- generated by the code generator.
- * When we refer to info tables from the data segment, it's either
- * a FUN_STATIC/THUNK_STATIC local to this module
- * a `con_info` that could be from anywhere
-
- So, the only info table symbols that we might refer to from the data segment
- of another shared object are `con_info` symbols, so those are the ones we
- need to exclude from getting the @function treatment.
- "
-
- A good place to check for more
- https://gitlab.haskell.org/ghc/ghc/wikis/commentary/position-independent-code
-
- Another possible hack is to create an extra local function symbol for
- every code-like thing to give the needed information for to the tools
- but mess up with the relocation. https://phabricator.haskell.org/D4730
- -}
- functionOkInfoTable = tablesNextToCode dflags &&
- isInfoTableLabel lbl && not (isConInfoTableLabel lbl)
-
-
-pprTypeDecl :: CLabel -> SDoc
-pprTypeDecl lbl
- = sdocWithPlatform $ \platform ->
- if osElfTarget (platformOS platform) && externallyVisibleCLabel lbl
- then
- sdocWithDynFlags $ \df ->
- text ".type " <> ppr lbl <> ptext (sLit ", ") <> pprLabelType' df lbl
- else empty
-
-pprLabel :: CLabel -> SDoc
-pprLabel lbl = pprGloblDecl lbl
- $$ pprTypeDecl lbl
- $$ (ppr lbl <> char ':')
-
-pprAlign :: Alignment -> SDoc
-pprAlign alignment
- = sdocWithPlatform $ \platform ->
- text ".align " <> int (alignmentOn platform)
- where
- bytes = alignmentBytes alignment
- alignmentOn platform = if platformOS platform == OSDarwin
- then log2 bytes
- else bytes
-
- log2 :: Int -> Int -- cache the common ones
- log2 1 = 0
- log2 2 = 1
- log2 4 = 2
- log2 8 = 3
- log2 n = 1 + log2 (n `quot` 2)
-
--- -----------------------------------------------------------------------------
--- pprInstr: print an 'Instr'
-
-instance Outputable Instr where
- ppr instr = pprInstr instr
-
-
-pprReg :: Format -> Reg -> SDoc
-pprReg f r
- = case r of
- RegReal (RealRegSingle i) ->
- sdocWithPlatform $ \platform ->
- if target32Bit platform then ppr32_reg_no f i
- else ppr64_reg_no f i
- RegReal (RealRegPair _ _) -> panic "X86.Ppr: no reg pairs on this arch"
- RegVirtual (VirtualRegI u) -> text "%vI_" <> pprUniqueAlways u
- RegVirtual (VirtualRegHi u) -> text "%vHi_" <> pprUniqueAlways u
- RegVirtual (VirtualRegF u) -> text "%vF_" <> pprUniqueAlways u
- RegVirtual (VirtualRegD u) -> text "%vD_" <> pprUniqueAlways u
-
- where
- ppr32_reg_no :: Format -> Int -> SDoc
- ppr32_reg_no II8 = ppr32_reg_byte
- ppr32_reg_no II16 = ppr32_reg_word
- ppr32_reg_no _ = ppr32_reg_long
-
- ppr32_reg_byte i = ptext
- (case i of {
- 0 -> sLit "%al"; 1 -> sLit "%bl";
- 2 -> sLit "%cl"; 3 -> sLit "%dl";
- _ -> sLit $ "very naughty I386 byte register: " ++ show i
- })
-
- ppr32_reg_word i = ptext
- (case i of {
- 0 -> sLit "%ax"; 1 -> sLit "%bx";
- 2 -> sLit "%cx"; 3 -> sLit "%dx";
- 4 -> sLit "%si"; 5 -> sLit "%di";
- 6 -> sLit "%bp"; 7 -> sLit "%sp";
- _ -> sLit "very naughty I386 word register"
- })
-
- ppr32_reg_long i = ptext
- (case i of {
- 0 -> sLit "%eax"; 1 -> sLit "%ebx";
- 2 -> sLit "%ecx"; 3 -> sLit "%edx";
- 4 -> sLit "%esi"; 5 -> sLit "%edi";
- 6 -> sLit "%ebp"; 7 -> sLit "%esp";
- _ -> ppr_reg_float i
- })
-
- ppr64_reg_no :: Format -> Int -> SDoc
- ppr64_reg_no II8 = ppr64_reg_byte
- ppr64_reg_no II16 = ppr64_reg_word
- ppr64_reg_no II32 = ppr64_reg_long
- ppr64_reg_no _ = ppr64_reg_quad
-
- ppr64_reg_byte i = ptext
- (case i of {
- 0 -> sLit "%al"; 1 -> sLit "%bl";
- 2 -> sLit "%cl"; 3 -> sLit "%dl";
- 4 -> sLit "%sil"; 5 -> sLit "%dil"; -- new 8-bit regs!
- 6 -> sLit "%bpl"; 7 -> sLit "%spl";
- 8 -> sLit "%r8b"; 9 -> sLit "%r9b";
- 10 -> sLit "%r10b"; 11 -> sLit "%r11b";
- 12 -> sLit "%r12b"; 13 -> sLit "%r13b";
- 14 -> sLit "%r14b"; 15 -> sLit "%r15b";
- _ -> sLit $ "very naughty x86_64 byte register: " ++ show i
- })
-
- ppr64_reg_word i = ptext
- (case i of {
- 0 -> sLit "%ax"; 1 -> sLit "%bx";
- 2 -> sLit "%cx"; 3 -> sLit "%dx";
- 4 -> sLit "%si"; 5 -> sLit "%di";
- 6 -> sLit "%bp"; 7 -> sLit "%sp";
- 8 -> sLit "%r8w"; 9 -> sLit "%r9w";
- 10 -> sLit "%r10w"; 11 -> sLit "%r11w";
- 12 -> sLit "%r12w"; 13 -> sLit "%r13w";
- 14 -> sLit "%r14w"; 15 -> sLit "%r15w";
- _ -> sLit "very naughty x86_64 word register"
- })
-
- ppr64_reg_long i = ptext
- (case i of {
- 0 -> sLit "%eax"; 1 -> sLit "%ebx";
- 2 -> sLit "%ecx"; 3 -> sLit "%edx";
- 4 -> sLit "%esi"; 5 -> sLit "%edi";
- 6 -> sLit "%ebp"; 7 -> sLit "%esp";
- 8 -> sLit "%r8d"; 9 -> sLit "%r9d";
- 10 -> sLit "%r10d"; 11 -> sLit "%r11d";
- 12 -> sLit "%r12d"; 13 -> sLit "%r13d";
- 14 -> sLit "%r14d"; 15 -> sLit "%r15d";
- _ -> sLit "very naughty x86_64 register"
- })
-
- ppr64_reg_quad i = ptext
- (case i of {
- 0 -> sLit "%rax"; 1 -> sLit "%rbx";
- 2 -> sLit "%rcx"; 3 -> sLit "%rdx";
- 4 -> sLit "%rsi"; 5 -> sLit "%rdi";
- 6 -> sLit "%rbp"; 7 -> sLit "%rsp";
- 8 -> sLit "%r8"; 9 -> sLit "%r9";
- 10 -> sLit "%r10"; 11 -> sLit "%r11";
- 12 -> sLit "%r12"; 13 -> sLit "%r13";
- 14 -> sLit "%r14"; 15 -> sLit "%r15";
- _ -> ppr_reg_float i
- })
-
-ppr_reg_float :: Int -> PtrString
-ppr_reg_float i = case i of
- 16 -> sLit "%xmm0" ; 17 -> sLit "%xmm1"
- 18 -> sLit "%xmm2" ; 19 -> sLit "%xmm3"
- 20 -> sLit "%xmm4" ; 21 -> sLit "%xmm5"
- 22 -> sLit "%xmm6" ; 23 -> sLit "%xmm7"
- 24 -> sLit "%xmm8" ; 25 -> sLit "%xmm9"
- 26 -> sLit "%xmm10"; 27 -> sLit "%xmm11"
- 28 -> sLit "%xmm12"; 29 -> sLit "%xmm13"
- 30 -> sLit "%xmm14"; 31 -> sLit "%xmm15"
- _ -> sLit "very naughty x86 register"
-
-pprFormat :: Format -> SDoc
-pprFormat x
- = ptext (case x of
- II8 -> sLit "b"
- II16 -> sLit "w"
- II32 -> sLit "l"
- II64 -> sLit "q"
- FF32 -> sLit "ss" -- "scalar single-precision float" (SSE2)
- FF64 -> sLit "sd" -- "scalar double-precision float" (SSE2)
- )
-
-pprFormat_x87 :: Format -> SDoc
-pprFormat_x87 x
- = ptext $ case x of
- FF32 -> sLit "s"
- FF64 -> sLit "l"
- _ -> panic "X86.Ppr.pprFormat_x87"
-
-
-pprCond :: Cond -> SDoc
-pprCond c
- = ptext (case c of {
- GEU -> sLit "ae"; LU -> sLit "b";
- EQQ -> sLit "e"; GTT -> sLit "g";
- GE -> sLit "ge"; GU -> sLit "a";
- LTT -> sLit "l"; LE -> sLit "le";
- LEU -> sLit "be"; NE -> sLit "ne";
- NEG -> sLit "s"; POS -> sLit "ns";
- CARRY -> sLit "c"; OFLO -> sLit "o";
- PARITY -> sLit "p"; NOTPARITY -> sLit "np";
- ALWAYS -> sLit "mp"})
-
-
-pprImm :: Imm -> SDoc
-pprImm (ImmInt i) = int i
-pprImm (ImmInteger i) = integer i
-pprImm (ImmCLbl l) = ppr l
-pprImm (ImmIndex l i) = ppr l <> char '+' <> int i
-pprImm (ImmLit s) = s
-
-pprImm (ImmFloat _) = text "naughty float immediate"
-pprImm (ImmDouble _) = text "naughty double immediate"
-
-pprImm (ImmConstantSum a b) = pprImm a <> char '+' <> pprImm b
-pprImm (ImmConstantDiff a b) = pprImm a <> char '-'
- <> lparen <> pprImm b <> rparen
-
-
-
-pprAddr :: AddrMode -> SDoc
-pprAddr (ImmAddr imm off)
- = let pp_imm = pprImm imm
- in
- if (off == 0) then
- pp_imm
- else if (off < 0) then
- pp_imm <> int off
- else
- pp_imm <> char '+' <> int off
-
-pprAddr (AddrBaseIndex base index displacement)
- = sdocWithPlatform $ \platform ->
- let
- pp_disp = ppr_disp displacement
- pp_off p = pp_disp <> char '(' <> p <> char ')'
- pp_reg r = pprReg (archWordFormat (target32Bit platform)) r
- in
- case (base, index) of
- (EABaseNone, EAIndexNone) -> pp_disp
- (EABaseReg b, EAIndexNone) -> pp_off (pp_reg b)
- (EABaseRip, EAIndexNone) -> pp_off (text "%rip")
- (EABaseNone, EAIndex r i) -> pp_off (comma <> pp_reg r <> comma <> int i)
- (EABaseReg b, EAIndex r i) -> pp_off (pp_reg b <> comma <> pp_reg r
- <> comma <> int i)
- _ -> panic "X86.Ppr.pprAddr: no match"
-
- where
- ppr_disp (ImmInt 0) = empty
- ppr_disp imm = pprImm imm
-
--- | Print section header and appropriate alignment for that section.
-pprSectionAlign :: Section -> SDoc
-pprSectionAlign (Section (OtherSection _) _) =
- panic "X86.Ppr.pprSectionAlign: unknown section"
-pprSectionAlign sec@(Section seg _) =
- sdocWithPlatform $ \platform ->
- pprSectionHeader platform sec $$
- pprAlignForSection seg
-
--- | Print appropriate alignment for the given section type.
-pprAlignForSection :: SectionType -> SDoc
-pprAlignForSection seg =
- sdocWithPlatform $ \platform ->
- text ".align " <>
- case platformOS platform of
- -- Darwin: alignments are given as shifts.
- OSDarwin
- | target32Bit platform ->
- case seg of
- ReadOnlyData16 -> int 4
- CString -> int 1
- _ -> int 2
- | otherwise ->
- case seg of
- ReadOnlyData16 -> int 4
- CString -> int 1
- _ -> int 3
- -- Other: alignments are given as bytes.
- _
- | target32Bit platform ->
- case seg of
- Text -> text "4,0x90"
- ReadOnlyData16 -> int 16
- CString -> int 1
- _ -> int 4
- | otherwise ->
- case seg of
- ReadOnlyData16 -> int 16
- CString -> int 1
- _ -> int 8
-
-pprDataItem :: CmmLit -> SDoc
-pprDataItem lit = sdocWithDynFlags $ \dflags -> pprDataItem' dflags lit
-
-pprDataItem' :: DynFlags -> CmmLit -> SDoc
-pprDataItem' dflags lit
- = vcat (ppr_item (cmmTypeFormat $ cmmLitType dflags lit) lit)
- where
- platform = targetPlatform dflags
- imm = litToImm lit
-
- -- These seem to be common:
- ppr_item II8 _ = [text "\t.byte\t" <> pprImm imm]
- ppr_item II16 _ = [text "\t.word\t" <> pprImm imm]
- ppr_item II32 _ = [text "\t.long\t" <> pprImm imm]
-
- ppr_item FF32 (CmmFloat r _)
- = let bs = floatToBytes (fromRational r)
- in map (\b -> text "\t.byte\t" <> pprImm (ImmInt b)) bs
-
- ppr_item FF64 (CmmFloat r _)
- = let bs = doubleToBytes (fromRational r)
- in map (\b -> text "\t.byte\t" <> pprImm (ImmInt b)) bs
-
- ppr_item II64 _
- = case platformOS platform of
- OSDarwin
- | target32Bit platform ->
- case lit of
- CmmInt x _ ->
- [text "\t.long\t"
- <> int (fromIntegral (fromIntegral x :: Word32)),
- text "\t.long\t"
- <> int (fromIntegral
- (fromIntegral (x `shiftR` 32) :: Word32))]
- _ -> panic "X86.Ppr.ppr_item: no match for II64"
- | otherwise ->
- [text "\t.quad\t" <> pprImm imm]
- _
- | target32Bit platform ->
- [text "\t.quad\t" <> pprImm imm]
- | otherwise ->
- -- x86_64: binutils can't handle the R_X86_64_PC64
- -- relocation type, which means we can't do
- -- pc-relative 64-bit addresses. Fortunately we're
- -- assuming the small memory model, in which all such
- -- offsets will fit into 32 bits, so we have to stick
- -- to 32-bit offset fields and modify the RTS
- -- appropriately
- --
- -- See Note [x86-64-relative] in includes/rts/storage/InfoTables.h
- --
- case lit of
- -- A relative relocation:
- CmmLabelDiffOff _ _ _ _ ->
- [text "\t.long\t" <> pprImm imm,
- text "\t.long\t0"]
- _ ->
- [text "\t.quad\t" <> pprImm imm]
-
- ppr_item _ _
- = panic "X86.Ppr.ppr_item: no match"
-
-
-asmComment :: SDoc -> SDoc
-asmComment c = whenPprDebug $ text "# " <> c
-
-pprInstr :: Instr -> SDoc
-
-pprInstr (COMMENT s)
- = asmComment (ftext s)
-
-pprInstr (LOCATION file line col _name)
- = text "\t.loc " <> ppr file <+> ppr line <+> ppr col
-
-pprInstr (DELTA d)
- = asmComment $ text ("\tdelta = " ++ show d)
-
-pprInstr (NEWBLOCK _)
- = panic "PprMach.pprInstr: NEWBLOCK"
-
-pprInstr (UNWIND lbl d)
- = asmComment (text "\tunwind = " <> ppr d)
- $$ ppr lbl <> colon
-
-pprInstr (LDATA _ _)
- = panic "PprMach.pprInstr: LDATA"
-
-{-
-pprInstr (SPILL reg slot)
- = hcat [
- text "\tSPILL",
- char ' ',
- pprUserReg reg,
- comma,
- text "SLOT" <> parens (int slot)]
-
-pprInstr (RELOAD slot reg)
- = hcat [
- text "\tRELOAD",
- char ' ',
- text "SLOT" <> parens (int slot),
- comma,
- pprUserReg reg]
--}
-
--- Replace 'mov $0x0,%reg' by 'xor %reg,%reg', which is smaller and cheaper.
--- The code generator catches most of these already, but not all.
-pprInstr (MOV format (OpImm (ImmInt 0)) dst@(OpReg _))
- = pprInstr (XOR format' dst dst)
- where format' = case format of
- II64 -> II32 -- 32-bit version is equivalent, and smaller
- _ -> format
-pprInstr (MOV format src dst)
- = pprFormatOpOp (sLit "mov") format src dst
-
-pprInstr (CMOV cc format src dst)
- = pprCondOpReg (sLit "cmov") format cc src dst
-
-pprInstr (MOVZxL II32 src dst) = pprFormatOpOp (sLit "mov") II32 src dst
- -- 32-to-64 bit zero extension on x86_64 is accomplished by a simple
- -- movl. But we represent it as a MOVZxL instruction, because
- -- the reg alloc would tend to throw away a plain reg-to-reg
- -- move, and we still want it to do that.
-
-pprInstr (MOVZxL formats src dst)
- = pprFormatOpOpCoerce (sLit "movz") formats II32 src dst
- -- zero-extension only needs to extend to 32 bits: on x86_64,
- -- the remaining zero-extension to 64 bits is automatic, and the 32-bit
- -- instruction is shorter.
-
-pprInstr (MOVSxL formats src dst)
- = sdocWithPlatform $ \platform ->
- pprFormatOpOpCoerce (sLit "movs") formats (archWordFormat (target32Bit platform)) src dst
-
--- here we do some patching, since the physical registers are only set late
--- in the code generation.
-pprInstr (LEA format (OpAddr (AddrBaseIndex (EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3))
- | reg1 == reg3
- = pprFormatOpOp (sLit "add") format (OpReg reg2) dst
-
-pprInstr (LEA format (OpAddr (AddrBaseIndex (EABaseReg reg1) (EAIndex reg2 1) (ImmInt 0))) dst@(OpReg reg3))
- | reg2 == reg3
- = pprFormatOpOp (sLit "add") format (OpReg reg1) dst
-
-pprInstr (LEA format (OpAddr (AddrBaseIndex (EABaseReg reg1) EAIndexNone displ)) dst@(OpReg reg3))
- | reg1 == reg3
- = pprInstr (ADD format (OpImm displ) dst)
-
-pprInstr (LEA format src dst) = pprFormatOpOp (sLit "lea") format src dst
-
-pprInstr (ADD format (OpImm (ImmInt (-1))) dst)
- = pprFormatOp (sLit "dec") format dst
-pprInstr (ADD format (OpImm (ImmInt 1)) dst)
- = pprFormatOp (sLit "inc") format dst
-pprInstr (ADD format src dst) = pprFormatOpOp (sLit "add") format src dst
-pprInstr (ADC format src dst) = pprFormatOpOp (sLit "adc") format src dst
-pprInstr (SUB format src dst) = pprFormatOpOp (sLit "sub") format src dst
-pprInstr (SBB format src dst) = pprFormatOpOp (sLit "sbb") format src dst
-pprInstr (IMUL format op1 op2) = pprFormatOpOp (sLit "imul") format op1 op2
-
-pprInstr (ADD_CC format src dst)
- = pprFormatOpOp (sLit "add") format src dst
-pprInstr (SUB_CC format src dst)
- = pprFormatOpOp (sLit "sub") format src dst
-
-{- A hack. The Intel documentation says that "The two and three
- operand forms [of IMUL] may also be used with unsigned operands
- because the lower half of the product is the same regardless if
- (sic) the operands are signed or unsigned. The CF and OF flags,
- however, cannot be used to determine if the upper half of the
- result is non-zero." So there.
--}
-
--- Use a 32-bit instruction when possible as it saves a byte.
--- Notably, extracting the tag bits of a pointer has this form.
--- TODO: we could save a byte in a subsequent CMP instruction too,
--- but need something like a peephole pass for this
-pprInstr (AND II64 src@(OpImm (ImmInteger mask)) dst)
- | 0 <= mask && mask < 0xffffffff
- = pprInstr (AND II32 src dst)
-pprInstr (AND FF32 src dst) = pprOpOp (sLit "andps") FF32 src dst
-pprInstr (AND FF64 src dst) = pprOpOp (sLit "andpd") FF64 src dst
-pprInstr (AND format src dst) = pprFormatOpOp (sLit "and") format src dst
-pprInstr (OR format src dst) = pprFormatOpOp (sLit "or") format src dst
-
-pprInstr (XOR FF32 src dst) = pprOpOp (sLit "xorps") FF32 src dst
-pprInstr (XOR FF64 src dst) = pprOpOp (sLit "xorpd") FF64 src dst
-pprInstr (XOR format src dst) = pprFormatOpOp (sLit "xor") format src dst
-
-pprInstr (POPCNT format src dst) = pprOpOp (sLit "popcnt") format src (OpReg dst)
-pprInstr (LZCNT format src dst) = pprOpOp (sLit "lzcnt") format src (OpReg dst)
-pprInstr (TZCNT format src dst) = pprOpOp (sLit "tzcnt") format src (OpReg dst)
-pprInstr (BSF format src dst) = pprOpOp (sLit "bsf") format src (OpReg dst)
-pprInstr (BSR format src dst) = pprOpOp (sLit "bsr") format src (OpReg dst)
-
-pprInstr (PDEP format src mask dst) = pprFormatOpOpReg (sLit "pdep") format src mask dst
-pprInstr (PEXT format src mask dst) = pprFormatOpOpReg (sLit "pext") format src mask dst
-
-pprInstr (PREFETCH NTA format src ) = pprFormatOp_ (sLit "prefetchnta") format src
-pprInstr (PREFETCH Lvl0 format src) = pprFormatOp_ (sLit "prefetcht0") format src
-pprInstr (PREFETCH Lvl1 format src) = pprFormatOp_ (sLit "prefetcht1") format src
-pprInstr (PREFETCH Lvl2 format src) = pprFormatOp_ (sLit "prefetcht2") format src
-
-pprInstr (NOT format op) = pprFormatOp (sLit "not") format op
-pprInstr (BSWAP format op) = pprFormatOp (sLit "bswap") format (OpReg op)
-pprInstr (NEGI format op) = pprFormatOp (sLit "neg") format op
-
-pprInstr (SHL format src dst) = pprShift (sLit "shl") format src dst
-pprInstr (SAR format src dst) = pprShift (sLit "sar") format src dst
-pprInstr (SHR format src dst) = pprShift (sLit "shr") format src dst
-
-pprInstr (BT format imm src) = pprFormatImmOp (sLit "bt") format imm src
-
-pprInstr (CMP format src dst)
- | isFloatFormat format = pprFormatOpOp (sLit "ucomi") format src dst -- SSE2
- | otherwise = pprFormatOpOp (sLit "cmp") format src dst
-
-pprInstr (TEST format src dst) = sdocWithPlatform $ \platform ->
- let format' = case (src,dst) of
- -- Match instructions like 'test $0x3,%esi' or 'test $0x7,%rbx'.
- -- We can replace them by equivalent, but smaller instructions
- -- by reducing the size of the immediate operand as far as possible.
- -- (We could handle masks larger than a single byte too,
- -- but it would complicate the code considerably
- -- and tag checks are by far the most common case.)
- -- The mask must have the high bit clear for this smaller encoding
- -- to be completely equivalent to the original; in particular so
- -- that the signed comparison condition bits are the same as they
- -- would be if doing a full word comparison. See #13425.
- (OpImm (ImmInteger mask), OpReg dstReg)
- | 0 <= mask && mask < 128 -> minSizeOfReg platform dstReg
- _ -> format
- in pprFormatOpOp (sLit "test") format' src dst
- where
- minSizeOfReg platform (RegReal (RealRegSingle i))
- | target32Bit platform && i <= 3 = II8 -- al, bl, cl, dl
- | target32Bit platform && i <= 7 = II16 -- si, di, bp, sp
- | not (target32Bit platform) && i <= 15 = II8 -- al .. r15b
- minSizeOfReg _ _ = format -- other
-
-pprInstr (PUSH format op) = pprFormatOp (sLit "push") format op
-pprInstr (POP format op) = pprFormatOp (sLit "pop") format op
-
--- both unused (SDM):
--- pprInstr PUSHA = text "\tpushal"
--- pprInstr POPA = text "\tpopal"
-
-pprInstr NOP = text "\tnop"
-pprInstr (CLTD II8) = text "\tcbtw"
-pprInstr (CLTD II16) = text "\tcwtd"
-pprInstr (CLTD II32) = text "\tcltd"
-pprInstr (CLTD II64) = text "\tcqto"
-pprInstr (CLTD x) = panic $ "pprInstr: " ++ show x
-
-pprInstr (SETCC cond op) = pprCondInstr (sLit "set") cond (pprOperand II8 op)
-
-pprInstr (JXX cond blockid)
- = pprCondInstr (sLit "j") cond (ppr lab)
- where lab = blockLbl blockid
-
-pprInstr (JXX_GBL cond imm) = pprCondInstr (sLit "j") cond (pprImm imm)
-
-pprInstr (JMP (OpImm imm) _) = text "\tjmp " <> pprImm imm
-pprInstr (JMP op _) = sdocWithPlatform $ \platform ->
- text "\tjmp *"
- <> pprOperand (archWordFormat (target32Bit platform)) op
-pprInstr (JMP_TBL op _ _ _) = pprInstr (JMP op [])
-pprInstr (CALL (Left imm) _) = text "\tcall " <> pprImm imm
-pprInstr (CALL (Right reg) _) = sdocWithPlatform $ \platform ->
- text "\tcall *"
- <> pprReg (archWordFormat (target32Bit platform)) reg
-
-pprInstr (IDIV fmt op) = pprFormatOp (sLit "idiv") fmt op
-pprInstr (DIV fmt op) = pprFormatOp (sLit "div") fmt op
-pprInstr (IMUL2 fmt op) = pprFormatOp (sLit "imul") fmt op
-
--- x86_64 only
-pprInstr (MUL format op1 op2) = pprFormatOpOp (sLit "mul") format op1 op2
-pprInstr (MUL2 format op) = pprFormatOp (sLit "mul") format op
-
-pprInstr (FDIV format op1 op2) = pprFormatOpOp (sLit "div") format op1 op2
-pprInstr (SQRT format op1 op2) = pprFormatOpReg (sLit "sqrt") format op1 op2
-
-pprInstr (CVTSS2SD from to) = pprRegReg (sLit "cvtss2sd") from to
-pprInstr (CVTSD2SS from to) = pprRegReg (sLit "cvtsd2ss") from to
-pprInstr (CVTTSS2SIQ fmt from to) = pprFormatFormatOpReg (sLit "cvttss2si") FF32 fmt from to
-pprInstr (CVTTSD2SIQ fmt from to) = pprFormatFormatOpReg (sLit "cvttsd2si") FF64 fmt from to
-pprInstr (CVTSI2SS fmt from to) = pprFormatOpReg (sLit "cvtsi2ss") fmt from to
-pprInstr (CVTSI2SD fmt from to) = pprFormatOpReg (sLit "cvtsi2sd") fmt from to
-
- -- FETCHGOT for PIC on ELF platforms
-pprInstr (FETCHGOT reg)
- = vcat [ text "\tcall 1f",
- hcat [ text "1:\tpopl\t", pprReg II32 reg ],
- hcat [ text "\taddl\t$_GLOBAL_OFFSET_TABLE_+(.-1b), ",
- pprReg II32 reg ]
- ]
-
- -- FETCHPC for PIC on Darwin/x86
- -- get the instruction pointer into a register
- -- (Terminology note: the IP is called Program Counter on PPC,
- -- and it's a good thing to use the same name on both platforms)
-pprInstr (FETCHPC reg)
- = vcat [ text "\tcall 1f",
- hcat [ text "1:\tpopl\t", pprReg II32 reg ]
- ]
-
-
--- the
--- GST fmt src addr ==> FLD dst ; FSTPsz addr
-pprInstr g@(X87Store fmt addr)
- = pprX87 g (hcat [gtab,
- text "fstp", pprFormat_x87 fmt, gsp, pprAddr addr])
-
-
--- Atomics
-
-pprInstr (LOCK i) = text "\tlock" $$ pprInstr i
-
-pprInstr MFENCE = text "\tmfence"
-
-pprInstr (XADD format src dst) = pprFormatOpOp (sLit "xadd") format src dst
-
-pprInstr (CMPXCHG format src dst)
- = pprFormatOpOp (sLit "cmpxchg") format src dst
-
-
-
---------------------------
--- some left over
-
-
-
-gtab :: SDoc
-gtab = char '\t'
-
-gsp :: SDoc
-gsp = char ' '
-
-
-
-pprX87 :: Instr -> SDoc -> SDoc
-pprX87 fake actual
- = (char '#' <> pprX87Instr fake) $$ actual
-
-pprX87Instr :: Instr -> SDoc
-pprX87Instr (X87Store fmt dst) = pprFormatAddr (sLit "gst") fmt dst
-pprX87Instr _ = panic "X86.Ppr.pprX87Instr: no match"
-
-pprDollImm :: Imm -> SDoc
-pprDollImm i = text "$" <> pprImm i
-
-
-pprOperand :: Format -> Operand -> SDoc
-pprOperand f (OpReg r) = pprReg f r
-pprOperand _ (OpImm i) = pprDollImm i
-pprOperand _ (OpAddr ea) = pprAddr ea
-
-
-pprMnemonic_ :: PtrString -> SDoc
-pprMnemonic_ name =
- char '\t' <> ptext name <> space
-
-
-pprMnemonic :: PtrString -> Format -> SDoc
-pprMnemonic name format =
- char '\t' <> ptext name <> pprFormat format <> space
-
-
-pprFormatImmOp :: PtrString -> Format -> Imm -> Operand -> SDoc
-pprFormatImmOp name format imm op1
- = hcat [
- pprMnemonic name format,
- char '$',
- pprImm imm,
- comma,
- pprOperand format op1
- ]
-
-
-pprFormatOp_ :: PtrString -> Format -> Operand -> SDoc
-pprFormatOp_ name format op1
- = hcat [
- pprMnemonic_ name ,
- pprOperand format op1
- ]
-
-pprFormatOp :: PtrString -> Format -> Operand -> SDoc
-pprFormatOp name format op1
- = hcat [
- pprMnemonic name format,
- pprOperand format op1
- ]
-
-
-pprFormatOpOp :: PtrString -> Format -> Operand -> Operand -> SDoc
-pprFormatOpOp name format op1 op2
- = hcat [
- pprMnemonic name format,
- pprOperand format op1,
- comma,
- pprOperand format op2
- ]
-
-
-pprOpOp :: PtrString -> Format -> Operand -> Operand -> SDoc
-pprOpOp name format op1 op2
- = hcat [
- pprMnemonic_ name,
- pprOperand format op1,
- comma,
- pprOperand format op2
- ]
-
-
-
-pprRegReg :: PtrString -> Reg -> Reg -> SDoc
-pprRegReg name reg1 reg2
- = sdocWithPlatform $ \platform ->
- hcat [
- pprMnemonic_ name,
- pprReg (archWordFormat (target32Bit platform)) reg1,
- comma,
- pprReg (archWordFormat (target32Bit platform)) reg2
- ]
-
-
-pprFormatOpReg :: PtrString -> Format -> Operand -> Reg -> SDoc
-pprFormatOpReg name format op1 reg2
- = sdocWithPlatform $ \platform ->
- hcat [
- pprMnemonic name format,
- pprOperand format op1,
- comma,
- pprReg (archWordFormat (target32Bit platform)) reg2
- ]
-
-pprCondOpReg :: PtrString -> Format -> Cond -> Operand -> Reg -> SDoc
-pprCondOpReg name format cond op1 reg2
- = hcat [
- char '\t',
- ptext name,
- pprCond cond,
- space,
- pprOperand format op1,
- comma,
- pprReg format reg2
- ]
-
-pprFormatFormatOpReg :: PtrString -> Format -> Format -> Operand -> Reg -> SDoc
-pprFormatFormatOpReg name format1 format2 op1 reg2
- = hcat [
- pprMnemonic name format2,
- pprOperand format1 op1,
- comma,
- pprReg format2 reg2
- ]
-
-pprFormatOpOpReg :: PtrString -> Format -> Operand -> Operand -> Reg -> SDoc
-pprFormatOpOpReg name format op1 op2 reg3
- = hcat [
- pprMnemonic name format,
- pprOperand format op1,
- comma,
- pprOperand format op2,
- comma,
- pprReg format reg3
- ]
-
-
-
-pprFormatAddr :: PtrString -> Format -> AddrMode -> SDoc
-pprFormatAddr name format op
- = hcat [
- pprMnemonic name format,
- comma,
- pprAddr op
- ]
-
-pprShift :: PtrString -> Format -> Operand -> Operand -> SDoc
-pprShift name format src dest
- = hcat [
- pprMnemonic name format,
- pprOperand II8 src, -- src is 8-bit sized
- comma,
- pprOperand format dest
- ]
-
-
-pprFormatOpOpCoerce :: PtrString -> Format -> Format -> Operand -> Operand -> SDoc
-pprFormatOpOpCoerce name format1 format2 op1 op2
- = hcat [ char '\t', ptext name, pprFormat format1, pprFormat format2, space,
- pprOperand format1 op1,
- comma,
- pprOperand format2 op2
- ]
-
-
-pprCondInstr :: PtrString -> Cond -> SDoc -> SDoc
-pprCondInstr name cond arg
- = hcat [ char '\t', ptext name, pprCond cond, space, arg]
diff --git a/compiler/nativeGen/X86/RegInfo.hs b/compiler/nativeGen/X86/RegInfo.hs
deleted file mode 100644
index eb5e9bc7fc..0000000000
--- a/compiler/nativeGen/X86/RegInfo.hs
+++ /dev/null
@@ -1,73 +0,0 @@
-{-# LANGUAGE CPP #-}
-module X86.RegInfo (
- mkVirtualReg,
- regDotColor
-)
-
-where
-
-#include "HsVersions.h"
-
-import GhcPrelude
-
-import Format
-import Reg
-
-import Outputable
-import GHC.Platform
-import Unique
-
-import UniqFM
-import X86.Regs
-
-
-mkVirtualReg :: Unique -> Format -> VirtualReg
-mkVirtualReg u format
- = case format of
- FF32 -> VirtualRegD u
- -- for scalar F32, we use the same xmm as F64!
- -- this is a hack that needs some improvement.
- -- For now we map both to being allocated as "Double" Registers
- -- on X86/X86_64
- FF64 -> VirtualRegD u
- _other -> VirtualRegI u
-
-regDotColor :: Platform -> RealReg -> SDoc
-regDotColor platform reg
- = case (lookupUFM (regColors platform) reg) of
- Just str -> text str
- _ -> panic "Register not assigned a color"
-
-regColors :: Platform -> UniqFM [Char]
-regColors platform = listToUFM (normalRegColors platform)
-
-normalRegColors :: Platform -> [(Reg,String)]
-normalRegColors platform =
- zip (map regSingle [0..lastint platform]) colors
- ++ zip (map regSingle [firstxmm..lastxmm platform]) greys
- where
- -- 16 colors - enough for amd64 gp regs
- colors = ["#800000","#ff0000","#808000","#ffff00","#008000"
- ,"#00ff00","#008080","#00ffff","#000080","#0000ff"
- ,"#800080","#ff00ff","#87005f","#875f00","#87af00"
- ,"#ff00af"]
-
- -- 16 shades of grey, enough for the currently supported
- -- SSE extensions.
- greys = ["#0e0e0e","#1c1c1c","#2a2a2a","#383838","#464646"
- ,"#545454","#626262","#707070","#7e7e7e","#8c8c8c"
- ,"#9a9a9a","#a8a8a8","#b6b6b6","#c4c4c4","#d2d2d2"
- ,"#e0e0e0"]
-
-
-
--- 32 shades of grey - use for avx 512 if we ever need it
--- greys = ["#070707","#0e0e0e","#151515","#1c1c1c"
--- ,"#232323","#2a2a2a","#313131","#383838","#3f3f3f"
--- ,"#464646","#4d4d4d","#545454","#5b5b5b","#626262"
--- ,"#696969","#707070","#777777","#7e7e7e","#858585"
--- ,"#8c8c8c","#939393","#9a9a9a","#a1a1a1","#a8a8a8"
--- ,"#afafaf","#b6b6b6","#bdbdbd","#c4c4c4","#cbcbcb"
--- ,"#d2d2d2","#d9d9d9","#e0e0e0"]
-
-
diff --git a/compiler/nativeGen/X86/Regs.hs b/compiler/nativeGen/X86/Regs.hs
deleted file mode 100644
index 4c8943a284..0000000000
--- a/compiler/nativeGen/X86/Regs.hs
+++ /dev/null
@@ -1,442 +0,0 @@
-{-# LANGUAGE CPP #-}
-
-module X86.Regs (
- -- squeese functions for the graph allocator
- virtualRegSqueeze,
- realRegSqueeze,
-
- -- immediates
- Imm(..),
- strImmLit,
- litToImm,
-
- -- addressing modes
- AddrMode(..),
- addrOffset,
-
- -- registers
- spRel,
- argRegs,
- allArgRegs,
- allIntArgRegs,
- callClobberedRegs,
- instrClobberedRegs,
- allMachRegNos,
- classOfRealReg,
- showReg,
-
- -- machine specific
- EABase(..), EAIndex(..), addrModeRegs,
-
- eax, ebx, ecx, edx, esi, edi, ebp, esp,
-
-
- rax, rbx, rcx, rdx, rsi, rdi, rbp, rsp,
- r8, r9, r10, r11, r12, r13, r14, r15,
- lastint,
- xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
- xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
- xmm,
- firstxmm, lastxmm,
-
- ripRel,
- allFPArgRegs,
-
- allocatableRegs
-)
-
-where
-
-#include "HsVersions.h"
-
-import GhcPrelude
-
-import GHC.Platform.Regs
-import Reg
-import RegClass
-
-import GHC.Cmm
-import GHC.Cmm.CLabel ( CLabel )
-import GHC.Driver.Session
-import Outputable
-import GHC.Platform
-
-import qualified Data.Array as A
-
--- | regSqueeze_class reg
--- Calculate the maximum number of register colors that could be
--- denied to a node of this class due to having this reg
--- as a neighbour.
---
-{-# INLINE virtualRegSqueeze #-}
-virtualRegSqueeze :: RegClass -> VirtualReg -> Int
-
-virtualRegSqueeze cls vr
- = case cls of
- RcInteger
- -> case vr of
- VirtualRegI{} -> 1
- VirtualRegHi{} -> 1
- _other -> 0
-
- RcDouble
- -> case vr of
- VirtualRegD{} -> 1
- VirtualRegF{} -> 0
- _other -> 0
-
-
- _other -> 0
-
-{-# INLINE realRegSqueeze #-}
-realRegSqueeze :: RegClass -> RealReg -> Int
-realRegSqueeze cls rr
- = case cls of
- RcInteger
- -> case rr of
- RealRegSingle regNo
- | regNo < firstxmm -> 1
- | otherwise -> 0
-
- RealRegPair{} -> 0
-
- RcDouble
- -> case rr of
- RealRegSingle regNo
- | regNo >= firstxmm -> 1
- | otherwise -> 0
-
- RealRegPair{} -> 0
-
-
- _other -> 0
-
--- -----------------------------------------------------------------------------
--- Immediates
-
-data Imm
- = ImmInt Int
- | ImmInteger Integer -- Sigh.
- | ImmCLbl CLabel -- AbstractC Label (with baggage)
- | ImmLit SDoc -- Simple string
- | ImmIndex CLabel Int
- | ImmFloat Rational
- | ImmDouble Rational
- | ImmConstantSum Imm Imm
- | ImmConstantDiff Imm Imm
-
-strImmLit :: String -> Imm
-strImmLit s = ImmLit (text s)
-
-
-litToImm :: CmmLit -> Imm
-litToImm (CmmInt i w) = ImmInteger (narrowS w i)
- -- narrow to the width: a CmmInt might be out of
- -- range, but we assume that ImmInteger only contains
- -- in-range values. A signed value should be fine here.
-litToImm (CmmFloat f W32) = ImmFloat f
-litToImm (CmmFloat f W64) = ImmDouble f
-litToImm (CmmLabel l) = ImmCLbl l
-litToImm (CmmLabelOff l off) = ImmIndex l off
-litToImm (CmmLabelDiffOff l1 l2 off _)
- = ImmConstantSum
- (ImmConstantDiff (ImmCLbl l1) (ImmCLbl l2))
- (ImmInt off)
-litToImm _ = panic "X86.Regs.litToImm: no match"
-
--- addressing modes ------------------------------------------------------------
-
-data AddrMode
- = AddrBaseIndex EABase EAIndex Displacement
- | ImmAddr Imm Int
-
-data EABase = EABaseNone | EABaseReg Reg | EABaseRip
-data EAIndex = EAIndexNone | EAIndex Reg Int
-type Displacement = Imm
-
-
-addrOffset :: AddrMode -> Int -> Maybe AddrMode
-addrOffset addr off
- = case addr of
- ImmAddr i off0 -> Just (ImmAddr i (off0 + off))
-
- AddrBaseIndex r i (ImmInt n) -> Just (AddrBaseIndex r i (ImmInt (n + off)))
- AddrBaseIndex r i (ImmInteger n)
- -> Just (AddrBaseIndex r i (ImmInt (fromInteger (n + toInteger off))))
-
- AddrBaseIndex r i (ImmCLbl lbl)
- -> Just (AddrBaseIndex r i (ImmIndex lbl off))
-
- AddrBaseIndex r i (ImmIndex lbl ix)
- -> Just (AddrBaseIndex r i (ImmIndex lbl (ix+off)))
-
- _ -> Nothing -- in theory, shouldn't happen
-
-
-addrModeRegs :: AddrMode -> [Reg]
-addrModeRegs (AddrBaseIndex b i _) = b_regs ++ i_regs
- where
- b_regs = case b of { EABaseReg r -> [r]; _ -> [] }
- i_regs = case i of { EAIndex r _ -> [r]; _ -> [] }
-addrModeRegs _ = []
-
-
--- registers -------------------------------------------------------------------
-
--- @spRel@ gives us a stack relative addressing mode for volatile
--- temporaries and for excess call arguments. @fpRel@, where
--- applicable, is the same but for the frame pointer.
-
-
-spRel :: DynFlags
- -> Int -- ^ desired stack offset in bytes, positive or negative
- -> AddrMode
-spRel dflags n
- | target32Bit (targetPlatform dflags)
- = AddrBaseIndex (EABaseReg esp) EAIndexNone (ImmInt n)
- | otherwise
- = AddrBaseIndex (EABaseReg rsp) EAIndexNone (ImmInt n)
-
--- The register numbers must fit into 32 bits on x86, so that we can
--- use a Word32 to represent the set of free registers in the register
--- allocator.
-
-
-
-firstxmm :: RegNo
-firstxmm = 16
-
--- on 32bit platformOSs, only the first 8 XMM/YMM/ZMM registers are available
-lastxmm :: Platform -> RegNo
-lastxmm platform
- | target32Bit platform = firstxmm + 7 -- xmm0 - xmmm7
- | otherwise = firstxmm + 15 -- xmm0 -xmm15
-
-lastint :: Platform -> RegNo
-lastint platform
- | target32Bit platform = 7 -- not %r8..%r15
- | otherwise = 15
-
-intregnos :: Platform -> [RegNo]
-intregnos platform = [0 .. lastint platform]
-
-
-
-xmmregnos :: Platform -> [RegNo]
-xmmregnos platform = [firstxmm .. lastxmm platform]
-
-floatregnos :: Platform -> [RegNo]
-floatregnos platform = xmmregnos platform
-
--- argRegs is the set of regs which are read for an n-argument call to C.
--- For archs which pass all args on the stack (x86), is empty.
--- Sparc passes up to the first 6 args in regs.
-argRegs :: RegNo -> [Reg]
-argRegs _ = panic "MachRegs.argRegs(x86): should not be used!"
-
--- | The complete set of machine registers.
-allMachRegNos :: Platform -> [RegNo]
-allMachRegNos platform = intregnos platform ++ floatregnos platform
-
--- | Take the class of a register.
-{-# INLINE classOfRealReg #-}
-classOfRealReg :: Platform -> RealReg -> RegClass
--- On x86, we might want to have an 8-bit RegClass, which would
--- contain just regs 1-4 (the others don't have 8-bit versions).
--- However, we can get away without this at the moment because the
--- only allocatable integer regs are also 8-bit compatible (1, 3, 4).
-classOfRealReg platform reg
- = case reg of
- RealRegSingle i
- | i <= lastint platform -> RcInteger
- | i <= lastxmm platform -> RcDouble
- | otherwise -> panic "X86.Reg.classOfRealReg registerSingle too high"
- _ -> panic "X86.Regs.classOfRealReg: RegPairs on this arch"
-
--- | Get the name of the register with this number.
--- NOTE: fixme, we dont track which "way" the XMM registers are used
-showReg :: Platform -> RegNo -> String
-showReg platform n
- | n >= firstxmm && n <= lastxmm platform = "%xmm" ++ show (n-firstxmm)
- | n >= 8 && n < firstxmm = "%r" ++ show n
- | otherwise = regNames platform A.! n
-
-regNames :: Platform -> A.Array Int String
-regNames platform
- = if target32Bit platform
- then A.listArray (0,8) ["%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi", "%ebp", "%esp"]
- else A.listArray (0,8) ["%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi", "%rbp", "%rsp"]
-
-
-
--- machine specific ------------------------------------------------------------
-
-
-{-
-Intel x86 architecture:
-- All registers except 7 (esp) are available for use.
-- Only ebx, esi, edi and esp are available across a C call (they are callee-saves).
-- Registers 0-7 have 16-bit counterparts (ax, bx etc.)
-- Registers 0-3 have 8 bit counterparts (ah, bh etc.)
-
-The fp registers are all Double registers; we don't have any RcFloat class
-regs. @regClass@ barfs if you give it a VirtualRegF, and mkVReg above should
-never generate them.
-
-TODO: cleanup modelling float vs double registers and how they are the same class.
--}
-
-
-eax, ebx, ecx, edx, esp, ebp, esi, edi :: Reg
-
-eax = regSingle 0
-ebx = regSingle 1
-ecx = regSingle 2
-edx = regSingle 3
-esi = regSingle 4
-edi = regSingle 5
-ebp = regSingle 6
-esp = regSingle 7
-
-
-
-
-{-
-AMD x86_64 architecture:
-- All 16 integer registers are addressable as 8, 16, 32 and 64-bit values:
-
- 8 16 32 64
- ---------------------
- al ax eax rax
- bl bx ebx rbx
- cl cx ecx rcx
- dl dx edx rdx
- sil si esi rsi
- dil si edi rdi
- bpl bp ebp rbp
- spl sp esp rsp
- r10b r10w r10d r10
- r11b r11w r11d r11
- r12b r12w r12d r12
- r13b r13w r13d r13
- r14b r14w r14d r14
- r15b r15w r15d r15
--}
-
-rax, rbx, rcx, rdx, rsp, rbp, rsi, rdi,
- r8, r9, r10, r11, r12, r13, r14, r15,
- xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
- xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15 :: Reg
-
-rax = regSingle 0
-rbx = regSingle 1
-rcx = regSingle 2
-rdx = regSingle 3
-rsi = regSingle 4
-rdi = regSingle 5
-rbp = regSingle 6
-rsp = regSingle 7
-r8 = regSingle 8
-r9 = regSingle 9
-r10 = regSingle 10
-r11 = regSingle 11
-r12 = regSingle 12
-r13 = regSingle 13
-r14 = regSingle 14
-r15 = regSingle 15
-xmm0 = regSingle 16
-xmm1 = regSingle 17
-xmm2 = regSingle 18
-xmm3 = regSingle 19
-xmm4 = regSingle 20
-xmm5 = regSingle 21
-xmm6 = regSingle 22
-xmm7 = regSingle 23
-xmm8 = regSingle 24
-xmm9 = regSingle 25
-xmm10 = regSingle 26
-xmm11 = regSingle 27
-xmm12 = regSingle 28
-xmm13 = regSingle 29
-xmm14 = regSingle 30
-xmm15 = regSingle 31
-
-ripRel :: Displacement -> AddrMode
-ripRel imm = AddrBaseIndex EABaseRip EAIndexNone imm
-
-
- -- so we can re-use some x86 code:
-{-
-eax = rax
-ebx = rbx
-ecx = rcx
-edx = rdx
-esi = rsi
-edi = rdi
-ebp = rbp
-esp = rsp
--}
-
-xmm :: RegNo -> Reg
-xmm n = regSingle (firstxmm+n)
-
-
-
-
--- | these are the regs which we cannot assume stay alive over a C call.
-callClobberedRegs :: Platform -> [Reg]
--- caller-saves registers
-callClobberedRegs platform
- | target32Bit platform = [eax,ecx,edx] ++ map regSingle (floatregnos platform)
- | platformOS platform == OSMinGW32
- = [rax,rcx,rdx,r8,r9,r10,r11]
- -- Only xmm0-5 are caller-saves registers on 64bit windows.
- -- ( https://docs.microsoft.com/en-us/cpp/build/register-usage )
- -- For details check the Win64 ABI.
- ++ map xmm [0 .. 5]
- | otherwise
- -- all xmm regs are caller-saves
- -- caller-saves registers
- = [rax,rcx,rdx,rsi,rdi,r8,r9,r10,r11]
- ++ map regSingle (floatregnos platform)
-
-allArgRegs :: Platform -> [(Reg, Reg)]
-allArgRegs platform
- | platformOS platform == OSMinGW32 = zip [rcx,rdx,r8,r9]
- (map regSingle [firstxmm ..])
- | otherwise = panic "X86.Regs.allArgRegs: not defined for this arch"
-
-allIntArgRegs :: Platform -> [Reg]
-allIntArgRegs platform
- | (platformOS platform == OSMinGW32) || target32Bit platform
- = panic "X86.Regs.allIntArgRegs: not defined for this platform"
- | otherwise = [rdi,rsi,rdx,rcx,r8,r9]
-
-
--- | on 64bit platforms we pass the first 8 float/double arguments
--- in the xmm registers.
-allFPArgRegs :: Platform -> [Reg]
-allFPArgRegs platform
- | platformOS platform == OSMinGW32
- = panic "X86.Regs.allFPArgRegs: not defined for this platform"
- | otherwise = map regSingle [firstxmm .. firstxmm + 7 ]
-
-
--- Machine registers which might be clobbered by instructions that
--- generate results into fixed registers, or need arguments in a fixed
--- register.
-instrClobberedRegs :: Platform -> [Reg]
-instrClobberedRegs platform
- | target32Bit platform = [ eax, ecx, edx ]
- | otherwise = [ rax, rcx, rdx ]
-
---
-
--- allocatableRegs is allMachRegNos with the fixed-use regs removed.
--- i.e., these are the regs for which we are prepared to allow the
--- register allocator to attempt to map VRegs to.
-allocatableRegs :: Platform -> [RealReg]
-allocatableRegs platform
- = let isFree i = freeReg platform i
- in map RealRegSingle $ filter isFree (allMachRegNos platform)
-