diff options
48 files changed, 3594 insertions, 132 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 3e0db17aa4..2d33eaef13 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -740,14 +740,6 @@ validate-aarch64-darwin: tags: - aarch64-linux -.build-aarch64-linux-deb10-llvm: - extends: .build-aarch64-linux-deb10 - stage: full-build - variables: - BUILD_FLAVOUR: perf-llvm - tags: - - aarch64-linux - validate-aarch64-linux-deb10: extends: .build-aarch64-linux-deb10 artifacts: @@ -760,6 +752,20 @@ nightly-aarch64-linux-deb10: variables: TEST_TYPE: slowtest +.build-aarch64-linux-deb10-llvm: + extends: .build-aarch64-linux-deb10 + stage: full-build + variables: + BUILD_FLAVOUR: perf-llvm + tags: + - aarch64-linux + +validate-aarch64-linux-deb10-llvm: + extends: .build-aarch64-linux-deb10-llvm + artifacts: + when: always + expire_in: 2 week + ################################# # armv7-linux-deb10 ################################# diff --git a/compiler/GHC/Cmm/CLabel.hs b/compiler/GHC/Cmm/CLabel.hs index cda35a4943..b6ad2b3431 100644 --- a/compiler/GHC/Cmm/CLabel.hs +++ b/compiler/GHC/Cmm/CLabel.hs @@ -287,6 +287,12 @@ data CLabel deriving Eq +instance Show CLabel where + show = showPprUnsafe . pprDebugCLabel genericPlatform + +instance Outputable CLabel where + ppr = text . show + isIdLabel :: CLabel -> Bool isIdLabel IdLabel{} = True isIdLabel _ = False @@ -1544,6 +1550,7 @@ pprDynamicLinkerAsmLabel !platform dllInfo ppLbl = SymbolPtr -> char 'L' <> ppLbl <> text "$non_lazy_ptr" GotSymbolPtr -> ppLbl <> text "@GOTPCREL" GotSymbolOffset -> ppLbl + | platformArch platform == ArchAArch64 -> ppLbl | otherwise -> case dllInfo of CodeStub -> char 'L' <> ppLbl <> text "$stub" @@ -1572,6 +1579,10 @@ pprDynamicLinkerAsmLabel !platform dllInfo ppLbl = SymbolPtr -> text ".LC_" <> ppLbl _ -> panic "pprDynamicLinkerAsmLabel" + | platformArch platform == ArchAArch64 + = ppLbl + + | platformArch platform == ArchX86_64 = case dllInfo of CodeStub -> ppLbl <> text "@plt" diff --git a/compiler/GHC/Cmm/Expr.hs b/compiler/GHC/Cmm/Expr.hs index 86b06271d1..52cb63c901 100644 --- a/compiler/GHC/Cmm/Expr.hs +++ b/compiler/GHC/Cmm/Expr.hs @@ -65,6 +65,7 @@ data CmmExpr -- ** is shorthand only, meaning ** -- CmmMachOp (MO_Add rep) [x, CmmLit (CmmInt (fromIntegral i) rep)] -- where rep = typeWidth (cmmRegType reg) + deriving Show instance Eq CmmExpr where -- Equality ignores the types CmmLit l1 == CmmLit l2 = l1==l2 @@ -78,7 +79,7 @@ instance Eq CmmExpr where -- Equality ignores the types data CmmReg = CmmLocal {-# UNPACK #-} !LocalReg | CmmGlobal GlobalReg - deriving( Eq, Ord ) + deriving( Eq, Ord, Show ) -- | A stack area is either the stack slot where a variable is spilled -- or the stack space where function arguments and results are passed. @@ -86,7 +87,7 @@ data Area = Old -- See Note [Old Area] | Young {-# UNPACK #-} !BlockId -- Invariant: must be a continuation BlockId -- See Note [Continuation BlockId] in GHC.Cmm.Node. - deriving (Eq, Ord) + deriving (Eq, Ord, Show) {- Note [Old Area] ~~~~~~~~~~~~~~~~~~ @@ -209,7 +210,7 @@ data CmmLit -- During the stack-layout pass, CmmHighStackMark -- is replaced by a CmmInt for the actual number -- of bytes used - deriving Eq + deriving (Eq, Show) instance Outputable CmmLit where ppr (CmmInt n w) = text "CmmInt" <+> ppr n <+> ppr w @@ -279,6 +280,7 @@ data LocalReg -- ^ Parameters: -- 1. Identifier -- 2. Type + deriving Show instance Eq LocalReg where (LocalReg u1 _) == (LocalReg u2 _) = u1 == u2 diff --git a/compiler/GHC/Cmm/Parser.y b/compiler/GHC/Cmm/Parser.y index 490a3c4976..b8a6f7de7c 100644 --- a/compiler/GHC/Cmm/Parser.y +++ b/compiler/GHC/Cmm/Parser.y @@ -1021,6 +1021,45 @@ machOps = listToUFM $ callishMachOps :: Platform -> UniqFM FastString ([CmmExpr] -> (CallishMachOp, [CmmExpr])) callishMachOps platform = listToUFM $ map (\(x, y) -> (mkFastString x, y)) [ + + ( "pow64f", (MO_F64_Pwr,) ), + ( "sin64f", (MO_F64_Sin,) ), + ( "cos64f", (MO_F64_Cos,) ), + ( "tan64f", (MO_F64_Tan,) ), + ( "sinh64f", (MO_F64_Sinh,) ), + ( "cosh64f", (MO_F64_Cosh,) ), + ( "tanh64f", (MO_F64_Tanh,) ), + ( "asin64f", (MO_F64_Asin,) ), + ( "acos64f", (MO_F64_Acos,) ), + ( "atan64f", (MO_F64_Atan,) ), + ( "asinh64f", (MO_F64_Asinh,) ), + ( "acosh64f", (MO_F64_Acosh,) ), + ( "log64f", (MO_F64_Log,) ), + ( "log1p64f", (MO_F64_Log1P,) ), + ( "exp64f", (MO_F64_Exp,) ), + ( "expM164f", (MO_F64_ExpM1,) ), + ( "fabs64f", (MO_F64_Fabs,) ), + ( "sqrt64f", (MO_F64_Sqrt,) ), + + ( "pow32f", (MO_F32_Pwr,) ), + ( "sin32f", (MO_F32_Sin,) ), + ( "cos32f", (MO_F32_Cos,) ), + ( "tan32f", (MO_F32_Tan,) ), + ( "sinh32f", (MO_F32_Sinh,) ), + ( "cosh32f", (MO_F32_Cosh,) ), + ( "tanh32f", (MO_F32_Tanh,) ), + ( "asin32f", (MO_F32_Asin,) ), + ( "acos32f", (MO_F32_Acos,) ), + ( "atan32f", (MO_F32_Atan,) ), + ( "asinh32f", (MO_F32_Asinh,) ), + ( "acosh32f", (MO_F32_Acosh,) ), + ( "log32f", (MO_F32_Log,) ), + ( "log1p32f", (MO_F32_Log1P,) ), + ( "exp32f", (MO_F32_Exp,) ), + ( "expM132f", (MO_F32_ExpM1,) ), + ( "fabs32f", (MO_F32_Fabs,) ), + ( "sqrt32f", (MO_F32_Sqrt,) ), + ( "read_barrier", (MO_ReadBarrier,)), ( "write_barrier", (MO_WriteBarrier,)), ( "memcpy", memcpyLikeTweakArgs MO_Memcpy ), @@ -1060,10 +1099,6 @@ callishMachOps platform = listToUFM $ ( "xchg16", (MO_Xchg W16,)), ( "xchg32", (MO_Xchg W32,)), ( "xchg64", (MO_Xchg W64,)) - - -- ToDo: the rest, maybe - -- edit: which rest? - -- also: how do we tell CMM Lint how to type check callish macops? ] where memcpyLikeTweakArgs :: (Int -> CallishMachOp) -> [CmmExpr] -> (CallishMachOp, [CmmExpr]) diff --git a/compiler/GHC/Cmm/Type.hs b/compiler/GHC/Cmm/Type.hs index c7e2a4069b..1227b37ced 100644 --- a/compiler/GHC/Cmm/Type.hs +++ b/compiler/GHC/Cmm/Type.hs @@ -51,13 +51,14 @@ import Data.Int data CmmType -- The important one! = CmmType CmmCat !Width + deriving Show data CmmCat -- "Category" (not exported) = GcPtrCat -- GC pointer | BitsCat -- Non-pointer | FloatCat -- Float | VecCat Length CmmCat -- Vector - deriving( Eq ) + deriving( Eq, Show ) -- See Note [Signed vs unsigned] at the end instance Outputable CmmType where @@ -434,4 +435,3 @@ C calling convention rather early on in the compiler). However, given this, the cons outweigh the pros. -} - diff --git a/compiler/GHC/Cmm/Utils.hs b/compiler/GHC/Cmm/Utils.hs index c1419cdd12..596b8d050f 100644 --- a/compiler/GHC/Cmm/Utils.hs +++ b/compiler/GHC/Cmm/Utils.hs @@ -14,7 +14,7 @@ module GHC.Cmm.Utils( -- CmmType - primRepCmmType, slotCmmType, slotForeignHint, + primRepCmmType, slotCmmType, typeCmmType, typeForeignHint, primRepForeignHint, -- CmmLit @@ -159,14 +159,6 @@ primRepForeignHint FloatRep = NoHint primRepForeignHint DoubleRep = NoHint primRepForeignHint (VecRep {}) = NoHint -slotForeignHint :: SlotTy -> ForeignHint -slotForeignHint PtrLiftedSlot = AddrHint -slotForeignHint PtrUnliftedSlot = AddrHint -slotForeignHint WordSlot = NoHint -slotForeignHint Word64Slot = NoHint -slotForeignHint FloatSlot = NoHint -slotForeignHint DoubleSlot = NoHint - typeForeignHint :: UnaryType -> ForeignHint typeForeignHint = primRepForeignHint . typePrimRep1 diff --git a/compiler/GHC/CmmToAsm.hs b/compiler/GHC/CmmToAsm.hs index 00ef59660f..82122911b6 100644 --- a/compiler/GHC/CmmToAsm.hs +++ b/compiler/GHC/CmmToAsm.hs @@ -84,6 +84,7 @@ import GHC.Prelude import qualified GHC.CmmToAsm.X86 as X86 import qualified GHC.CmmToAsm.PPC as PPC import qualified GHC.CmmToAsm.SPARC as SPARC +import qualified GHC.CmmToAsm.AArch64 as AArch64 import GHC.CmmToAsm.Reg.Liveness import qualified GHC.CmmToAsm.Reg.Linear as Linear @@ -166,7 +167,7 @@ nativeCodeGen logger dflags this_mod modLoc h us cmms ArchSPARC64 -> panic "nativeCodeGen: No NCG for SPARC64" ArchS390X -> panic "nativeCodeGen: No NCG for S390X" ArchARM {} -> panic "nativeCodeGen: No NCG for ARM" - ArchAArch64 -> panic "nativeCodeGen: No NCG for AArch64" + ArchAArch64 -> nCG' (AArch64.ncgAArch64 config) ArchAlpha -> panic "nativeCodeGen: No NCG for Alpha" ArchMipseb -> panic "nativeCodeGen: No NCG for mipseb" ArchMipsel -> panic "nativeCodeGen: No NCG for mipsel" @@ -174,7 +175,6 @@ nativeCodeGen logger dflags this_mod modLoc h us cmms ArchUnknown -> panic "nativeCodeGen: No NCG for unknown arch" ArchJavaScript-> panic "nativeCodeGen: No NCG for JavaScript" - -- | Data accumulated during code generation. Mostly about statistics, -- but also collects debug data for DWARF generation. data NativeGenAcc statics instr @@ -1191,9 +1191,9 @@ initNCGConfig dflags this_mod = NCGConfig ArchX86 -> v _ -> Nothing - , ncgDwarfEnabled = debugLevel dflags > 0 - , ncgDwarfUnwindings = debugLevel dflags >= 1 + , ncgDwarfEnabled = osElfTarget (platformOS (targetPlatform dflags)) && debugLevel dflags > 0 && platformArch (targetPlatform dflags) /= ArchAArch64 + , ncgDwarfUnwindings = osElfTarget (platformOS (targetPlatform dflags)) && debugLevel dflags > 0 + , ncgDwarfStripBlockInfo = osElfTarget (platformOS (targetPlatform dflags)) && debugLevel dflags < 2 -- We strip out block information when running with -g0 or -g1. + , ncgDwarfSourceNotes = osElfTarget (platformOS (targetPlatform dflags)) && debugLevel dflags > 2 -- We produce GHC-specific source-note DIEs only with -g3 , ncgExposeInternalSymbols = gopt Opt_ExposeInternalSymbols dflags - , ncgDwarfStripBlockInfo = debugLevel dflags < 2 -- We strip out block information when running with -g0 or -g1. - , ncgDwarfSourceNotes = debugLevel dflags >= 3 -- We produce GHC-specific source-note DIEs only with -g3 } diff --git a/compiler/GHC/CmmToAsm/AArch64.hs b/compiler/GHC/CmmToAsm/AArch64.hs new file mode 100644 index 0000000000..14f4b5d0bd --- /dev/null +++ b/compiler/GHC/CmmToAsm/AArch64.hs @@ -0,0 +1,60 @@ +{-# OPTIONS_GHC -fno-warn-orphans #-} + +-- | Native code generator for x86 and x86-64 architectures +module GHC.CmmToAsm.AArch64 + ( ncgAArch64 ) +where + +import GHC.Prelude + +import GHC.CmmToAsm.Instr +import GHC.CmmToAsm.Monad +import GHC.CmmToAsm.Config +import GHC.CmmToAsm.Types + +import qualified GHC.CmmToAsm.AArch64.Instr as AArch64 +import qualified GHC.CmmToAsm.AArch64.Ppr as AArch64 +import qualified GHC.CmmToAsm.AArch64.CodeGen as AArch64 +import qualified GHC.CmmToAsm.AArch64.Regs as AArch64 +import qualified GHC.CmmToAsm.AArch64.RegInfo as AArch64 + +ncgAArch64 :: NCGConfig -> NcgImpl RawCmmStatics AArch64.Instr AArch64.JumpDest +ncgAArch64 config + = NcgImpl { + ncgConfig = config + ,cmmTopCodeGen = AArch64.cmmTopCodeGen + ,generateJumpTableForInstr = AArch64.generateJumpTableForInstr config + ,getJumpDestBlockId = AArch64.getJumpDestBlockId + ,canShortcut = AArch64.canShortcut + ,shortcutStatics = AArch64.shortcutStatics + ,shortcutJump = AArch64.shortcutJump + ,pprNatCmmDecl = AArch64.pprNatCmmDecl config + ,maxSpillSlots = AArch64.maxSpillSlots config + ,allocatableRegs = AArch64.allocatableRegs platform + ,ncgAllocMoreStack = AArch64.allocMoreStack platform + ,ncgExpandTop = id + ,ncgMakeFarBranches = const id + ,extractUnwindPoints = const [] + ,invertCondBranches = \_ _ -> id + } + where + platform = ncgPlatform config + +-- | Instruction instance for aarch64 +instance Instruction AArch64.Instr where + regUsageOfInstr = AArch64.regUsageOfInstr + patchRegsOfInstr = AArch64.patchRegsOfInstr + isJumpishInstr = AArch64.isJumpishInstr + jumpDestsOfInstr = AArch64.jumpDestsOfInstr + patchJumpInstr = AArch64.patchJumpInstr + mkSpillInstr = AArch64.mkSpillInstr + mkLoadInstr = AArch64.mkLoadInstr + takeDeltaInstr = AArch64.takeDeltaInstr + isMetaInstr = AArch64.isMetaInstr + mkRegRegMoveInstr _ = AArch64.mkRegRegMoveInstr + takeRegRegMoveInstr = AArch64.takeRegRegMoveInstr + mkJumpInstr = AArch64.mkJumpInstr + mkStackAllocInstr = AArch64.mkStackAllocInstr + mkStackDeallocInstr = AArch64.mkStackDeallocInstr + mkComment = pure . AArch64.COMMENT + pprInstr = AArch64.pprInstr diff --git a/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs new file mode 100644 index 0000000000..b0984070fc --- /dev/null +++ b/compiler/GHC/CmmToAsm/AArch64/CodeGen.hs @@ -0,0 +1,1358 @@ +{-# language GADTs #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE BinaryLiterals #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE NumericUnderscores #-} +module GHC.CmmToAsm.AArch64.CodeGen ( + cmmTopCodeGen + , generateJumpTableForInstr +) + +where + +-- NCG stuff: +import GHC.Prelude hiding (EQ) + +import GHC.Platform.Regs +import GHC.CmmToAsm.AArch64.Instr +import GHC.CmmToAsm.AArch64.Regs +import GHC.CmmToAsm.AArch64.Cond + +import GHC.CmmToAsm.CPrim +import GHC.Cmm.DebugBlock +import GHC.CmmToAsm.Monad + ( NatM, getNewRegNat + , getPicBaseMaybeNat, getPlatform, getConfig + , getDebugBlock, getFileId + ) +-- import GHC.CmmToAsm.Instr +import GHC.CmmToAsm.PIC +import GHC.CmmToAsm.Format +import GHC.CmmToAsm.Config +import GHC.CmmToAsm.Types +import GHC.Platform.Reg +import GHC.Platform + +-- Our intermediate code: +import GHC.Cmm.BlockId +import GHC.Cmm +import GHC.Cmm.Utils +import GHC.Cmm.Switch +import GHC.Cmm.CLabel +import GHC.Cmm.Dataflow.Block +import GHC.Cmm.Dataflow.Graph +import GHC.Types.Tickish ( GenTickish(..) ) +import GHC.Types.SrcLoc ( srcSpanFile, srcSpanStartLine, srcSpanStartCol ) + +-- The rest: +import GHC.Data.OrdList +import GHC.Utils.Outputable + +import Control.Monad ( mapAndUnzipM, when, foldM ) +import Data.Word +import Data.Maybe +import GHC.Float + +import GHC.Types.Basic +import GHC.Types.ForeignCall +import GHC.Data.FastString +import GHC.Utils.Misc +import GHC.Utils.Panic + +-- Note [General layout of an NCG] +-- @cmmTopCodeGen@ will be our main entry point to code gen. Here we'll get +-- @RawCmmDecl@; see GHC.Cmm +-- +-- RawCmmDecl = GenCmmDecl RawCmmStatics (LabelMap RawCmmStatics) CmmGraph +-- +-- GenCmmDecl d h g = CmmProc h CLabel [GlobalReg] g +-- | CmmData Section d +-- +-- As a result we want to transform this to a list of @NatCmmDecl@, which is +-- defined @GHC.CmmToAsm.Instr@ as +-- +-- type NatCmmDecl statics instr +-- = GenCmmDecl statics (LabelMap RawCmmStatics) (ListGraph instr) +-- +-- Thus well' turn +-- GenCmmDecl RawCmmStatics (LabelMap RawCmmStatics) CmmGraph +-- into +-- [GenCmmDecl RawCmmStatics (LabelMap RawCmmStatics) (ListGraph Instr)] +-- +-- where @CmmGraph@ is +-- +-- type CmmGraph = GenCmmGraph CmmNode +-- data GenCmmGraph n = CmmGraph { g_entry :: BlockId, g_graph :: Graph n C C } +-- type CmmBlock = Block CmmNode C C +-- +-- and @ListGraph Instr@ is +-- +-- newtype ListGraph i = ListGraph [GenBasicBlock i] +-- data GenBasicBlock i = BasicBlock BlockId [i] + +cmmTopCodeGen + :: RawCmmDecl + -> NatM [NatCmmDecl RawCmmStatics Instr] + +-- Thus we'll have to deal with either CmmProc ... +cmmTopCodeGen _cmm@(CmmProc info lab live graph) = do + -- do + -- traceM $ "-- -------------------------- cmmTopGen (CmmProc) -------------------------- --\n" + -- ++ showSDocUnsafe (ppr cmm) + + let blocks = toBlockListEntryFirst graph + (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks + picBaseMb <- getPicBaseMaybeNat + + let proc = CmmProc info lab live (ListGraph $ concat nat_blocks) + tops = proc : concat statics + + case picBaseMb of + Just _picBase -> panic "AArch64.cmmTopCodeGen: picBase not implemented" + Nothing -> return tops + +-- ... or CmmData. +cmmTopCodeGen _cmm@(CmmData sec dat) = do + -- do + -- traceM $ "-- -------------------------- cmmTopGen (CmmData) -------------------------- --\n" + -- ++ showSDocUnsafe (ppr cmm) + return [CmmData sec dat] -- no translation, we just use CmmStatic + +basicBlockCodeGen + :: Block CmmNode C C + -> NatM ( [NatBasicBlock Instr] + , [NatCmmDecl RawCmmStatics Instr]) + +basicBlockCodeGen block = do + config <- getConfig + -- do + -- traceM $ "-- --------------------------- basicBlockCodeGen --------------------------- --\n" + -- ++ showSDocUnsafe (ppr block) + let (_, nodes, tail) = blockSplit block + id = entryLabel block + stmts = blockToList nodes + + header_comment_instr = unitOL $ MULTILINE_COMMENT ( + text "-- --------------------------- basicBlockCodeGen --------------------------- --\n" + $+$ pdoc (ncgPlatform config) block + ) + -- Generate location directive + dbg <- getDebugBlock (entryLabel block) + loc_instrs <- case dblSourceTick =<< dbg of + Just (SourceNote span name) + -> do fileId <- getFileId (srcSpanFile span) + let line = srcSpanStartLine span; col = srcSpanStartCol span + return $ unitOL $ LOCATION fileId line col name + _ -> return nilOL + (mid_instrs,mid_bid) <- stmtsToInstrs id stmts + (!tail_instrs,_) <- stmtToInstrs mid_bid tail + let instrs = header_comment_instr `appOL` loc_instrs `appOL` mid_instrs `appOL` tail_instrs + -- TODO: Then x86 backend run @verifyBasicBlock@ here and inserts + -- unwinding info. See Ticket 19913 + -- code generation may introduce new basic block boundaries, which + -- are indicated by the NEWBLOCK instruction. We must split up the + -- instruction stream into basic blocks again. Also, we extract + -- LDATAs here too. + let + (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs + + mkBlocks (NEWBLOCK id) (instrs,blocks,statics) + = ([], BasicBlock id instrs : blocks, statics) + mkBlocks (LDATA sec dat) (instrs,blocks,statics) + = (instrs, blocks, CmmData sec dat:statics) + mkBlocks instr (instrs,blocks,statics) + = (instr:instrs, blocks, statics) + return (BasicBlock id top : other_blocks, statics) + + +-- ----------------------------------------------------------------------------- +-- | Utilities +ann :: SDoc -> Instr -> Instr +ann doc instr {- | debugIsOn -} = ANN doc instr +-- ann _ instr = instr +{-# INLINE ann #-} + +-- Using pprExpr will hide the AST, @ANN@ will end up in the assembly with +-- -dppr-debug. The idea is that we can trivially see how a cmm expression +-- ended up producing the assmebly we see. By having the verbatim AST printed +-- we can simply check the patterns that were matched to arrive at the assmebly +-- we generated. +-- +-- pprExpr will hide a lot of noise of the underlying data structure and print +-- the expression into something that can be easily read by a human. However +-- going back to the exact CmmExpr representation can be labourous and adds +-- indirections to find the matches that lead to the assembly. +-- +-- An improvement oculd be to have +-- +-- (pprExpr genericPlatform e) <> parens (text. show e) +-- +-- to have the best of both worlds. +-- +-- Note: debugIsOn is too restrictive, it only works for debug compilers. +-- However, we do not only want to inspect this for debug compilers. Ideally +-- we'd have a check for -dppr-debug here already, such that we don't even +-- generate the ANN expressions. However, as they are lazy, they shouldn't be +-- forced until we actually force them, and without -dppr-debug they should +-- never end up being forced. +annExpr :: CmmExpr -> Instr -> Instr +annExpr e instr {- | debugIsOn -} = ANN (text . show $ e) instr +-- annExpr e instr {- | debugIsOn -} = ANN (pprExpr genericPlatform e) instr +-- annExpr _ instr = instr +{-# INLINE annExpr #-} + +-- ----------------------------------------------------------------------------- +-- Generating a table-branch + +-- TODO jump tables would be a lot faster, but we'll use bare bones for now. +-- this is usually done by sticking the jump table ids into an instruction +-- and then have the @generateJumpTableForInstr@ callback produce the jump +-- table as a static. +-- +-- See Ticket 19912 +-- +-- data SwitchTargets = +-- SwitchTargets +-- Bool -- Signed values +-- (Integer, Integer) -- Range +-- (Maybe Label) -- Default value +-- (M.Map Integer Label) -- The branches +-- +-- Non Jumptable plan: +-- xE <- expr +-- +genSwitch :: CmmExpr -> SwitchTargets -> NatM InstrBlock +genSwitch expr targets = do -- pprPanic "genSwitch" (ppr expr) + (reg, format, code) <- getSomeReg expr + let w = formatToWidth format + let mkbranch acc (key, bid) = do + (keyReg, _format, code) <- getSomeReg (CmmLit (CmmInt key w)) + return $ code `appOL` + toOL [ CMP (OpReg w reg) (OpReg w keyReg) + , BCOND EQ (TBlock bid) + ] `appOL` acc + def_code = case switchTargetsDefault targets of + Just bid -> unitOL (B (TBlock bid)) + Nothing -> nilOL + + switch_code <- foldM mkbranch nilOL (switchTargetsCases targets) + return $ code `appOL` switch_code `appOL` def_code + +-- We don't do jump tables for now, see Ticket 19912 +generateJumpTableForInstr :: NCGConfig -> Instr + -> Maybe (NatCmmDecl RawCmmStatics Instr) +generateJumpTableForInstr _ _ = Nothing + +-- ----------------------------------------------------------------------------- +-- Top-level of the instruction selector + +-- See Note [Keeping track of the current block] for why +-- we pass the BlockId. +stmtsToInstrs :: BlockId -- ^ Basic block these statement will start to be placed in. + -> [CmmNode O O] -- ^ Cmm Statement + -> NatM (InstrBlock, BlockId) -- ^ Resulting instruction +stmtsToInstrs bid stmts = + go bid stmts nilOL + where + go bid [] instrs = return (instrs,bid) + go bid (s:stmts) instrs = do + (instrs',bid') <- stmtToInstrs bid s + -- If the statement introduced a new block, we use that one + let !newBid = fromMaybe bid bid' + go newBid stmts (instrs `appOL` instrs') + +-- | `bid` refers to the current block and is used to update the CFG +-- if new blocks are inserted in the control flow. +-- See Note [Keeping track of the current block] for more details. +stmtToInstrs :: BlockId -- ^ Basic block this statement will start to be placed in. + -> CmmNode e x + -> NatM (InstrBlock, Maybe BlockId) + -- ^ Instructions, and bid of new block if successive + -- statements are placed in a different basic block. +stmtToInstrs bid stmt = do + -- traceM $ "-- -------------------------- stmtToInstrs -------------------------- --\n" + -- ++ showSDocUnsafe (ppr stmt) + platform <- getPlatform + case stmt of + CmmUnsafeForeignCall target result_regs args + -> genCCall target result_regs args bid + + _ -> (,Nothing) <$> case stmt of + CmmComment s -> return (unitOL (COMMENT (ftext s))) + CmmTick {} -> return nilOL + + CmmAssign reg src + | isFloatType ty -> assignReg_FltCode format reg src + | otherwise -> assignReg_IntCode format reg src + where ty = cmmRegType platform reg + format = cmmTypeFormat ty + + CmmStore addr src + | isFloatType ty -> assignMem_FltCode format addr src + | otherwise -> assignMem_IntCode format addr src + where ty = cmmExprType platform src + format = cmmTypeFormat ty + + CmmBranch id -> genBranch id + + --We try to arrange blocks such that the likely branch is the fallthrough + --in GHC.Cmm.ContFlowOpt. So we can assume the condition is likely false here. + CmmCondBranch arg true false _prediction -> + genCondBranch bid true false arg + + CmmSwitch arg ids -> genSwitch arg ids + + CmmCall { cml_target = arg } -> genJump arg + + CmmUnwind _regs -> return nilOL + + _ -> pprPanic "stmtToInstrs: statement should have been cps'd away" (pdoc platform stmt) + +-------------------------------------------------------------------------------- +-- | 'InstrBlock's are the insn sequences generated by the insn selectors. +-- They are really trees of insns to facilitate fast appending, where a +-- left-to-right traversal yields the insns in the correct order. +-- +type InstrBlock + = OrdList Instr + +-- | Register's passed up the tree. If the stix code forces the register +-- to live in a pre-decided machine register, it comes out as @Fixed@; +-- otherwise, it comes out as @Any@, and the parent can decide which +-- register to put it in. +-- +data Register + = Fixed Format Reg InstrBlock + | Any Format (Reg -> InstrBlock) + +-- | Sometimes we need to change the Format of a register. Primarily during +-- conversion. +swizzleRegisterRep :: Format -> Register -> Register +swizzleRegisterRep format (Fixed _ reg code) = Fixed format reg code +swizzleRegisterRep format (Any _ codefn) = Any format codefn + +-- | Grab the Reg for a CmmReg +getRegisterReg :: Platform -> CmmReg -> Reg + +getRegisterReg _ (CmmLocal (LocalReg u pk)) + = RegVirtual $ mkVirtualReg u (cmmTypeFormat pk) + +getRegisterReg platform (CmmGlobal mid) + = case globalRegMaybe platform mid of + Just reg -> RegReal reg + Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid) + -- By this stage, the only MagicIds remaining should be the + -- ones which map to a real machine register on this + -- platform. Hence if it's not mapped to a registers something + -- went wrong earlier in the pipeline. +-- | Convert a BlockId to some CmmStatic data +-- TODO: Add JumpTable Logic, see Ticket 19912 +-- jumpTableEntry :: NCGConfig -> Maybe BlockId -> CmmStatic +-- jumpTableEntry config Nothing = CmmStaticLit (CmmInt 0 (ncgWordWidth config)) +-- jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel) +-- where blockLabel = blockLbl blockid + +-- ----------------------------------------------------------------------------- +-- General things for putting together code sequences + +-- | The dual to getAnyReg: compute an expression into a register, but +-- we don't mind which one it is. +getSomeReg :: CmmExpr -> NatM (Reg, Format, InstrBlock) +getSomeReg expr = do + r <- getRegister expr + case r of + Any rep code -> do + tmp <- getNewRegNat rep + return (tmp, rep, code tmp) + Fixed rep reg code -> + return (reg, rep, code) + +-- TODO OPT: we might be able give getRegister +-- a hint, what kind of register we want. +getFloatReg :: HasCallStack => CmmExpr -> NatM (Reg, Format, InstrBlock) +getFloatReg expr = do + r <- getRegister expr + case r of + Any rep code | isFloatFormat rep -> do + tmp <- getNewRegNat rep + return (tmp, rep, code tmp) + Any II32 code -> do + tmp <- getNewRegNat FF32 + return (tmp, FF32, code tmp) + Any II64 code -> do + tmp <- getNewRegNat FF64 + return (tmp, FF64, code tmp) + Any _w _code -> do + config <- getConfig + pprPanic "can't do getFloatReg on" (pdoc (ncgPlatform config) expr) + -- can't do much for fixed. + Fixed rep reg code -> + return (reg, rep, code) + +-- TODO: TODO, bounds. We can't put any immediate +-- value in. They are constrained. +-- See Ticket 19911 +litToImm' :: CmmLit -> NatM (Operand, InstrBlock) +litToImm' lit = return (OpImm (litToImm lit), nilOL) + + +getRegister :: CmmExpr -> NatM Register +getRegister e = do + config <- getConfig + getRegister' config (ncgPlatform config) e + +-- Note [Handling PIC on AArch64] +-- AArch64 does not have a special PIC register, the general approach is to +-- simply go through the GOT, and there is assembly support for this: +-- +-- // Load the address of 'sym' from the GOT using ADRP and LDR (used for +-- // position-independent code on AArch64): +-- adrp x0, #:got:sym +-- ldr x0, [x0, #:got_lo12:sym] +-- +-- See also: https://developer.arm.com/documentation/dui0774/i/armclang-integrated-assembler-directives/assembly-expressions +-- +-- CmmGlobal @PicBaseReg@'s are generated in @GHC.CmmToAsm.PIC@ in the +-- @cmmMakePicReference@. This is in turn called from @cmmMakeDynamicReference@ +-- also in @Cmm.CmmToAsm.PIC@ from where it is also exported. There are two +-- callsites for this. One is in this module to produce the @target@ in @genCCall@ +-- the other is in @GHC.CmmToAsm@ in @cmmExprNative@. +-- +-- Conceptually we do not want any special PicBaseReg to be used on AArch64. If +-- we want to distinguish between symbol loading, we need to address this through +-- the way we load it, not through a register. +-- + +getRegister' :: NCGConfig -> Platform -> CmmExpr -> NatM Register +-- OPTIMIZATION WARNING: CmmExpr rewrites +-- 1. Rewrite: Reg + (-n) => Reg - n +-- TODO: this expression souldn't even be generated to begin with. +getRegister' config plat (CmmMachOp (MO_Add w0) [x, CmmLit (CmmInt i w1)]) | i < 0 + = getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt (-i) w1)]) + +getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt i w1)]) | i < 0 + = getRegister' config plat (CmmMachOp (MO_Add w0) [x, CmmLit (CmmInt (-i) w1)]) + + +-- Generic case. +getRegister' config plat expr + = case expr of + CmmReg (CmmGlobal PicBaseReg) + -> pprPanic "getRegisterReg-memory" (ppr $ PicBaseReg) + CmmLit lit + -> case lit of + + -- TODO handle CmmInt 0 specially, use wzr or xzr. + + CmmInt i W8 -> do + return (Any (intFormat W8) (\dst -> unitOL $ annExpr expr (MOV (OpReg W8 dst) (OpImm (ImmInteger (narrowS W8 i)))))) + CmmInt i W16 -> do + return (Any (intFormat W16) (\dst -> unitOL $ annExpr expr (MOV (OpReg W16 dst) (OpImm (ImmInteger (narrowS W16 i)))))) + + -- We need to be careful to not shorten this for negative literals. + -- Those need the upper bits set. We'd either have to explicitly sign + -- or figure out something smarter. Lowered to + -- `MOV dst XZR` + CmmInt i w | isNbitEncodeable 16 i, i >= 0 -> do + return (Any (intFormat w) (\dst -> unitOL $ annExpr expr (MOV (OpReg W16 dst) (OpImm (ImmInteger i))))) + CmmInt i w | isNbitEncodeable 32 i, i >= 0 -> do + let half0 = fromIntegral (fromIntegral i :: Word16) + half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16) + return (Any (intFormat w) (\dst -> toOL [ annExpr expr + $ MOV (OpReg W32 dst) (OpImm (ImmInt half0)) + , MOVK (OpReg W32 dst) (OpImmShift (ImmInt half1) SLSL 16) + ])) + -- fallback for W32 + CmmInt i W32 -> do + let half0 = fromIntegral (fromIntegral i :: Word16) + half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16) + return (Any (intFormat W32) (\dst -> toOL [ annExpr expr + $ MOV (OpReg W32 dst) (OpImm (ImmInt half0)) + , MOVK (OpReg W32 dst) (OpImmShift (ImmInt half1) SLSL 16) + ])) + -- anything else + CmmInt i W64 -> do + let half0 = fromIntegral (fromIntegral i :: Word16) + half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16) + half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16) + half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16) + return (Any (intFormat W64) (\dst -> toOL [ annExpr expr + $ MOV (OpReg W64 dst) (OpImm (ImmInt half0)) + , MOVK (OpReg W64 dst) (OpImmShift (ImmInt half1) SLSL 16) + , MOVK (OpReg W64 dst) (OpImmShift (ImmInt half2) SLSL 32) + , MOVK (OpReg W64 dst) (OpImmShift (ImmInt half3) SLSL 48) + ])) + CmmInt _i rep -> do + (op, imm_code) <- litToImm' lit + return (Any (intFormat rep) (\dst -> imm_code `snocOL` annExpr expr (MOV (OpReg rep dst) op))) + + -- floatToBytes (fromRational f) + CmmFloat 0 w -> do + (op, imm_code) <- litToImm' lit + return (Any (floatFormat w) (\dst -> imm_code `snocOL` annExpr expr (MOV (OpReg w dst) op))) + + CmmFloat _f W8 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for bytes" (pdoc plat expr) + CmmFloat _f W16 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for halfs" (pdoc plat expr) + CmmFloat f W32 -> do + let word = castFloatToWord32 (fromRational f) :: Word32 + half0 = fromIntegral (fromIntegral word :: Word16) + half1 = fromIntegral (fromIntegral (word `shiftR` 16) :: Word16) + tmp <- getNewRegNat (intFormat W32) + return (Any (floatFormat W32) (\dst -> toOL [ annExpr expr + $ MOV (OpReg W32 tmp) (OpImm (ImmInt half0)) + , MOVK (OpReg W32 tmp) (OpImmShift (ImmInt half1) SLSL 16) + , MOV (OpReg W32 dst) (OpReg W32 tmp) + ])) + CmmFloat f W64 -> do + let word = castDoubleToWord64 (fromRational f) :: Word64 + half0 = fromIntegral (fromIntegral word :: Word16) + half1 = fromIntegral (fromIntegral (word `shiftR` 16) :: Word16) + half2 = fromIntegral (fromIntegral (word `shiftR` 32) :: Word16) + half3 = fromIntegral (fromIntegral (word `shiftR` 48) :: Word16) + tmp <- getNewRegNat (intFormat W64) + return (Any (floatFormat W64) (\dst -> toOL [ annExpr expr + $ MOV (OpReg W64 tmp) (OpImm (ImmInt half0)) + , MOVK (OpReg W64 tmp) (OpImmShift (ImmInt half1) SLSL 16) + , MOVK (OpReg W64 tmp) (OpImmShift (ImmInt half2) SLSL 32) + , MOVK (OpReg W64 tmp) (OpImmShift (ImmInt half3) SLSL 48) + , MOV (OpReg W64 dst) (OpReg W64 tmp) + ])) + CmmFloat _f _w -> pprPanic "getRegister' (CmmLit:CmmFloat), unsupported float lit" (pdoc plat expr) + CmmVec _ -> pprPanic "getRegister' (CmmLit:CmmVec): " (pdoc plat expr) + CmmLabel _lbl -> do + (op, imm_code) <- litToImm' lit + let rep = cmmLitType plat lit + format = cmmTypeFormat rep + return (Any format (\dst -> imm_code `snocOL` (annExpr expr $ LDR format (OpReg (formatToWidth format) dst) op))) + + CmmLabelOff _lbl off | isNbitEncodeable 12 (fromIntegral off) -> do + (op, imm_code) <- litToImm' lit + let rep = cmmLitType plat lit + format = cmmTypeFormat rep + -- width = typeWidth rep + return (Any format (\dst -> imm_code `snocOL` LDR format (OpReg (formatToWidth format) dst) op)) + + CmmLabelOff lbl off -> do + (op, imm_code) <- litToImm' (CmmLabel lbl) + let rep = cmmLitType plat lit + format = cmmTypeFormat rep + width = typeWidth rep + (off_r, _off_format, off_code) <- getSomeReg $ CmmLit (CmmInt (fromIntegral off) width) + return (Any format (\dst -> imm_code `appOL` off_code `snocOL` LDR format (OpReg (formatToWidth format) dst) op `snocOL` ADD (OpReg width dst) (OpReg width dst) (OpReg width off_r))) + + CmmLabelDiffOff _ _ _ _ -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (pdoc plat expr) + CmmBlock _ -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (pdoc plat expr) + CmmHighStackMark -> pprPanic "getRegister' (CmmLit:CmmLabelOff): " (pdoc plat expr) + CmmLoad mem rep -> do + Amode addr addr_code <- getAmode plat mem + let format = cmmTypeFormat rep + return (Any format (\dst -> addr_code `snocOL` LDR format (OpReg (formatToWidth format) dst) (OpAddr addr))) + CmmStackSlot _ _ + -> pprPanic "getRegister' (CmmStackSlot): " (pdoc plat expr) + CmmReg reg + -> return (Fixed (cmmTypeFormat (cmmRegType plat reg)) + (getRegisterReg plat reg) + nilOL) + CmmRegOff reg off | isNbitEncodeable 12 (fromIntegral off) -> do + getRegister' config plat $ + CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)] + where width = typeWidth (cmmRegType plat reg) + + CmmRegOff reg off -> do + (off_r, _off_format, off_code) <- getSomeReg $ CmmLit (CmmInt (fromIntegral off) width) + (reg, _format, code) <- getSomeReg $ CmmReg reg + return $ Any (intFormat width) (\dst -> off_code `appOL` code `snocOL` ADD (OpReg width dst) (OpReg width reg) (OpReg width off_r)) + where width = typeWidth (cmmRegType plat reg) + + + + -- for MachOps, see GHC.Cmm.MachOp + -- For CmmMachOp, see GHC.Cmm.Expr + CmmMachOp op [e] -> do + (reg, _format, code) <- getSomeReg e + case op of + MO_Not w -> return $ Any (intFormat w) (\dst -> code `snocOL` MVN (OpReg w dst) (OpReg w reg)) + + MO_S_Neg w -> return $ Any (intFormat w) (\dst -> code `snocOL` NEG (OpReg w dst) (OpReg w reg)) + MO_F_Neg w -> return $ Any (floatFormat w) (\dst -> code `snocOL` NEG (OpReg w dst) (OpReg w reg)) + + MO_SF_Conv from to -> return $ Any (floatFormat to) (\dst -> code `snocOL` SCVTF (OpReg to dst) (OpReg from reg)) -- (Signed ConVerT Float) + MO_FS_Conv from to -> return $ Any (intFormat to) (\dst -> code `snocOL` FCVTZS (OpReg to dst) (OpReg from reg)) -- (float convert (-> zero) signed) + + -- TODO this is very hacky + -- Note, UBFM and SBFM expect source and target register to be of the same size, so we'll use @max from to@ + -- UBFM will set the high bits to 0. SBFM will copy the sign (sign extend). + MO_UU_Conv from to -> return $ Any (intFormat to) (\dst -> code `snocOL` UBFM (OpReg (max from to) dst) (OpReg (max from to) reg) (OpImm (ImmInt 0)) (toImm (min from to))) + MO_SS_Conv from to -> return $ Any (intFormat to) (\dst -> code `snocOL` SBFM (OpReg (max from to) dst) (OpReg (max from to) reg) (OpImm (ImmInt 0)) (toImm (min from to))) + MO_FF_Conv from to -> return $ Any (floatFormat to) (\dst -> code `snocOL` FCVT (OpReg to dst) (OpReg from reg)) + + -- Conversions + MO_XX_Conv _from to -> swizzleRegisterRep (intFormat to) <$> getRegister e + + _ -> pprPanic "getRegister' (monadic CmmMachOp):" (pdoc plat expr) + where toImm W8 = (OpImm (ImmInt 7)) + toImm W16 = (OpImm (ImmInt 15)) + toImm W32 = (OpImm (ImmInt 31)) + toImm W64 = (OpImm (ImmInt 63)) + toImm W128 = (OpImm (ImmInt 127)) + toImm W256 = (OpImm (ImmInt 255)) + toImm W512 = (OpImm (ImmInt 511)) + -- Dyadic machops: + -- + -- The general idea is: + -- compute x<i> <- x + -- compute x<j> <- y + -- OP x<r>, x<i>, x<j> + -- + -- TODO: for now we'll only implement the 64bit versions. And rely on the + -- fallthrough to alert us if things go wrong! + -- OPTIMIZATION WARNING: Dyadic CmmMachOp destructuring + -- 0. TODO This should not exist! Rewrite: Reg +- 0 -> Reg + CmmMachOp (MO_Add _) [expr'@(CmmReg (CmmGlobal _r)), CmmLit (CmmInt 0 _)] -> getRegister' config plat expr' + CmmMachOp (MO_Sub _) [expr'@(CmmReg (CmmGlobal _r)), CmmLit (CmmInt 0 _)] -> getRegister' config plat expr' + -- 1. Compute Reg +/- n directly. + -- For Add/Sub we can directly encode 12bits, or 12bits lsl #12. + CmmMachOp (MO_Add w) [(CmmReg reg), CmmLit (CmmInt n _)] + | n > 0 && n < 4096 -> return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (ADD (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n)))) + -- TODO: 12bits lsl #12; e.g. lower 12 bits of n are 0; shift n >> 12, and set lsl to #12. + where w' = formatToWidth (cmmTypeFormat (cmmRegType plat reg)) + r' = getRegisterReg plat reg + CmmMachOp (MO_Sub w) [(CmmReg reg), CmmLit (CmmInt n _)] + | n > 0 && n < 4096 -> return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (SUB (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n)))) + -- TODO: 12bits lsl #12; e.g. lower 12 bits of n are 0; shift n >> 12, and set lsl to #12. + where w' = formatToWidth (cmmTypeFormat (cmmRegType plat reg)) + r' = getRegisterReg plat reg + + -- 2. Shifts. x << n, x >> n. + CmmMachOp (MO_Shl w) [x, (CmmLit (CmmInt n _))] | w == W32, 0 <= n, n < 32 -> do + (reg_x, _format_x, code_x) <- getSomeReg x + return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (LSL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))) + CmmMachOp (MO_Shl w) [x, (CmmLit (CmmInt n _))] | w == W64, 0 <= n, n < 64 -> do + (reg_x, _format_x, code_x) <- getSomeReg x + return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (LSL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))) + + CmmMachOp (MO_U_Shr w) [x, (CmmLit (CmmInt n _))] | w == W32, 0 <= n, n < 32 -> do + (reg_x, _format_x, code_x) <- getSomeReg x + return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (LSR (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))) + CmmMachOp (MO_U_Shr w) [x, (CmmLit (CmmInt n _))] | w == W64, 0 <= n, n < 64 -> do + (reg_x, _format_x, code_x) <- getSomeReg x + return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (LSR (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))) + + -- 3. Logic &&, || + CmmMachOp (MO_And w) [(CmmReg reg), CmmLit (CmmInt n _)] | isBitMaskImmediate (fromIntegral n) -> + return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (AND (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n)))) + where w' = formatToWidth (cmmTypeFormat (cmmRegType plat reg)) + r' = getRegisterReg plat reg + + CmmMachOp (MO_Or w) [(CmmReg reg), CmmLit (CmmInt n _)] | isBitMaskImmediate (fromIntegral n) -> + return $ Any (intFormat w) (\d -> unitOL $ annExpr expr (ORR (OpReg w d) (OpReg w' r') (OpImm (ImmInteger n)))) + where w' = formatToWidth (cmmTypeFormat (cmmRegType plat reg)) + r' = getRegisterReg plat reg + + -- Generic case. + CmmMachOp op [x, y] -> do + -- alright, so we have an operation, and two expressions. And we want to essentially do + -- ensure we get float regs + let genOp w op = do + (reg_x, format_x, code_x) <- getSomeReg x + (reg_y, format_y, code_y) <- getSomeReg y + when ((isFloatFormat format_x && isIntFormat format_y) || (isIntFormat format_x && isFloatFormat format_y)) $ pprPanic "getRegister:genOp" (text "formats don't match:" <+> text (show format_x) <+> text "/=" <+> text (show format_y)) + return $ Any format_x (\dst -> code_x `appOL` code_y `appOL` op (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y)) + + withTempIntReg w op = OpReg w <$> getNewRegNat (intFormat w) >>= op + -- withTempFloatReg w op = OpReg w <$> getNewRegNat (floatFormat w) >>= op + + intOp w op = do + -- compute x<m> <- x + -- compute x<o> <- y + -- <OP> x<n>, x<m>, x<o> + (reg_x, _format_x, code_x) <- getSomeReg x + (reg_y, _format_y, code_y) <- getSomeReg y + return $ Any (intFormat w) (\dst -> code_x `appOL` code_y `appOL` op (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y)) + floatOp w op = do + (reg_fx, _format_x, code_fx) <- getFloatReg x + (reg_fy, _format_y, code_fy) <- getFloatReg y + return $ Any (floatFormat w) (\dst -> code_fx `appOL` code_fy `appOL` op (OpReg w dst) (OpReg w reg_fx) (OpReg w reg_fy)) + -- need a special one for conditionals, as they return ints + floatCond w op = do + (reg_fx, _format_x, code_fx) <- getFloatReg x + (reg_fy, _format_y, code_fy) <- getFloatReg y + return $ Any (intFormat w) (\dst -> code_fx `appOL` code_fy `appOL` op (OpReg w dst) (OpReg w reg_fx) (OpReg w reg_fy)) + + case op of + -- Integer operations + -- Add/Sub should only be Interger Options. + -- But our Cmm parser doesn't care about types + -- and thus we end up with <float> + <float> => MO_Add <float> <float> + MO_Add w -> genOp w (\d x y -> unitOL $ annExpr expr (ADD d x y)) + MO_Sub w -> genOp w (\d x y -> unitOL $ annExpr expr (SUB d x y)) + -- 31 30 29 28 + -- .---+---+---+---+-- - - + -- | N | Z | C | V | + -- '---+---+---+---+-- - - + -- Negative + -- Zero + -- Carry + -- oVerflow + -- + -- .------+-------------------------------------+-----------------+----------. + -- | Code | Meaning | Flags | Encoding | + -- |------+-------------------------------------+-----------------+----------| + -- | EQ | Equal | Z = 1 | 0000 | + -- | NE | Not Equal | Z = 0 | 0001 | + -- | HI | Unsigned Higher | C = 1 && Z = 0 | 1000 | + -- | HS | Unsigned Higher or Same | C = 1 | 0010 | + -- | LS | Unsigned Lower or Same | C = 0 || Z = 1 | 1001 | + -- | LO | Unsigned Lower | C = 0 | 0011 | + -- | GT | Signed Greater Than | Z = 0 && N = V | 1100 | + -- | GE | Signed Greater Than or Equal | N = V | 1010 | + -- | LE | Signed Less Than or Equal | Z = 1 || N /= V | 1101 | + -- | LT | Signed Less Than | N /= V | 1011 | + -- | CS | Carry Set (Unsigned Overflow) | C = 1 | 0010 | + -- | CC | Carry Clear (No Unsigned Overflow) | C = 0 | 0011 | + -- | VS | Signed Overflow | V = 1 | 0110 | + -- | VC | No Signed Overflow | V = 0 | 0111 | + -- | MI | Minus, Negative | N = 1 | 0100 | + -- | PL | Plus, Positive or Zero (!) | N = 0 | 0101 | + -- | AL | Always | Any | 1110 | + -- | NV | Never | Any | 1111 | + --- '-------------------------------------------------------------------------' + + MO_Eq w -> intOp w (\d x y -> toOL [ CMP x y, CSET d EQ ]) + MO_Ne w -> intOp w (\d x y -> toOL [ CMP x y, CSET d NE ]) + MO_Mul w -> intOp w (\d x y -> unitOL $ MUL d x y) + + -- Signed multiply/divide + MO_S_MulMayOflo w -> intOp w (\d x y -> toOL [ MUL d x y, CSET d VS ]) + MO_S_Quot w -> intOp w (\d x y -> unitOL $ SDIV d x y) + + -- No native rem instruction. So we'll compute the following + -- Rd <- Rx / Ry | 2 <- 7 / 3 -- SDIV Rd Rx Ry + -- Rd' <- Rx - Rd * Ry | 1 <- 7 - 2 * 3 -- MSUB Rd' Rd Ry Rx + -- | '---|----------------|---' | + -- | '----------------|-------' + -- '--------------------------' + -- Note the swap in Rx and Ry. + MO_S_Rem w -> withTempIntReg w $ \t -> + intOp w (\d x y -> toOL [ SDIV t x y, MSUB d t y x ]) + + -- Unsigned multiply/divide + MO_U_MulMayOflo _w -> unsupportedP plat expr + MO_U_Quot w -> intOp w (\d x y -> unitOL $ UDIV d x y) + MO_U_Rem w -> withTempIntReg w $ \t -> + intOp w (\d x y -> toOL [ UDIV t x y, MSUB d t y x ]) + + -- Signed comparisons -- see above for the CSET discussion + MO_S_Ge w -> intOp w (\d x y -> toOL [ CMP x y, CSET d SGE ]) + MO_S_Le w -> intOp w (\d x y -> toOL [ CMP x y, CSET d SLE ]) + MO_S_Gt w -> intOp w (\d x y -> toOL [ CMP x y, CSET d SGT ]) + MO_S_Lt w -> intOp w (\d x y -> toOL [ CMP x y, CSET d SLT ]) + + -- Unsigned comparisons + MO_U_Ge w -> intOp w (\d x y -> toOL [ CMP x y, CSET d UGE ]) + MO_U_Le w -> intOp w (\d x y -> toOL [ CMP x y, CSET d ULE ]) + MO_U_Gt w -> intOp w (\d x y -> toOL [ CMP x y, CSET d UGT ]) + MO_U_Lt w -> intOp w (\d x y -> toOL [ CMP x y, CSET d ULT ]) + + -- Floating point arithmetic + MO_F_Add w -> floatOp w (\d x y -> unitOL $ ADD d x y) + MO_F_Sub w -> floatOp w (\d x y -> unitOL $ SUB d x y) + MO_F_Mul w -> floatOp w (\d x y -> unitOL $ MUL d x y) + MO_F_Quot w -> floatOp w (\d x y -> unitOL $ SDIV d x y) + + -- Floating point comparison + MO_F_Eq w -> floatCond w (\d x y -> toOL [ CMP x y, CSET d EQ ]) + MO_F_Ne w -> floatCond w (\d x y -> toOL [ CMP x y, CSET d NE ]) + + -- careful with the floating point operations. + -- SLE is effectively LE or unordered (NaN) + -- SLT is the same. ULE, and ULT will not return true for NaN. + -- This is a bit counter intutive. Don't let yourself be fooled by + -- the S/U prefix for floats, it's only meaningful for integers. + MO_F_Ge w -> floatCond w (\d x y -> toOL [ CMP x y, CSET d OGE ]) + MO_F_Le w -> floatCond w (\d x y -> toOL [ CMP x y, CSET d OLE ]) -- x <= y <=> y > x + MO_F_Gt w -> floatCond w (\d x y -> toOL [ CMP x y, CSET d OGT ]) + MO_F_Lt w -> floatCond w (\d x y -> toOL [ CMP x y, CSET d OLT ]) -- x < y <=> y >= x + + -- Bitwise operations + MO_And w -> intOp w (\d x y -> unitOL $ AND d x y) + MO_Or w -> intOp w (\d x y -> unitOL $ ORR d x y) + MO_Xor w -> intOp w (\d x y -> unitOL $ EOR d x y) + -- MO_Not W64 -> + MO_Shl w -> intOp w (\d x y -> unitOL $ LSL d x y) + MO_U_Shr w -> intOp w (\d x y -> unitOL $ LSR d x y) + MO_S_Shr w -> intOp w (\d x y -> unitOL $ ASR d x y) + + -- TODO + + op -> pprPanic "getRegister' (unhandled dyadic CmmMachOp): " $ (pprMachOp op) <+> text "in" <+> (pdoc plat expr) + CmmMachOp _op _xs + -> pprPanic "getRegister' (variadic CmmMachOp): " (pdoc plat expr) + + where + unsupportedP :: OutputableP env a => env -> a -> b + unsupportedP platform op = pprPanic "Unsupported op:" (pdoc platform op) + + isNbitEncodeable :: Int -> Integer -> Bool + isNbitEncodeable n i = let shift = n - 1 in (-1 `shiftL` shift) <= i && i < (1 `shiftL` shift) + -- This needs to check if n can be encoded as a bitmask immediate: + -- + -- See https://stackoverflow.com/questions/30904718/range-of-immediate-values-in-armv8-a64-assembly + -- + isBitMaskImmediate :: Integer -> Bool + isBitMaskImmediate i = i `elem` [0b0000_0001, 0b0000_0010, 0b0000_0100, 0b0000_1000, 0b0001_0000, 0b0010_0000, 0b0100_0000, 0b1000_0000 + ,0b0000_0011, 0b0000_0110, 0b0000_1100, 0b0001_1000, 0b0011_0000, 0b0110_0000, 0b1100_0000 + ,0b0000_0111, 0b0000_1110, 0b0001_1100, 0b0011_1000, 0b0111_0000, 0b1110_0000 + ,0b0000_1111, 0b0001_1110, 0b0011_1100, 0b0111_1000, 0b1111_0000 + ,0b0001_1111, 0b0011_1110, 0b0111_1100, 0b1111_1000 + ,0b0011_1111, 0b0111_1110, 0b1111_1100 + ,0b0111_1111, 0b1111_1110 + ,0b1111_1111] + + +-- ----------------------------------------------------------------------------- +-- The 'Amode' type: Memory addressing modes passed up the tree. +data Amode = Amode AddrMode InstrBlock + +getAmode :: Platform -> CmmExpr -> NatM Amode +-- TODO: Specialize stuff we can destructure here. + +-- OPTIMIZATION WARNING: Addressing modes. +-- Addressing options: +-- LDUR/STUR: imm9: -256 - 255 +getAmode platform (CmmRegOff reg off) | -256 <= off, off <= 255 + = return $ Amode (AddrRegImm reg' off') nilOL + where reg' = getRegisterReg platform reg + off' = ImmInt off +-- LDR/STR: imm12: if reg is 32bit: 0 -- 16380 in multiples of 4 +getAmode platform (CmmRegOff reg off) + | typeWidth (cmmRegType platform reg) == W32, 0 <= off, off <= 16380, off `mod` 4 == 0 + = return $ Amode (AddrRegImm reg' off') nilOL + where reg' = getRegisterReg platform reg + off' = ImmInt off +-- LDR/STR: imm12: if reg is 64bit: 0 -- 32760 in multiples of 8 +getAmode platform (CmmRegOff reg off) + | typeWidth (cmmRegType platform reg) == W64, 0 <= off, off <= 32760, off `mod` 8 == 0 + = return $ Amode (AddrRegImm reg' off') nilOL + where reg' = getRegisterReg platform reg + off' = ImmInt off + +-- For Stores we often see something like this: +-- CmmStore (CmmMachOp (MO_Add w) [CmmLoad expr, CmmLit (CmmInt n w')]) (expr2) +-- E.g. a CmmStoreOff really. This can be translated to `str $expr2, [$expr, #n ] +-- for `n` in range. +getAmode _platform (CmmMachOp (MO_Add _w) [expr, CmmLit (CmmInt off _w')]) + | -256 <= off, off <= 255 + = do (reg, _format, code) <- getSomeReg expr + return $ Amode (AddrRegImm reg (ImmInteger off)) code + +getAmode _platform (CmmMachOp (MO_Sub _w) [expr, CmmLit (CmmInt off _w')]) + | -256 <= -off, -off <= 255 + = do (reg, _format, code) <- getSomeReg expr + return $ Amode (AddrRegImm reg (ImmInteger (-off))) code + +-- Generic case +getAmode _platform expr + = do (reg, _format, code) <- getSomeReg expr + return $ Amode (AddrReg reg) code + +-- ----------------------------------------------------------------------------- +-- Generating assignments + +-- Assignments are really at the heart of the whole code generation +-- business. Almost all top-level nodes of any real importance are +-- assignments, which correspond to loads, stores, or register +-- transfers. If we're really lucky, some of the register transfers +-- will go away, because we can use the destination register to +-- complete the code generation for the right hand side. This only +-- fails when the right hand side is forced into a fixed register +-- (e.g. the result of a call). + +assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock +assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock + +assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock +assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock + +assignMem_IntCode rep addrE srcE + = do + (src_reg, _format, code) <- getSomeReg srcE + platform <- getPlatform + Amode addr addr_code <- getAmode platform addrE + return $ COMMENT (text "CmmStore" <+> parens (text (show addrE)) <+> parens (text (show srcE))) + `consOL` (code + `appOL` addr_code + `snocOL` STR rep (OpReg (formatToWidth rep) src_reg) (OpAddr addr)) + +assignReg_IntCode _ reg src + = do + platform <- getPlatform + let dst = getRegisterReg platform reg + r <- getRegister src + return $ case r of + Any _ code -> COMMENT (text "CmmAssign" <+> parens (text (show reg)) <+> parens (text (show src))) `consOL` code dst + Fixed format freg fcode -> COMMENT (text "CmmAssign" <+> parens (text (show reg)) <+> parens (text (show src))) `consOL` (fcode `snocOL` MOV (OpReg (formatToWidth format) dst) (OpReg (formatToWidth format) freg)) + +-- Let's treat Floating point stuff +-- as integer code for now. Opaque. +assignMem_FltCode = assignMem_IntCode +assignReg_FltCode = assignReg_IntCode + +-- ----------------------------------------------------------------------------- +-- Jumps +genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock +genJump expr@(CmmLit (CmmLabel lbl)) + = return $ unitOL (annExpr expr (J (TLabel lbl))) + +genJump expr = do + (target, _format, code) <- getSomeReg expr + return (code `appOL` unitOL (annExpr expr (J (TReg target)))) + +-- ----------------------------------------------------------------------------- +-- Unconditional branches +genBranch :: BlockId -> NatM InstrBlock +genBranch = return . toOL . mkJumpInstr + +-- ----------------------------------------------------------------------------- +-- Conditional branches +genCondJump + :: BlockId + -> CmmExpr + -> NatM InstrBlock +genCondJump bid expr = do + case expr of + -- Optimized == 0 case. + CmmMachOp (MO_Eq w) [x, CmmLit (CmmInt 0 _)] -> do + (reg_x, _format_x, code_x) <- getSomeReg x + return $ code_x `snocOL` (annExpr expr (CBZ (OpReg w reg_x) (TBlock bid))) + + -- Optimized /= 0 case. + CmmMachOp (MO_Ne w) [x, CmmLit (CmmInt 0 _)] -> do + (reg_x, _format_x, code_x) <- getSomeReg x + return $ code_x `snocOL` (annExpr expr (CBNZ (OpReg w reg_x) (TBlock bid))) + + -- Generic case. + CmmMachOp mop [x, y] -> do + + let bcond w cmp = do + -- compute both sides. + (reg_x, _format_x, code_x) <- getSomeReg x + (reg_y, _format_y, code_y) <- getSomeReg y + return $ code_x `appOL` code_y `snocOL` CMP (OpReg w reg_x) (OpReg w reg_y) `snocOL` (annExpr expr (BCOND cmp (TBlock bid))) + fbcond w cmp = do + -- ensure we get float regs + (reg_fx, _format_fx, code_fx) <- getFloatReg x + (reg_fy, _format_fy, code_fy) <- getFloatReg y + return $ code_fx `appOL` code_fy `snocOL` CMP (OpReg w reg_fx) (OpReg w reg_fy) `snocOL` (annExpr expr (BCOND cmp (TBlock bid))) + + case mop of + MO_F_Eq w -> fbcond w EQ + MO_F_Ne w -> fbcond w NE + + MO_F_Gt w -> fbcond w OGT + MO_F_Ge w -> fbcond w OGE + MO_F_Lt w -> fbcond w OLT + MO_F_Le w -> fbcond w OLE + + MO_Eq w -> bcond w EQ + MO_Ne w -> bcond w NE + + MO_S_Gt w -> bcond w SGT + MO_S_Ge w -> bcond w SGE + MO_S_Lt w -> bcond w SLT + MO_S_Le w -> bcond w SLE + MO_U_Gt w -> bcond w UGT + MO_U_Ge w -> bcond w UGE + MO_U_Lt w -> bcond w ULT + MO_U_Le w -> bcond w ULE + _ -> pprPanic "AArch64.genCondJump:case mop: " (text $ show expr) + _ -> pprPanic "AArch64.genCondJump: " (text $ show expr) + + +genCondBranch + :: BlockId -- the source of the jump + -> BlockId -- the true branch target + -> BlockId -- the false branch target + -> CmmExpr -- the condition on which to branch + -> NatM InstrBlock -- Instructions + +genCondBranch _ true false expr = do + b1 <- genCondJump true expr + b2 <- genBranch false + return (b1 `appOL` b2) + +-- ----------------------------------------------------------------------------- +-- Generating C calls + +-- Now the biggest nightmare---calls. Most of the nastiness is buried in +-- @get_arg@, which moves the arguments to the correct registers/stack +-- locations. Apart from that, the code is easy. +-- +-- As per *convention*: +-- x0-x7: (volatile) argument registers +-- x8: (volatile) indirect result register / Linux syscall no +-- x9-x15: (volatile) caller saved regs +-- x16,x17: (volatile) intra-procedure-call registers +-- x18: (volatile) platform register. don't use for portability +-- x19-x28: (non-volatile) callee save regs +-- x29: (non-volatile) frame pointer +-- x30: link register +-- x31: stack pointer / zero reg +-- +-- Thus, this is what a c function will expect. Find the arguments in x0-x7, +-- anything above that on the stack. We'll ignore c functions with more than +-- 8 arguments for now. Sorry. +-- +-- We need to make sure we preserve x9-x15, don't want to touch x16, x17. + +-- Note [PLT vs GOT relocations] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- When linking objects together, we may need to lookup foreign references. That +-- is symbolic references to functions or values in other objects. When +-- compiling the object, we can not know where those elements will end up in +-- memory (relative to the current location). Thus the use of symbols. There +-- are two types of items we are interested, code segments we want to jump to +-- and continue execution there (functions, ...), and data items we want to look +-- up (strings, numbers, ...). For functions we can use the fact that we can use +-- an intermediate jump without visibility to the programs execution. If we +-- want to jump to a function that is simply too far away to reach for the B/BL +-- instruction, we can create a small piece of code that loads the full target +-- address and jumps to that on demand. Say f wants to call g, however g is out +-- of range for a direct jump, we can create a function h in range for f, that +-- will load the address of g, and jump there. The area where we construct h +-- is called the Procedure Linking Table (PLT), we have essentially replaced +-- f -> g with f -> h -> g. This is fine for function calls. However if we +-- want to lookup values, this trick doesn't work, so we need something else. +-- We will instead reserve a slot in memory, and have a symbol pointing to that +-- slot. Now what we essentially do is, we reference that slot, and expect that +-- slot to hold the final resting address of the data we are interested in. +-- Thus what that symbol really points to is the location of the final data. +-- The block of memory where we hold all those slots is the Global Offset Table +-- (GOT). Instead of x <- $foo, we now do y <- $fooPtr, and x <- [$y]. +-- +-- For JUMP/CALLs we have 26bits (+/- 128MB), for conditional branches we only +-- have 19bits (+/- 1MB). Symbol lookups are also within +/- 1MB, thus for most +-- of the LOAD/STOREs we'd want to use adrp, and add to compute a value within +-- 4GB of the PC, and load that. For anything outside of that range, we'd have +-- to go through the GOT. +-- +-- adrp x0, <symbol> +-- add x0, :lo:<symbol> +-- +-- will compute the address of <symbol> int x0 if <symbol> is within 4GB of the +-- PC. +-- +-- If we want to get the slot in the global offset table (GOT), we can do this: +-- +-- adrp x0, #:got:<symbol> +-- ldr x0, [x0, #:got_lo12:<symbol>] +-- +-- this will compute the address anywhere in the addressable 64bit space into +-- x0, by loading the address from the GOT slot. +-- +-- To actually get the value of <symbol>, we'd need to ldr x0, x0 still, which +-- for the first case can be optimized to use ldr x0, [x0, #:lo12:<symbol>] +-- instaed of the add instruction. +-- +-- As the memory model for AArch64 for PIC is considered to be +/- 4GB, we do +-- not need to go through the GOT, unless we want to address the full address +-- range within 64bit. + +genCCall + :: ForeignTarget -- function to call + -> [CmmFormal] -- where to put the result + -> [CmmActual] -- arguments (of mixed type) + -> BlockId -- The block we are in + -> NatM (InstrBlock, Maybe BlockId) +-- TODO: Specialize where we can. +-- Generic impl +genCCall target dest_regs arg_regs bid = do + -- we want to pass arg_regs into allArgRegs + -- pprTraceM "genCCall target" (ppr target) + -- pprTraceM "genCCall formal" (ppr dest_regs) + -- pprTraceM "genCCall actual" (ppr arg_regs) + + case target of + -- The target :: ForeignTarget call can either + -- be a foreign procedure with an address expr + -- and a calling convention. + ForeignTarget expr _cconv -> do + (call_target, call_target_code) <- case expr of + -- if this is a label, let's just directly to it. This will produce the + -- correct CALL relocation for BL... + (CmmLit (CmmLabel lbl)) -> pure (TLabel lbl, nilOL) + -- ... if it's not a label--well--let's compute the expression into a + -- register and jump to that. See Note [PLT vs GOT relocations] + _ -> do (reg, _format, reg_code) <- getSomeReg expr + pure (TReg reg, reg_code) + -- compute the code and register logic for all arg_regs. + -- this will give us the format information to match on. + arg_regs' <- mapM getSomeReg arg_regs + + -- Now this is stupid. Our Cmm expressions doesn't carry the proper sizes + -- so while in Cmm we might get W64 incorrectly for an int, that is W32 in + -- STG; this thenn breaks packing of stack arguments, if we need to pack + -- for the pcs, e.g. darwinpcs. Option one would be to fix the Int type + -- in Cmm proper. Option two, which we choose here is to use extended Hint + -- information to contain the size information and use that when packing + -- arguments, spilled onto the stack. + let (_res_hints, arg_hints) = foreignTargetHints target + arg_regs'' = zipWith (\(r, f, c) h -> (r,f,h,c)) arg_regs' arg_hints + + platform <- getPlatform + let packStack = platformOS platform == OSDarwin + + (stackSpace', passRegs, passArgumentsCode) <- passArguments packStack allGpArgRegs allFpArgRegs arg_regs'' 0 [] nilOL + + -- if we pack the stack, we may need to adjust to multiple of 8byte. + -- if we don't pack the stack, it will always be multiple of 8. + let stackSpace = if stackSpace' `mod` 8 /= 0 + then 8 * (stackSpace' `div` 8 + 1) + else stackSpace' + + (returnRegs, readResultsCode) <- readResults allGpArgRegs allFpArgRegs dest_regs [] nilOL + + let moveStackDown 0 = toOL [ PUSH_STACK_FRAME + , DELTA (-16) ] + moveStackDown i | odd i = moveStackDown (i + 1) + moveStackDown i = toOL [ PUSH_STACK_FRAME + , SUB (OpReg W64 (regSingle 31)) (OpReg W64 (regSingle 31)) (OpImm (ImmInt (8 * i))) + , DELTA (-8 * i - 16) ] + moveStackUp 0 = toOL [ POP_STACK_FRAME + , DELTA 0 ] + moveStackUp i | odd i = moveStackUp (i + 1) + moveStackUp i = toOL [ ADD (OpReg W64 (regSingle 31)) (OpReg W64 (regSingle 31)) (OpImm (ImmInt (8 * i))) + , POP_STACK_FRAME + , DELTA 0 ] + + let code = call_target_code -- compute the label (possibly into a register) + `appOL` moveStackDown (stackSpace `div` 8) + `appOL` passArgumentsCode -- put the arguments into x0, ... + `appOL` (unitOL $ BL call_target passRegs returnRegs) -- branch and link. + `appOL` readResultsCode -- parse the results into registers + `appOL` moveStackUp (stackSpace `div` 8) + return (code, Nothing) + + -- or a possibly side-effecting machine operation + -- mop :: CallishMachOp (see GHC.Cmm.MachOp) + PrimTarget mop -> do + -- We'll need config to construct forien targets + case mop of + -- 64 bit float ops + MO_F64_Pwr -> mkCCall "pow" + + MO_F64_Sin -> mkCCall "sin" + MO_F64_Cos -> mkCCall "cos" + MO_F64_Tan -> mkCCall "tan" + + MO_F64_Sinh -> mkCCall "sinh" + MO_F64_Cosh -> mkCCall "cosh" + MO_F64_Tanh -> mkCCall "tanh" + + MO_F64_Asin -> mkCCall "asin" + MO_F64_Acos -> mkCCall "acos" + MO_F64_Atan -> mkCCall "atan" + + MO_F64_Asinh -> mkCCall "asinh" + MO_F64_Acosh -> mkCCall "acosh" + MO_F64_Atanh -> mkCCall "atanh" + + MO_F64_Log -> mkCCall "log" + MO_F64_Log1P -> mkCCall "log1p" + MO_F64_Exp -> mkCCall "exp" + MO_F64_ExpM1 -> mkCCall "expm1" + MO_F64_Fabs -> mkCCall "fabs" + MO_F64_Sqrt -> mkCCall "sqrt" + + -- 32 bit float ops + MO_F32_Pwr -> mkCCall "powf" + + MO_F32_Sin -> mkCCall "sinf" + MO_F32_Cos -> mkCCall "cosf" + MO_F32_Tan -> mkCCall "tanf" + MO_F32_Sinh -> mkCCall "sinhf" + MO_F32_Cosh -> mkCCall "coshf" + MO_F32_Tanh -> mkCCall "tanhf" + MO_F32_Asin -> mkCCall "asinf" + MO_F32_Acos -> mkCCall "acosf" + MO_F32_Atan -> mkCCall "atanf" + MO_F32_Asinh -> mkCCall "asinhf" + MO_F32_Acosh -> mkCCall "acoshf" + MO_F32_Atanh -> mkCCall "atanhf" + MO_F32_Log -> mkCCall "logf" + MO_F32_Log1P -> mkCCall "log1pf" + MO_F32_Exp -> mkCCall "expf" + MO_F32_ExpM1 -> mkCCall "expm1f" + MO_F32_Fabs -> mkCCall "fasbf" + MO_F32_Sqrt -> mkCCall "sqrtf" + + -- Conversion + MO_UF_Conv w -> mkCCall (word2FloatLabel w) + + -- Arithmatic + -- These are not supported on X86, so I doubt they are used much. + MO_S_Mul2 _w -> unsupported mop + MO_S_QuotRem _w -> unsupported mop + MO_U_QuotRem _w -> unsupported mop + MO_U_QuotRem2 _w -> unsupported mop + MO_Add2 _w -> unsupported mop + MO_AddWordC _w -> unsupported mop + MO_SubWordC _w -> unsupported mop + MO_AddIntC _w -> unsupported mop + MO_SubIntC _w -> unsupported mop + MO_U_Mul2 _w -> unsupported mop + + -- Memory Ordering + -- TODO DMBSY is probably *way* too much! + MO_ReadBarrier -> return (unitOL DMBSY, Nothing) + MO_WriteBarrier -> return (unitOL DMBSY, Nothing) + MO_Touch -> return (nilOL, Nothing) -- Keep variables live (when using interior pointers) + -- Prefetch + MO_Prefetch_Data _n -> return (nilOL, Nothing) -- Prefetch hint. + + -- Memory copy/set/move/cmp, with alignment for optimization + + -- TODO Optimize and use e.g. quad registers to move memory around instead + -- of offloading this to memcpy. For small memcpys we can utilize + -- the 128bit quad registers in NEON to move block of bytes around. + -- Might also make sense of small memsets? Use xzr? What's the function + -- call overhead? + MO_Memcpy _align -> mkCCall "memcpy" + MO_Memset _align -> mkCCall "memset" + MO_Memmove _align -> mkCCall "memmove" + MO_Memcmp _align -> mkCCall "memcmp" + + MO_SuspendThread -> mkCCall "suspendThread" + MO_ResumeThread -> mkCCall "resumeThread" + + MO_PopCnt w -> mkCCall (popCntLabel w) + MO_Pdep w -> mkCCall (pdepLabel w) + MO_Pext w -> mkCCall (pextLabel w) + MO_Clz w -> mkCCall (clzLabel w) + MO_Ctz w -> mkCCall (ctzLabel w) + MO_BSwap w -> mkCCall (bSwapLabel w) + MO_BRev w -> mkCCall (bRevLabel w) + + -- -- Atomic read-modify-write. + MO_AtomicRMW w amop -> mkCCall (atomicRMWLabel w amop) + MO_AtomicRead w -> mkCCall (atomicReadLabel w) + MO_AtomicWrite w -> mkCCall (atomicWriteLabel w) + MO_Cmpxchg w -> mkCCall (cmpxchgLabel w) + -- -- Should be an AtomicRMW variant eventually. + -- -- Sequential consistent. + -- TODO: this should be implemented properly! + MO_Xchg w -> mkCCall (xchgLabel w) + + where + unsupported :: Show a => a -> b + unsupported mop = panic ("outOfLineCmmOp: " ++ show mop + ++ " not supported here") + mkCCall :: FastString -> NatM (InstrBlock, Maybe BlockId) + mkCCall name = do + config <- getConfig + target <- cmmMakeDynamicReference config CallReference $ + mkForeignLabel name Nothing ForeignLabelInThisPackage IsFunction + let cconv = ForeignConvention CCallConv [NoHint] [NoHint] CmmMayReturn + genCCall (ForeignTarget target cconv) dest_regs arg_regs bid + + -- TODO: Optimize using paired stores and loads (STP, LDP). It is + -- automomatically done by the allocator for us. However it's not optimal, + -- as we'd rather want to have control over + -- all spill/load registers, so we can optimize with instructions like + -- STP xA, xB, [sp, #-16]! + -- and + -- LDP xA, xB, sp, #16 + -- + passArguments :: Bool -> [Reg] -> [Reg] -> [(Reg, Format, ForeignHint, InstrBlock)] -> Int -> [Reg] -> InstrBlock -> NatM (Int, [Reg], InstrBlock) + passArguments _packStack _ _ [] stackSpace accumRegs accumCode = return (stackSpace, accumRegs, accumCode) + -- passArguments _ _ [] accumCode stackSpace | isEven stackSpace = return $ SUM (OpReg W64 x31) (OpReg W64 x31) OpImm (ImmInt (-8 * stackSpace)) + -- passArguments _ _ [] accumCode stackSpace = return $ SUM (OpReg W64 x31) (OpReg W64 x31) OpImm (ImmInt (-8 * (stackSpace + 1))) + -- passArguments [] fpRegs (arg0:arg1:args) stack accumCode = do + -- -- allocate this on the stack + -- (r0, format0, code_r0) <- getSomeReg arg0 + -- (r1, format1, code_r1) <- getSomeReg arg1 + -- let w0 = formatToWidth format0 + -- w1 = formatToWidth format1 + -- stackCode = unitOL $ STP (OpReg w0 r0) (OpReg w1 R1), (OpAddr (AddrRegImm x31 (ImmInt (stackSpace * 8))) + -- passArguments gpRegs (fpReg:fpRegs) args (stackCode `appOL` accumCode) + + -- float promotion. + -- According to + -- ISO/IEC 9899:2018 + -- Information technology — Programming languages — C + -- + -- e.g. + -- http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1124.pdf + -- http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1256.pdf + -- + -- GHC would need to know the prototype. + -- + -- > If the expression that denotes the called function has a type that does not include a + -- > prototype, the integer promotions are performed on each argument, and arguments that + -- > have type float are promoted to double. + -- + -- As we have no way to get prototypes for C yet, we'll *not* promote this + -- which is in line with the x86_64 backend :( + -- + -- See the encode_values.cmm test. + -- + -- We would essentially need to insert an FCVT (OpReg W64 fpReg) (OpReg W32 fpReg) + -- if w == W32. But *only* if we don't have a prototype m( + -- + -- For AArch64 specificies see: https://developer.arm.com/docs/ihi0055/latest/procedure-call-standard-for-the-arm-64-bit-architecture + -- + -- Still have GP regs, and we want to pass an GP argument. + passArguments pack (gpReg:gpRegs) fpRegs ((r, format, _hint, code_r):args) stackSpace accumRegs accumCode | isIntFormat format = do + let w = formatToWidth format + passArguments pack gpRegs fpRegs args stackSpace (gpReg:accumRegs) (accumCode `appOL` code_r `snocOL` (ann (text "Pass gp argument: " <> ppr r) $ MOV (OpReg w gpReg) (OpReg w r))) + + -- Still have FP regs, and we want to pass an FP argument. + passArguments pack gpRegs (fpReg:fpRegs) ((r, format, _hint, code_r):args) stackSpace accumRegs accumCode | isFloatFormat format = do + let w = formatToWidth format + passArguments pack gpRegs fpRegs args stackSpace (fpReg:accumRegs) (accumCode `appOL` code_r `snocOL` (ann (text "Pass fp argument: " <> ppr r) $ MOV (OpReg w fpReg) (OpReg w r))) + + -- No mor regs left to pass. Must pass on stack. + passArguments pack [] [] ((r, format, _hint, code_r):args) stackSpace accumRegs accumCode = do + let w = formatToWidth format + bytes = widthInBits w `div` 8 + space = if pack then bytes else 8 + stackCode = code_r `snocOL` (ann (text "Pass argument (size " <> ppr w <> text ") on the stack: " <> ppr r) $ STR format (OpReg w r) (OpAddr (AddrRegImm (regSingle 31) (ImmInt stackSpace)))) + passArguments pack [] [] args (stackSpace+space) accumRegs (stackCode `appOL` accumCode) + + -- Still have fpRegs left, but want to pass a GP argument. Must be passed on the stack then. + passArguments pack [] fpRegs ((r, format, _hint, code_r):args) stackSpace accumRegs accumCode | isIntFormat format = do + let w = formatToWidth format + bytes = widthInBits w `div` 8 + space = if pack then bytes else 8 + stackCode = code_r `snocOL` (ann (text "Pass argument (size " <> ppr w <> text ") on the stack: " <> ppr r) $ STR format (OpReg w r) (OpAddr (AddrRegImm (regSingle 31) (ImmInt stackSpace)))) + passArguments pack [] fpRegs args (stackSpace+space) accumRegs (stackCode `appOL` accumCode) + + -- Still have gpRegs left, but want to pass a FP argument. Must be passed on the stack then. + passArguments pack gpRegs [] ((r, format, _hint, code_r):args) stackSpace accumRegs accumCode | isFloatFormat format = do + let w = formatToWidth format + bytes = widthInBits w `div` 8 + space = if pack then bytes else 8 + stackCode = code_r `snocOL` (ann (text "Pass argument (size " <> ppr w <> text ") on the stack: " <> ppr r) $ STR format (OpReg w r) (OpAddr (AddrRegImm (regSingle 31) (ImmInt stackSpace)))) + passArguments pack gpRegs [] args (stackSpace+space) accumRegs (stackCode `appOL` accumCode) + + passArguments _ _ _ _ _ _ _ = pprPanic "passArguments" (text "invalid state") + + readResults :: [Reg] -> [Reg] -> [LocalReg] -> [Reg]-> InstrBlock -> NatM ([Reg], InstrBlock) + readResults _ _ [] accumRegs accumCode = return (accumRegs, accumCode) + readResults [] _ _ _ _ = do + platform <- getPlatform + pprPanic "genCCall, out of gp registers when reading results" (pdoc platform target) + readResults _ [] _ _ _ = do + platform <- getPlatform + pprPanic "genCCall, out of fp registers when reading results" (pdoc platform target) + readResults (gpReg:gpRegs) (fpReg:fpRegs) (dst:dsts) accumRegs accumCode = do + -- gp/fp reg -> dst + platform <- getPlatform + let rep = cmmRegType platform (CmmLocal dst) + format = cmmTypeFormat rep + w = cmmRegWidth platform (CmmLocal dst) + r_dst = getRegisterReg platform (CmmLocal dst) + if isFloatFormat format + then readResults (gpReg:gpRegs) fpRegs dsts (fpReg:accumRegs) (accumCode `snocOL` MOV (OpReg w r_dst) (OpReg w fpReg)) + else readResults gpRegs (fpReg:fpRegs) dsts (gpReg:accumRegs) (accumCode `snocOL` MOV (OpReg w r_dst) (OpReg w gpReg)) diff --git a/compiler/GHC/CmmToAsm/AArch64/Cond.hs b/compiler/GHC/CmmToAsm/AArch64/Cond.hs new file mode 100644 index 0000000000..687daccfda --- /dev/null +++ b/compiler/GHC/CmmToAsm/AArch64/Cond.hs @@ -0,0 +1,66 @@ +module GHC.CmmToAsm.AArch64.Cond where + +import GHC.Prelude + +-- https://developer.arm.com/documentation/den0024/a/the-a64-instruction-set/data-processing-instructions/conditional-instructions + +-- TODO: This appears to go a bit overboard? Maybe we should stick with what LLVM +-- settled on for fcmp? +-- false: always yields false, regardless of operands. +-- oeq: yields true if both operands are not a QNAN and op1 is equal to op2. +-- ogt: yields true if both operands are not a QNAN and op1 is greater than op2. +-- oge: yields true if both operands are not a QNAN and op1 is greater than or equal to op2. +-- olt: yields true if both operands are not a QNAN and op1 is less than op2. +-- ole: yields true if both operands are not a QNAN and op1 is less than or equal to op2. +-- one: yields true if both operands are not a QNAN and op1 is not equal to op2. +-- ord: yields true if both operands are not a QNAN. +-- ueq: yields true if either operand is a QNAN or op1 is equal to op2. +-- ugt: yields true if either operand is a QNAN or op1 is greater than op2. +-- uge: yields true if either operand is a QNAN or op1 is greater than or equal to op2. +-- ult: yields true if either operand is a QNAN or op1 is less than op2. +-- ule: yields true if either operand is a QNAN or op1 is less than or equal to op2. +-- une: yields true if either operand is a QNAN or op1 is not equal to op2. +-- uno: yields true if either operand is a QNAN. +-- true: always yields true, regardless of operands. +-- +-- LLVMs icmp knows about: +-- eq: yields true if the operands are equal, false otherwise. No sign interpretation is necessary or performed. +-- ne: yields true if the operands are unequal, false otherwise. No sign interpretation is necessary or performed. +-- ugt: interprets the operands as unsigned values and yields true if op1 is greater than op2. +-- uge: interprets the operands as unsigned values and yields true if op1 is greater than or equal to op2. +-- ult: interprets the operands as unsigned values and yields true if op1 is less than op2. +-- ule: interprets the operands as unsigned values and yields true if op1 is less than or equal to op2. +-- sgt: interprets the operands as signed values and yields true if op1 is greater than op2. +-- sge: interprets the operands as signed values and yields true if op1 is greater than or equal to op2. +-- slt: interprets the operands as signed values and yields true if op1 is less than op2. +-- sle: interprets the operands as signed values and yields true if op1 is less than or equal to op2. + +data Cond + = ALWAYS -- b.al + | EQ -- b.eq + | NE -- b.ne + -- signed + | SLT -- b.lt + | SLE -- b.le + | SGE -- b.ge + | SGT -- b.gt + -- unsigned + | ULT -- b.lo + | ULE -- b.ls + | UGE -- b.hs + | UGT -- b.hi + -- ordered + | OLT -- b.mi + | OLE -- b.ls + | OGE -- b.ge + | OGT -- b.gt + -- unordered + | UOLT -- b.lt + | UOLE -- b.le + | UOGE -- b.pl + | UOGT -- b.hi + -- others + | NEVER -- b.nv + | VS -- oVerflow set + | VC -- oVerflow clear + deriving Eq diff --git a/compiler/GHC/CmmToAsm/AArch64/Instr.hs b/compiler/GHC/CmmToAsm/AArch64/Instr.hs new file mode 100644 index 0000000000..7d4eaa95f6 --- /dev/null +++ b/compiler/GHC/CmmToAsm/AArch64/Instr.hs @@ -0,0 +1,758 @@ +{-# OPTIONS_GHC -fno-warn-orphans #-} + +module GHC.CmmToAsm.AArch64.Instr + +where + +import GHC.Prelude + +import GHC.CmmToAsm.AArch64.Cond +import GHC.CmmToAsm.AArch64.Regs + +import GHC.CmmToAsm.Instr (RegUsage(..)) +import GHC.CmmToAsm.Format +import GHC.CmmToAsm.Types +import GHC.CmmToAsm.Utils +import GHC.CmmToAsm.Config +import GHC.Platform.Reg + +import GHC.Platform.Regs +import GHC.Cmm.BlockId +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Label +import GHC.Cmm +import GHC.Cmm.CLabel +import GHC.Utils.Outputable +import GHC.Platform +import GHC.Types.Unique.Supply + +import GHC.Utils.Panic + +import Control.Monad (replicateM) +import Data.Maybe (fromMaybe) + +import GHC.Stack + +-- | TODO: verify this! +stackFrameHeaderSize :: Platform -> Int +stackFrameHeaderSize _ = 64 + +-- | All registers are 8 byte wide. +spillSlotSize :: Int +spillSlotSize = 8 + +-- | The number of bytes that the stack pointer should be aligned +-- to. +stackAlign :: Int +stackAlign = 16 + +-- | The number of spill slots available without allocating more. +maxSpillSlots :: NCGConfig -> Int +maxSpillSlots config +-- = 0 -- set to zero, to see when allocMoreStack has to fire. + = let platform = ncgPlatform config + in ((ncgSpillPreallocSize config - stackFrameHeaderSize platform) + `div` spillSlotSize) - 1 + +-- | Convert a spill slot number to a *byte* offset, with no sign. +spillSlotToOffset :: NCGConfig -> Int -> Int +spillSlotToOffset config slot + = stackFrameHeaderSize (ncgPlatform config) + spillSlotSize * slot + +-- | Get the registers that are being used by this instruction. +-- regUsage doesn't need to do any trickery for jumps and such. +-- Just state precisely the regs read and written by that insn. +-- The consequences of control flow transfers, as far as register +-- allocation goes, are taken care of by the register allocator. +-- +-- RegUsage = RU [<read regs>] [<write regs>] + +instance Outputable RegUsage where + ppr (RU reads writes) = text "RegUsage(reads:" <+> ppr reads <> comma <+> text "writes:" <+> ppr writes <> char ')' + +regUsageOfInstr :: Platform -> Instr -> RegUsage +regUsageOfInstr platform instr = case instr of + ANN _ i -> regUsageOfInstr platform i + -- 1. Arithmetic Instructions ------------------------------------------------ + ADD dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + CMN l r -> usage (regOp l ++ regOp r, []) + CMP l r -> usage (regOp l ++ regOp r, []) + MSUB dst src1 src2 src3 -> usage (regOp src1 ++ regOp src2 ++ regOp src3, regOp dst) + MUL dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + NEG dst src -> usage (regOp src, regOp dst) + SDIV dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + SUB dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + UDIV dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + + -- 2. Bit Manipulation Instructions ------------------------------------------ + SBFM dst src _ _ -> usage (regOp src, regOp dst) + UBFM dst src _ _ -> usage (regOp src, regOp dst) + + -- 3. Logical and Move Instructions ------------------------------------------ + AND dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + ASR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + BIC dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + BICS dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + EON dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + EOR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + LSL dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + LSR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + MOV dst src -> usage (regOp src, regOp dst) + MOVK dst src -> usage (regOp src, regOp dst) + MVN dst src -> usage (regOp src, regOp dst) + ORR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + ROR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst) + TST src1 src2 -> usage (regOp src1 ++ regOp src2, []) + -- 4. Branch Instructions ---------------------------------------------------- + J t -> usage (regTarget t, []) + B t -> usage (regTarget t, []) + BCOND _ t -> usage (regTarget t, []) + BL t ps _rs -> usage (regTarget t ++ ps, callerSavedRegisters) + + -- 5. Atomic Instructions ---------------------------------------------------- + -- 6. Conditional Instructions ----------------------------------------------- + CSET dst _ -> usage ([], regOp dst) + CBZ src _ -> usage (regOp src, []) + CBNZ src _ -> usage (regOp src, []) + -- 7. Load and Store Instructions -------------------------------------------- + STR _ src dst -> usage (regOp src ++ regOp dst, []) + LDR _ dst src -> usage (regOp src, regOp dst) + -- TODO is this right? see STR, which I'm only partial about being right? + STP _ src1 src2 dst -> usage (regOp src1 ++ regOp src2 ++ regOp dst, []) + LDP _ dst1 dst2 src -> usage (regOp src, regOp dst1 ++ regOp dst2) + + -- 8. Synchronization Instructions ------------------------------------------- + DMBSY -> usage ([], []) + + -- 9. Floating Point Instructions -------------------------------------------- + FCVT dst src -> usage (regOp src, regOp dst) + SCVTF dst src -> usage (regOp src, regOp dst) + FCVTZS dst src -> usage (regOp src, regOp dst) + + _ -> panic "regUsageOfInstr" + + where + -- filtering the usage is necessary, otherwise the register + -- allocator will try to allocate pre-defined fixed stg + -- registers as well, as they show up. + usage (src, dst) = RU (filter (interesting platform) src) + (filter (interesting platform) dst) + + regAddr :: AddrMode -> [Reg] + regAddr (AddrRegReg r1 r2) = [r1, r2] + regAddr (AddrRegImm r1 _) = [r1] + regAddr (AddrReg r1) = [r1] + regOp :: Operand -> [Reg] + regOp (OpReg _ r1) = [r1] + regOp (OpRegExt _ r1 _ _) = [r1] + regOp (OpRegShift _ r1 _ _) = [r1] + regOp (OpAddr a) = regAddr a + regOp (OpImm _) = [] + regOp (OpImmShift _ _ _) = [] + regTarget :: Target -> [Reg] + regTarget (TBlock _) = [] + regTarget (TLabel _) = [] + regTarget (TReg r1) = [r1] + + -- Is this register interesting for the register allocator? + interesting :: Platform -> Reg -> Bool + interesting _ (RegVirtual _) = True + interesting _ (RegReal (RealRegSingle (-1))) = False + interesting platform (RegReal (RealRegSingle i)) = freeReg platform i + interesting _ (RegReal (RealRegPair{})) + = panic "AArch64.Instr.interesting: no reg pairs on this arch" + +-- Save caller save registers +-- This is x0-x18 +-- +-- For SIMD/FP Registers: +-- Registers v8-v15 must be preserved by a callee across subroutine calls; +-- the remaining registers (v0-v7, v16-v31) do not need to be preserved (or +-- should be preserved by the caller). Additionally, only the bottom 64 bits +-- of each value stored in v8-v15 need to be preserved [7]; it is the +-- responsibility of the caller to preserve larger values. +-- +-- .---------------------------------------------------------------------------------------------------------------------------------------------------------------. +-- | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | +-- | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 42 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | +-- |== General Purpose registers ==================================================================================================================================| +-- | <---- argument passing -------------> | IR | <------- tmp registers --------> | IP0| IP1| PL | <------------------- callee saved ------------> | FP | LR | SP | +-- | <------ free registers --------------------------------------------------------------------> | BR | Sp | Hp | R1 | R2 | R3 | R4 | R5 | R6 | SL | -- | -- | -- | +-- |== SIMD/FP Registers ==========================================================================================================================================| +-- | <---- argument passing -------------> | <-- callee saved (lower 64 bits) ---> | <--------------------------------------- caller saved ----------------------> | +-- | <------ free registers -------------> | F1 | F2 | F3 | F4 | D1 | D2 | D3 | D4 | <------ free registers -----------------------------------------------------> | +-- '---------------------------------------------------------------------------------------------------------------------------------------------------------------' +-- IR: Indirect result location register, IP: Intra-procedure register, PL: Platform register, FP: Frame pointer, LR: Link register, SP: Stack pointer +-- BR: Base, SL: SpLim +callerSavedRegisters :: [Reg] +callerSavedRegisters + = map regSingle [0..18] + ++ map regSingle [32..39] + ++ map regSingle [48..63] + +-- | Apply a given mapping to all the register references in this +-- instruction. +patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr +patchRegsOfInstr instr env = case instr of + -- 0. Meta Instructions + ANN d i -> ANN d (patchRegsOfInstr i env) + -- 1. Arithmetic Instructions ---------------------------------------------- + ADD o1 o2 o3 -> ADD (patchOp o1) (patchOp o2) (patchOp o3) + CMN o1 o2 -> CMN (patchOp o1) (patchOp o2) + CMP o1 o2 -> CMP (patchOp o1) (patchOp o2) + MSUB o1 o2 o3 o4 -> MSUB (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4) + MUL o1 o2 o3 -> MUL (patchOp o1) (patchOp o2) (patchOp o3) + NEG o1 o2 -> NEG (patchOp o1) (patchOp o2) + SDIV o1 o2 o3 -> SDIV (patchOp o1) (patchOp o2) (patchOp o3) + SUB o1 o2 o3 -> SUB (patchOp o1) (patchOp o2) (patchOp o3) + UDIV o1 o2 o3 -> UDIV (patchOp o1) (patchOp o2) (patchOp o3) + + -- 2. Bit Manipulation Instructions ---------------------------------------- + SBFM o1 o2 o3 o4 -> SBFM (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4) + UBFM o1 o2 o3 o4 -> UBFM (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4) + + -- 3. Logical and Move Instructions ---------------------------------------- + AND o1 o2 o3 -> AND (patchOp o1) (patchOp o2) (patchOp o3) + ANDS o1 o2 o3 -> ANDS (patchOp o1) (patchOp o2) (patchOp o3) + ASR o1 o2 o3 -> ASR (patchOp o1) (patchOp o2) (patchOp o3) + BIC o1 o2 o3 -> BIC (patchOp o1) (patchOp o2) (patchOp o3) + BICS o1 o2 o3 -> BICS (patchOp o1) (patchOp o2) (patchOp o3) + EON o1 o2 o3 -> EON (patchOp o1) (patchOp o2) (patchOp o3) + EOR o1 o2 o3 -> EOR (patchOp o1) (patchOp o2) (patchOp o3) + LSL o1 o2 o3 -> LSL (patchOp o1) (patchOp o2) (patchOp o3) + LSR o1 o2 o3 -> LSR (patchOp o1) (patchOp o2) (patchOp o3) + MOV o1 o2 -> MOV (patchOp o1) (patchOp o2) + MOVK o1 o2 -> MOVK (patchOp o1) (patchOp o2) + MVN o1 o2 -> MVN (patchOp o1) (patchOp o2) + ORR o1 o2 o3 -> ORR (patchOp o1) (patchOp o2) (patchOp o3) + ROR o1 o2 o3 -> ROR (patchOp o1) (patchOp o2) (patchOp o3) + TST o1 o2 -> TST (patchOp o1) (patchOp o2) + + -- 4. Branch Instructions -------------------------------------------------- + J t -> J (patchTarget t) + B t -> B (patchTarget t) + BL t rs ts -> BL (patchTarget t) rs ts + BCOND c t -> BCOND c (patchTarget t) + + -- 5. Atomic Instructions -------------------------------------------------- + -- 6. Conditional Instructions --------------------------------------------- + CSET o c -> CSET (patchOp o) c + CBZ o l -> CBZ (patchOp o) l + CBNZ o l -> CBNZ (patchOp o) l + -- 7. Load and Store Instructions ------------------------------------------ + STR f o1 o2 -> STR f (patchOp o1) (patchOp o2) + LDR f o1 o2 -> LDR f (patchOp o1) (patchOp o2) + STP f o1 o2 o3 -> STP f (patchOp o1) (patchOp o2) (patchOp o3) + LDP f o1 o2 o3 -> LDP f (patchOp o1) (patchOp o2) (patchOp o3) + + -- 8. Synchronization Instructions ----------------------------------------- + DMBSY -> DMBSY + + -- 9. Floating Point Instructions ------------------------------------------ + FCVT o1 o2 -> FCVT (patchOp o1) (patchOp o2) + SCVTF o1 o2 -> SCVTF (patchOp o1) (patchOp o2) + FCVTZS o1 o2 -> FCVTZS (patchOp o1) (patchOp o2) + + _ -> pprPanic "patchRegsOfInstr" (text $ show instr) + where + patchOp :: Operand -> Operand + patchOp (OpReg w r) = OpReg w (env r) + patchOp (OpRegExt w r x s) = OpRegExt w (env r) x s + patchOp (OpRegShift w r m s) = OpRegShift w (env r) m s + patchOp (OpAddr a) = OpAddr (patchAddr a) + patchOp op = op + patchTarget :: Target -> Target + patchTarget (TReg r) = TReg (env r) + patchTarget t = t + patchAddr :: AddrMode -> AddrMode + patchAddr (AddrRegReg r1 r2) = AddrRegReg (env r1) (env r2) + patchAddr (AddrRegImm r1 i) = AddrRegImm (env r1) i + patchAddr (AddrReg r) = AddrReg (env r) +-------------------------------------------------------------------------------- +-- | Checks whether this instruction is a jump/branch instruction. +-- One that can change the flow of control in a way that the +-- register allocator needs to worry about. +isJumpishInstr :: Instr -> Bool +isJumpishInstr instr = case instr of + ANN _ i -> isJumpishInstr i + CBZ{} -> True + CBNZ{} -> True + J{} -> True + B{} -> True + BL{} -> True + BCOND{} -> True + _ -> False + +-- | Checks whether this instruction is a jump/branch instruction. +-- One that can change the flow of control in a way that the +-- register allocator needs to worry about. +jumpDestsOfInstr :: Instr -> [BlockId] +jumpDestsOfInstr (ANN _ i) = jumpDestsOfInstr i +jumpDestsOfInstr (CBZ _ t) = [ id | TBlock id <- [t]] +jumpDestsOfInstr (CBNZ _ t) = [ id | TBlock id <- [t]] +jumpDestsOfInstr (J t) = [id | TBlock id <- [t]] +jumpDestsOfInstr (B t) = [id | TBlock id <- [t]] +jumpDestsOfInstr (BL t _ _) = [ id | TBlock id <- [t]] +jumpDestsOfInstr (BCOND _ t) = [ id | TBlock id <- [t]] +jumpDestsOfInstr _ = [] + +-- | Change the destination of this jump instruction. +-- Used in the linear allocator when adding fixup blocks for join +-- points. +patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr +patchJumpInstr instr patchF + = case instr of + ANN d i -> ANN d (patchJumpInstr i patchF) + CBZ r (TBlock bid) -> CBZ r (TBlock (patchF bid)) + CBNZ r (TBlock bid) -> CBNZ r (TBlock (patchF bid)) + J (TBlock bid) -> J (TBlock (patchF bid)) + B (TBlock bid) -> B (TBlock (patchF bid)) + BL (TBlock bid) ps rs -> BL (TBlock (patchF bid)) ps rs + BCOND c (TBlock bid) -> BCOND c (TBlock (patchF bid)) + _ -> pprPanic "patchJumpInstr" (text $ show instr) + +-- ----------------------------------------------------------------------------- +-- Note [Spills and Reloads] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~ +-- We reserve @RESERVED_C_STACK_BYTES@ on the C stack for spilling and reloading +-- registers. AArch64s maximum displacement for SP relative spills and reloads +-- is essentially [-256,255], or [0, 0xFFF]*8 = [0, 32760] for 64bits. +-- +-- The @RESERVED_C_STACK_BYTES@ is 16k, so we can't address any location in a +-- single instruction. The idea is to use the Inter Procedure 0 (ip0) register +-- to perform the computations for larger offsets. +-- +-- Using sp to compute the offset will violate assumptions about the stack pointer +-- pointing to the top of the stack during signal handling. As we can't force +-- every signal to use its own stack, we have to ensure that the stack poitner +-- always poitns to the top of the stack, and we can't use it for computation. +-- +-- | An instruction to spill a register into a spill slot. +mkSpillInstr + :: HasCallStack + => NCGConfig + -> Reg -- register to spill + -> Int -- current stack delta + -> Int -- spill slot to use + -> [Instr] + +mkSpillInstr config reg delta slot = + case (spillSlotToOffset config slot) - delta of + imm | -256 <= imm && imm <= 255 -> [ mkStrSp imm ] + imm | imm > 0 && imm .&. 0x7 == 0x0 && imm <= 0xfff -> [ mkStrSp imm ] + imm | imm > 0xfff && imm <= 0xffffff && imm .&. 0x7 == 0x0 -> [ mkIp0SpillAddr (imm .&~. 0xfff) + , mkStrIp0 (imm .&. 0xfff) + ] + imm -> pprPanic "mkSpillInstr" (text "Unable to spill into" <+> int imm) + where + a .&~. b = a .&. (complement b) + + fmt = case reg of + RegReal (RealRegSingle n) | n < 32 -> II64 + _ -> FF64 + mkIp0SpillAddr imm = ANN (text "Spill: IP0 <- SP + " <> int imm) $ ADD ip0 sp (OpImm (ImmInt imm)) + mkStrSp imm = ANN (text "Spill@" <> int (off - delta)) $ STR fmt (OpReg W64 reg) (OpAddr (AddrRegImm (regSingle 31) (ImmInt imm))) + mkStrIp0 imm = ANN (text "Spill@" <> int (off - delta)) $ STR fmt (OpReg W64 reg) (OpAddr (AddrRegImm (regSingle 16) (ImmInt imm))) + + off = spillSlotToOffset config slot + +mkLoadInstr + :: NCGConfig + -> Reg -- register to load + -> Int -- current stack delta + -> Int -- spill slot to use + -> [Instr] + +mkLoadInstr config reg delta slot = + case (spillSlotToOffset config slot) - delta of + imm | -256 <= imm && imm <= 255 -> [ mkLdrSp imm ] + imm | imm > 0 && imm .&. 0x7 == 0x0 && imm <= 0xfff -> [ mkLdrSp imm ] + imm | imm > 0xfff && imm <= 0xffffff && imm .&. 0x7 == 0x0 -> [ mkIp0SpillAddr (imm .&~. 0xfff) + , mkLdrIp0 (imm .&. 0xfff) + ] + imm -> pprPanic "mkSpillInstr" (text "Unable to spill into" <+> int imm) + where + a .&~. b = a .&. (complement b) + + fmt = case reg of + RegReal (RealRegSingle n) | n < 32 -> II64 + _ -> FF64 + + mkIp0SpillAddr imm = ANN (text "Reload: IP0 <- SP + " <> int imm) $ ADD ip0 sp (OpImm (ImmInt imm)) + mkLdrSp imm = ANN (text "Reload@" <> int (off - delta)) $ LDR fmt (OpReg W64 reg) (OpAddr (AddrRegImm (regSingle 31) (ImmInt imm))) + mkLdrIp0 imm = ANN (text "Reload@" <> int (off - delta)) $ LDR fmt (OpReg W64 reg) (OpAddr (AddrRegImm (regSingle 16) (ImmInt imm))) + + off = spillSlotToOffset config slot + +-------------------------------------------------------------------------------- +-- | See if this instruction is telling us the current C stack delta +takeDeltaInstr :: Instr -> Maybe Int +takeDeltaInstr (ANN _ i) = takeDeltaInstr i +takeDeltaInstr (DELTA i) = Just i +takeDeltaInstr _ = Nothing + +-- Not real instructions. Just meta data +isMetaInstr :: Instr -> Bool +isMetaInstr instr + = case instr of + ANN _ i -> isMetaInstr i + COMMENT{} -> True + MULTILINE_COMMENT{} -> True + LOCATION{} -> True + LDATA{} -> True + NEWBLOCK{} -> True + DELTA{} -> True + PUSH_STACK_FRAME -> True + POP_STACK_FRAME -> True + _ -> False + +-- | Copy the value in a register to another one. +-- Must work for all register classes. +mkRegRegMoveInstr :: Reg -> Reg -> Instr +mkRegRegMoveInstr src dst = ANN (text "Reg->Reg Move: " <> ppr src <> text " -> " <> ppr dst) $ MOV (OpReg W64 dst) (OpReg W64 src) + +-- | Take the source and destination from this reg -> reg move instruction +-- or Nothing if it's not one +takeRegRegMoveInstr :: Instr -> Maybe (Reg,Reg) +--takeRegRegMoveInstr (MOV (OpReg fmt dst) (OpReg fmt' src)) | fmt == fmt' = Just (src, dst) +takeRegRegMoveInstr _ = Nothing + +-- | Make an unconditional jump instruction. +mkJumpInstr :: BlockId -> [Instr] +mkJumpInstr id = [B (TBlock id)] + +mkStackAllocInstr :: Platform -> Int -> [Instr] +mkStackAllocInstr platform n + | n == 0 = [] + | n > 0 && n < 4096 = [ ANN (text "Alloc More Stack") $ SUB sp sp (OpImm (ImmInt n)) ] + | n > 0 = ANN (text "Alloc More Stack") (SUB sp sp (OpImm (ImmInt 4095))) : mkStackAllocInstr platform (n - 4095) +mkStackAllocInstr _platform n = pprPanic "mkStackAllocInstr" (int n) + +mkStackDeallocInstr :: Platform -> Int -> [Instr] +mkStackDeallocInstr platform n + | n == 0 = [] + | n > 0 && n < 4096 = [ ANN (text "Dealloc More Stack") $ ADD sp sp (OpImm (ImmInt n)) ] + | n > 0 = ANN (text "Dealloc More Stack") (ADD sp sp (OpImm (ImmInt 4095))) : mkStackDeallocInstr platform (n - 4095) +mkStackDeallocInstr _platform n = pprPanic "mkStackDeallocInstr" (int n) + +-- +-- See note [extra spill slots] in X86/Instr.hs +-- +allocMoreStack + :: Platform + -> Int + -> NatCmmDecl statics GHC.CmmToAsm.AArch64.Instr.Instr + -> UniqSM (NatCmmDecl statics GHC.CmmToAsm.AArch64.Instr.Instr, [(BlockId,BlockId)]) + +allocMoreStack _ _ top@(CmmData _ _) = return (top,[]) +allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do + let entries = entryBlocks proc + + uniqs <- replicateM (length entries) getUniqueM + + let + delta = ((x + stackAlign - 1) `quot` stackAlign) * stackAlign -- round up + where x = slots * spillSlotSize -- sp delta + + alloc = mkStackAllocInstr platform delta + dealloc = mkStackDeallocInstr platform delta + + retargetList = (zip entries (map mkBlockId uniqs)) + + new_blockmap :: LabelMap BlockId + new_blockmap = mapFromList retargetList + + insert_stack_insn (BasicBlock id insns) + | Just new_blockid <- mapLookup id new_blockmap + = [ BasicBlock id $ alloc ++ [ B (TBlock new_blockid) ] + , BasicBlock new_blockid block' ] + | otherwise + = [ BasicBlock id block' ] + where + block' = foldr insert_dealloc [] insns + + insert_dealloc insn r = case insn of + J _ -> dealloc ++ (insn : r) + ANN _ (J _) -> dealloc ++ (insn : r) + _other | jumpDestsOfInstr insn /= [] + -> patchJumpInstr insn retarget : r + _other -> insn : r + + where retarget b = fromMaybe b (mapLookup b new_blockmap) + + new_code = concatMap insert_stack_insn code + -- in + return (CmmProc info lbl live (ListGraph new_code), retargetList) +-- ----------------------------------------------------------------------------- +-- Machine's assembly language + +-- We have a few common "instructions" (nearly all the pseudo-ops) but +-- mostly all of 'Instr' is machine-specific. + +-- Some additional (potential future) instructions are commented out. They are +-- not needed yet for the backend but could be used in the future. +data Instr + -- comment pseudo-op + = COMMENT SDoc + | MULTILINE_COMMENT SDoc + + -- Annotated instruction. Should print <instr> # <doc> + | ANN SDoc Instr + + -- location pseudo-op (file, line, col, name) + | LOCATION Int Int Int String + + -- some static data spat out during code + -- generation. Will be extracted before + -- pretty-printing. + | LDATA Section RawCmmStatics + + -- start a new basic block. Useful during + -- codegen, removed later. Preceding + -- instruction should be a jump, as per the + -- invariants for a BasicBlock (see Cmm). + | NEWBLOCK BlockId + + -- specify current stack offset for + -- benefit of subsequent passes + | DELTA Int + + -- 0. Pseudo Instructions -------------------------------------------------- + -- These are instructions not contained or only partially contained in the + -- official ISA, but make reading clearer. Some of them might even be + -- implemented in the assembler, but are not guaranteed to be portable. + -- | SXTB Operand Operand + -- | SXTH Operand Operand + -- | SXTW Operand Operand + -- | SXTX Operand Operand + | PUSH_STACK_FRAME + | POP_STACK_FRAME + -- 1. Arithmetic Instructions ---------------------------------------------- + -- | ADC Operand Operand Operang -- rd = rn + rm + C + -- | ADCS ... + | ADD Operand Operand Operand -- rd = rn + rm + -- | ADDS Operand Operand Operand -- rd = rn + rm + -- | ADR ... + -- | ADRP ... + | CMN Operand Operand -- rd + op2 + | CMP Operand Operand -- rd - op2 + -- | MADD ... + -- | MNEG ... + | MSUB Operand Operand Operand Operand -- rd = ra - rn × rm + | MUL Operand Operand Operand -- rd = rn × rm + | NEG Operand Operand -- rd = -op2 + -- | NEGS ... + -- | NGC ... + -- | NGCS ... + -- | SBC ... + -- | SBCS ... + | SDIV Operand Operand Operand -- rd = rn ÷ rm + -- | SMADDL ... + -- | SMNEGL ... + -- | SMSUBL ... + -- | SMULH ... + -- | SMULL ... + | SUB Operand Operand Operand -- rd = rn - op2 + -- | SUBS ... + | UDIV Operand Operand Operand -- rd = rn ÷ rm + -- | UMADDL ... -- Xd = Xa + Wn × Wm + -- | UMNEGL ... -- Xd = - Wn × Wm + -- | UMSUBL ... -- Xd = Xa - Wn × Wm + -- | UMULH ... -- Xd = (Xn × Xm)_127:64 + -- | UMULL ... -- Xd = Wn × Wm + + -- 2. Bit Manipulation Instructions ---------------------------------------- + | SBFM Operand Operand Operand Operand -- rd = rn[i,j] + -- SXTB = SBFM <Wd>, <Wn>, #0, #7 + -- SXTH = SBFM <Wd>, <Wn>, #0, #15 + -- SXTW = SBFM <Wd>, <Wn>, #0, #31 + | UBFM Operand Operand Operand Operand -- rd = rn[i,j] + -- UXTB = UBFM <Wd>, <Wn>, #0, #7 + -- UXTH = UBFM <Wd>, <Wn>, #0, #15 + + -- 3. Logical and Move Instructions ---------------------------------------- + | AND Operand Operand Operand -- rd = rn & op2 + | ANDS Operand Operand Operand -- rd = rn & op2 + | ASR Operand Operand Operand -- rd = rn ≫ rm or rd = rn ≫ #i, i is 6 bits + | BIC Operand Operand Operand -- rd = rn & ~op2 + | BICS Operand Operand Operand -- rd = rn & ~op2 + | EON Operand Operand Operand -- rd = rn ⊕ ~op2 + | EOR Operand Operand Operand -- rd = rn ⊕ op2 + | LSL Operand Operand Operand -- rd = rn ≪ rm or rd = rn ≪ #i, i is 6 bits + | LSR Operand Operand Operand -- rd = rn ≫ rm or rd = rn ≫ #i, i is 6 bits + | MOV Operand Operand -- rd = rn or rd = #i + | MOVK Operand Operand + -- | MOVN Operand Operand + -- | MOVZ Operand Operand + | MVN Operand Operand -- rd = ~rn + | ORN Operand Operand Operand -- rd = rn | ~op2 + | ORR Operand Operand Operand -- rd = rn | op2 + | ROR Operand Operand Operand -- rd = rn ≫ rm or rd = rn ≫ #i, i is 6 bits + | TST Operand Operand -- rn & op2 + -- Load and stores. + -- TODO STR/LDR might want to change to STP/LDP with XZR for the second register. + | STR Format Operand Operand -- str Xn, address-mode // Xn -> *addr + | LDR Format Operand Operand -- ldr Xn, address-mode // Xn <- *addr + | STP Format Operand Operand Operand -- stp Xn, Xm, address-mode // Xn -> *addr, Xm -> *(addr + 8) + | LDP Format Operand Operand Operand -- stp Xn, Xm, address-mode // Xn <- *addr, Xm <- *(addr + 8) + + -- Conditional instructions + | CSET Operand Cond -- if(cond) op <- 1 else op <- 0 + + | CBZ Operand Target -- if op == 0, then branch. + | CBNZ Operand Target -- if op /= 0, then branch. + -- Branching. + | J Target -- like B, but only generated from genJump. Used to distinguish genJumps from others. + | B Target -- unconditional branching b/br. (To a blockid, label or register) + | BL Target [Reg] [Reg] -- branch and link (e.g. set x30 to next pc, and branch) + | BCOND Cond Target -- branch with condition. b.<cond> + + -- 8. Synchronization Instructions ----------------------------------------- + | DMBSY + -- 9. Floating Point Instructions + -- Float ConVerT + | FCVT Operand Operand + -- Signed ConVerT Float + | SCVTF Operand Operand + -- Float ConVerT to Zero Signed + | FCVTZS Operand Operand + +instance Show Instr where + show (LDR _f o1 o2) = "LDR " ++ show o1 ++ ", " ++ show o2 + show (MOV o1 o2) = "MOV " ++ show o1 ++ ", " ++ show o2 + show _ = "missing" + +data Target + = TBlock BlockId + | TLabel CLabel + | TReg Reg + + +-- Extension +-- {Unsigned|Signed}XT{Byte|Half|Word|Doube} +data ExtMode + = EUXTB | EUXTH | EUXTW | EUXTX + | ESXTB | ESXTH | ESXTW | ESXTX + deriving (Eq, Show) + +data ShiftMode + = SLSL | SLSR | SASR | SROR + deriving (Eq, Show) + + +-- We can also add ExtShift to Extension. +-- However at most 3bits. +type ExtShift = Int +-- at most 6bits +type RegShift = Int + +data Operand + = OpReg Width Reg -- register + | OpRegExt Width Reg ExtMode ExtShift -- rm, <ext>[, <shift left>] + | OpRegShift Width Reg ShiftMode RegShift -- rm, <shift>, <0-64> + | OpImm Imm -- immediate value + | OpImmShift Imm ShiftMode RegShift + | OpAddr AddrMode -- memory reference + deriving (Eq, Show) + +-- Smart constructors +opReg :: Width -> Reg -> Operand +opReg = OpReg + +xzr, wzr, sp, ip0 :: Operand +xzr = OpReg W64 (RegReal (RealRegSingle (-1))) +wzr = OpReg W32 (RegReal (RealRegSingle (-1))) +sp = OpReg W64 (RegReal (RealRegSingle 31)) +ip0 = OpReg W64 (RegReal (RealRegSingle 16)) + +_x :: Int -> Operand +_x i = OpReg W64 (RegReal (RealRegSingle i)) +x0, x1, x2, x3, x4, x5, x6, x7 :: Operand +x8, x9, x10, x11, x12, x13, x14, x15 :: Operand +x16, x17, x18, x19, x20, x21, x22, x23 :: Operand +x24, x25, x26, x27, x28, x29, x30, x31 :: Operand +x0 = OpReg W64 (RegReal (RealRegSingle 0)) +x1 = OpReg W64 (RegReal (RealRegSingle 1)) +x2 = OpReg W64 (RegReal (RealRegSingle 2)) +x3 = OpReg W64 (RegReal (RealRegSingle 3)) +x4 = OpReg W64 (RegReal (RealRegSingle 4)) +x5 = OpReg W64 (RegReal (RealRegSingle 5)) +x6 = OpReg W64 (RegReal (RealRegSingle 6)) +x7 = OpReg W64 (RegReal (RealRegSingle 7)) +x8 = OpReg W64 (RegReal (RealRegSingle 8)) +x9 = OpReg W64 (RegReal (RealRegSingle 9)) +x10 = OpReg W64 (RegReal (RealRegSingle 10)) +x11 = OpReg W64 (RegReal (RealRegSingle 11)) +x12 = OpReg W64 (RegReal (RealRegSingle 12)) +x13 = OpReg W64 (RegReal (RealRegSingle 13)) +x14 = OpReg W64 (RegReal (RealRegSingle 14)) +x15 = OpReg W64 (RegReal (RealRegSingle 15)) +x16 = OpReg W64 (RegReal (RealRegSingle 16)) +x17 = OpReg W64 (RegReal (RealRegSingle 17)) +x18 = OpReg W64 (RegReal (RealRegSingle 18)) +x19 = OpReg W64 (RegReal (RealRegSingle 19)) +x20 = OpReg W64 (RegReal (RealRegSingle 20)) +x21 = OpReg W64 (RegReal (RealRegSingle 21)) +x22 = OpReg W64 (RegReal (RealRegSingle 22)) +x23 = OpReg W64 (RegReal (RealRegSingle 23)) +x24 = OpReg W64 (RegReal (RealRegSingle 24)) +x25 = OpReg W64 (RegReal (RealRegSingle 25)) +x26 = OpReg W64 (RegReal (RealRegSingle 26)) +x27 = OpReg W64 (RegReal (RealRegSingle 27)) +x28 = OpReg W64 (RegReal (RealRegSingle 28)) +x29 = OpReg W64 (RegReal (RealRegSingle 29)) +x30 = OpReg W64 (RegReal (RealRegSingle 30)) +x31 = OpReg W64 (RegReal (RealRegSingle 31)) + +_d :: Int -> Operand +_d = OpReg W64 . RegReal . RealRegSingle +d0, d1, d2, d3, d4, d5, d6, d7 :: Operand +d8, d9, d10, d11, d12, d13, d14, d15 :: Operand +d16, d17, d18, d19, d20, d21, d22, d23 :: Operand +d24, d25, d26, d27, d28, d29, d30, d31 :: Operand +d0 = OpReg W64 (RegReal (RealRegSingle 32)) +d1 = OpReg W64 (RegReal (RealRegSingle 33)) +d2 = OpReg W64 (RegReal (RealRegSingle 34)) +d3 = OpReg W64 (RegReal (RealRegSingle 35)) +d4 = OpReg W64 (RegReal (RealRegSingle 36)) +d5 = OpReg W64 (RegReal (RealRegSingle 37)) +d6 = OpReg W64 (RegReal (RealRegSingle 38)) +d7 = OpReg W64 (RegReal (RealRegSingle 39)) +d8 = OpReg W64 (RegReal (RealRegSingle 40)) +d9 = OpReg W64 (RegReal (RealRegSingle 41)) +d10 = OpReg W64 (RegReal (RealRegSingle 42)) +d11 = OpReg W64 (RegReal (RealRegSingle 43)) +d12 = OpReg W64 (RegReal (RealRegSingle 44)) +d13 = OpReg W64 (RegReal (RealRegSingle 45)) +d14 = OpReg W64 (RegReal (RealRegSingle 46)) +d15 = OpReg W64 (RegReal (RealRegSingle 47)) +d16 = OpReg W64 (RegReal (RealRegSingle 48)) +d17 = OpReg W64 (RegReal (RealRegSingle 49)) +d18 = OpReg W64 (RegReal (RealRegSingle 50)) +d19 = OpReg W64 (RegReal (RealRegSingle 51)) +d20 = OpReg W64 (RegReal (RealRegSingle 52)) +d21 = OpReg W64 (RegReal (RealRegSingle 53)) +d22 = OpReg W64 (RegReal (RealRegSingle 54)) +d23 = OpReg W64 (RegReal (RealRegSingle 55)) +d24 = OpReg W64 (RegReal (RealRegSingle 56)) +d25 = OpReg W64 (RegReal (RealRegSingle 57)) +d26 = OpReg W64 (RegReal (RealRegSingle 58)) +d27 = OpReg W64 (RegReal (RealRegSingle 59)) +d28 = OpReg W64 (RegReal (RealRegSingle 60)) +d29 = OpReg W64 (RegReal (RealRegSingle 61)) +d30 = OpReg W64 (RegReal (RealRegSingle 62)) +d31 = OpReg W64 (RegReal (RealRegSingle 63)) + +opRegUExt :: Width -> Reg -> Operand +opRegUExt W64 r = OpRegExt W64 r EUXTX 0 +opRegUExt W32 r = OpRegExt W32 r EUXTW 0 +opRegUExt W16 r = OpRegExt W16 r EUXTH 0 +opRegUExt W8 r = OpRegExt W8 r EUXTB 0 +opRegUExt w _r = pprPanic "opRegUExt" (text $ show w) + +opRegSExt :: Width -> Reg -> Operand +opRegSExt W64 r = OpRegExt W64 r ESXTX 0 +opRegSExt W32 r = OpRegExt W32 r ESXTW 0 +opRegSExt W16 r = OpRegExt W16 r ESXTH 0 +opRegSExt W8 r = OpRegExt W8 r ESXTB 0 +opRegSExt w _r = pprPanic "opRegSExt" (text $ show w) diff --git a/compiler/GHC/CmmToAsm/AArch64/Ppr.hs b/compiler/GHC/CmmToAsm/AArch64/Ppr.hs new file mode 100644 index 0000000000..3f413339c2 --- /dev/null +++ b/compiler/GHC/CmmToAsm/AArch64/Ppr.hs @@ -0,0 +1,587 @@ +{-# OPTIONS_GHC -fno-warn-orphans #-} +{-# LANGUAGE CPP #-} + +module GHC.CmmToAsm.AArch64.Ppr (pprNatCmmDecl, pprInstr) where + +import GHC.Prelude hiding (EQ) + +import Data.Word +import qualified Data.Array.Unsafe as U ( castSTUArray ) +import Data.Array.ST +import Control.Monad.ST + +import GHC.CmmToAsm.AArch64.Instr +import GHC.CmmToAsm.AArch64.Regs +import GHC.CmmToAsm.AArch64.Cond +import GHC.CmmToAsm.Ppr +import GHC.CmmToAsm.Format +import GHC.Platform.Reg +import GHC.CmmToAsm.Config +import GHC.CmmToAsm.Types +import GHC.CmmToAsm.Utils + +import GHC.Cmm hiding (topInfoTable) +import GHC.Cmm.Dataflow.Collections +import GHC.Cmm.Dataflow.Label +import GHC.Types.Basic (Alignment, mkAlignment, alignmentBytes) + +import GHC.Cmm.BlockId +import GHC.Cmm.CLabel +import GHC.Cmm.Ppr.Expr () -- For Outputable instances + +import GHC.Types.Unique ( pprUniqueAlways, getUnique ) +import GHC.Platform +import GHC.Utils.Outputable + +import GHC.Utils.Panic + +pprProcAlignment :: NCGConfig -> SDoc +pprProcAlignment config = maybe empty (pprAlign platform . mkAlignment) (ncgProcAlignment config) + where + platform = ncgPlatform config + +pprNatCmmDecl :: NCGConfig -> NatCmmDecl RawCmmStatics Instr -> SDoc +pprNatCmmDecl config (CmmData section dats) = + pprSectionAlign config section $$ pprDatas config dats + +pprNatCmmDecl config proc@(CmmProc top_info lbl _ (ListGraph blocks)) = + let platform = ncgPlatform config in + pprProcAlignment config $$ + case topInfoTable proc of + Nothing -> + -- special case for code without info table: + pprSectionAlign config (Section Text lbl) $$ + -- do not + -- pprProcAlignment config $$ + pprLabel platform lbl $$ -- blocks guaranteed not null, so label needed + vcat (map (pprBasicBlock config top_info) blocks) $$ + (if ncgDwarfEnabled config + then ppr (mkAsmTempEndLabel lbl) <> char ':' else empty) $$ + pprSizeDecl platform lbl + + Just (CmmStaticsRaw info_lbl _) -> + pprSectionAlign config (Section Text info_lbl) $$ + -- pprProcAlignment config $$ + (if platformHasSubsectionsViaSymbols platform + then ppr (mkDeadStripPreventer info_lbl) <> char ':' + else empty) $$ + vcat (map (pprBasicBlock config top_info) blocks) $$ + -- above: Even the first block gets a label, because with branch-chain + -- elimination, it might be the target of a goto. + (if platformHasSubsectionsViaSymbols platform + then -- See Note [Subsections Via Symbols] + text "\t.long " + <+> ppr info_lbl + <+> char '-' + <+> ppr (mkDeadStripPreventer info_lbl) + else empty) $$ + pprSizeDecl platform info_lbl + +pprLabel :: Platform -> CLabel -> SDoc +pprLabel platform lbl = + pprGloblDecl platform lbl + $$ pprTypeDecl platform lbl + $$ (pdoc platform lbl <> char ':') + +pprAlign :: Platform -> Alignment -> SDoc +pprAlign _platform alignment + = text "\t.balign " <> int (alignmentBytes alignment) + +-- | Print appropriate alignment for the given section type. +pprAlignForSection :: Platform -> SectionType -> SDoc +pprAlignForSection _platform _seg + -- .balign is stable, whereas .align is platform dependent. + = text "\t.balign 8" -- always 8 + +instance Outputable Instr where + ppr = pprInstr genericPlatform + +-- | Print section header and appropriate alignment for that section. +-- +-- This one will emit the header: +-- +-- .section .text +-- .balign 8 +-- +pprSectionAlign :: NCGConfig -> Section -> SDoc +pprSectionAlign _config (Section (OtherSection _) _) = + panic "AArch64.Ppr.pprSectionAlign: unknown section" +pprSectionAlign config sec@(Section seg _) = + pprSectionHeader config sec + $$ pprAlignForSection (ncgPlatform config) seg + +-- | Output the ELF .size directive. +pprSizeDecl :: Platform -> CLabel -> SDoc +pprSizeDecl platform lbl + = if osElfTarget (platformOS platform) + then text "\t.size" <+> pdoc platform lbl <> text ", .-" <> pdoc platform lbl + else empty + +pprBasicBlock :: NCGConfig -> LabelMap RawCmmStatics -> NatBasicBlock Instr + -> SDoc +pprBasicBlock config info_env (BasicBlock blockid instrs) + = maybe_infotable $ + pprLabel platform asmLbl $$ + vcat (map (pprInstr platform) (id {-detectTrivialDeadlock-} optInstrs)) $$ + (if ncgDwarfEnabled config + then ppr (mkAsmTempEndLabel asmLbl) <> char ':' + else empty + ) + where + -- Filter out identity moves. E.g. mov x18, x18 will be dropped. + optInstrs = filter f instrs + where f (MOV o1 o2) | o1 == o2 = False + f _ = True + + asmLbl = blockLbl blockid + platform = ncgPlatform config + maybe_infotable c = case mapLookup blockid info_env of + Nothing -> c + Just (CmmStaticsRaw info_lbl info) -> + -- pprAlignForSection platform Text $$ + infoTableLoc $$ + vcat (map (pprData config) info) $$ + pprLabel platform info_lbl $$ + c $$ + (if ncgDwarfEnabled config + then ppr (mkAsmTempEndLabel info_lbl) <> char ':' + else empty) + -- Make sure the info table has the right .loc for the block + -- coming right after it. See [Note: Info Offset] + infoTableLoc = case instrs of + (l@LOCATION{} : _) -> pprInstr platform l + _other -> empty + +pprDatas :: NCGConfig -> RawCmmStatics -> SDoc +-- See Note [emit-time elimination of static indirections] in "GHC.Cmm.CLabel". +pprDatas config (CmmStaticsRaw alias [CmmStaticLit (CmmLabel lbl), CmmStaticLit ind, _, _]) + | lbl == mkIndStaticInfoLabel + , let labelInd (CmmLabelOff l _) = Just l + labelInd (CmmLabel l) = Just l + labelInd _ = Nothing + , Just ind' <- labelInd ind + , alias `mayRedirectTo` ind' + = pprGloblDecl (ncgPlatform config) alias + $$ text ".equiv" <+> pdoc (ncgPlatform config) alias <> comma <> pdoc (ncgPlatform config) (CmmLabel ind') + +pprDatas config (CmmStaticsRaw lbl dats) + = vcat (pprLabel platform lbl : map (pprData config) dats) + where + platform = ncgPlatform config + +pprData :: NCGConfig -> CmmStatic -> SDoc +pprData _config (CmmString str) = pprString str +pprData _config (CmmFileEmbed path) = pprFileEmbed path + +pprData config (CmmUninitialised bytes) + = let platform = ncgPlatform config + in if platformOS platform == OSDarwin + then text ".space " <> int bytes + else text ".skip " <> int bytes + +pprData config (CmmStaticLit lit) = pprDataItem config lit + +pprGloblDecl :: Platform -> CLabel -> SDoc +pprGloblDecl platform lbl + | not (externallyVisibleCLabel lbl) = empty + | otherwise = text "\t.globl " <> pdoc platform lbl + +-- Note [Always use objects for info tables] +-- See discussion in X86.Ppr +-- for why this is necessary. Essentially we need to ensure that we never +-- pass function symbols when we migth want to lookup the info table. If we +-- did, we could end up with procedure linking tables (PLT)s, and thus the +-- lookup wouldn't point to the function, but into the jump table. +-- +-- Fun fact: The LLVMMangler exists to patch this issue su on the LLVM side as +-- well. +pprLabelType' :: Platform -> CLabel -> SDoc +pprLabelType' platform lbl = + if isCFunctionLabel lbl || functionOkInfoTable then + text "@function" + else + text "@object" + where + functionOkInfoTable = platformTablesNextToCode platform && + isInfoTableLabel lbl && not (isConInfoTableLabel lbl) + +-- this is called pprTypeAndSizeDecl in PPC.Ppr +pprTypeDecl :: Platform -> CLabel -> SDoc +pprTypeDecl platform lbl + = if osElfTarget (platformOS platform) && externallyVisibleCLabel lbl + then text ".type " <> pdoc platform lbl <> text ", " <> pprLabelType' platform lbl + else empty + +pprDataItem :: NCGConfig -> CmmLit -> SDoc +pprDataItem config lit + = vcat (ppr_item (cmmTypeFormat $ cmmLitType platform lit) lit) + where + platform = ncgPlatform config + + imm = litToImm lit + + ppr_item II8 _ = [text "\t.byte\t" <> pprImm platform imm] + ppr_item II16 _ = [text "\t.short\t" <> pprImm platform imm] + ppr_item II32 _ = [text "\t.long\t" <> pprImm platform imm] + ppr_item II64 _ = [text "\t.quad\t" <> pprImm platform imm] + + ppr_item FF32 (CmmFloat r _) + = let bs = floatToBytes (fromRational r) + in map (\b -> text "\t.byte\t" <> pprImm platform (ImmInt b)) bs + + ppr_item FF64 (CmmFloat r _) + = let bs = doubleToBytes (fromRational r) + in map (\b -> text "\t.byte\t" <> pprImm platform (ImmInt b)) bs + + ppr_item _ _ = pprPanic "pprDataItem:ppr_item" (text $ show lit) + +floatToBytes :: Float -> [Int] +floatToBytes f + = runST (do + arr <- newArray_ ((0::Int),3) + writeArray arr 0 f + arr <- castFloatToWord8Array arr + i0 <- readArray arr 0 + i1 <- readArray arr 1 + i2 <- readArray arr 2 + i3 <- readArray arr 3 + return (map fromIntegral [i0,i1,i2,i3]) + ) + +castFloatToWord8Array :: STUArray s Int Float -> ST s (STUArray s Int Word8) +castFloatToWord8Array = U.castSTUArray + +pprImm :: Platform -> Imm -> SDoc +pprImm _ (ImmInt i) = int i +pprImm _ (ImmInteger i) = integer i +pprImm p (ImmCLbl l) = pdoc p l +pprImm p (ImmIndex l i) = pdoc p l <> char '+' <> int i +pprImm _ (ImmLit s) = s + +-- TODO: See pprIm below for why this is a bad idea! +pprImm _ (ImmFloat f) + | f == 0 = text "wzr" + | otherwise = float (fromRational f) +pprImm _ (ImmDouble d) + | d == 0 = text "xzr" + | otherwise = double (fromRational d) + +pprImm p (ImmConstantSum a b) = pprImm p a <> char '+' <> pprImm p b +pprImm p (ImmConstantDiff a b) = pprImm p a <> char '-' + <> lparen <> pprImm p b <> rparen + + +-- aarch64 GNU as uses // for comments. +asmComment :: SDoc -> SDoc +asmComment c = whenPprDebug $ text "#" <+> c + +asmDoubleslashComment :: SDoc -> SDoc +asmDoubleslashComment c = whenPprDebug $ text "//" <+> c + +asmMultilineComment :: SDoc -> SDoc +asmMultilineComment c = whenPprDebug $ text "/*" $+$ c $+$ text "*/" + +pprIm :: Platform -> Imm -> SDoc +pprIm platform im = case im of + ImmInt i -> char '#' <> int i + ImmInteger i -> char '#' <> integer i + + -- TODO: This will only work for + -- The floating point value must be expressable as ±n ÷ 16 × 2^r, + -- where n and r are integers such that 16 ≤ n ≤ 31 and -3 ≤ r ≤ 4. + -- and 0 needs to be encoded as wzr/xzr. + -- + -- Except for 0, we might want to either split it up into enough + -- ADD operations into an Integer register and then just bit copy it into + -- the double register? See the toBytes + fromRational above for data items. + -- This is something the x86 backend does. + -- + -- We could also just turn them into statics :-/ Which is what the + -- PowerPC backend odes. + ImmFloat f | f == 0 -> text "wzr" + ImmFloat f -> char '#' <> float (fromRational f) + ImmDouble d | d == 0 -> text "xzr" + ImmDouble d -> char '#' <> double (fromRational d) + -- =<lbl> pseudo instruction! + ImmCLbl l -> char '=' <> pdoc platform l + ImmIndex l o -> text "[=" <> pdoc platform l <> comma <+> char '#' <> int o <> char ']' + _ -> panic "AArch64.pprIm" + +pprExt :: ExtMode -> SDoc +pprExt EUXTB = text "uxtb" +pprExt EUXTH = text "uxth" +pprExt EUXTW = text "uxtw" +pprExt EUXTX = text "uxtx" +pprExt ESXTB = text "sxtb" +pprExt ESXTH = text "sxth" +pprExt ESXTW = text "sxtw" +pprExt ESXTX = text "sxtx" + +pprShift :: ShiftMode -> SDoc +pprShift SLSL = text "lsl" +pprShift SLSR = text "lsr" +pprShift SASR = text "asr" +pprShift SROR = text "ror" + +pprOp :: Platform -> Operand -> SDoc +pprOp plat op = case op of + OpReg w r -> pprReg w r + OpRegExt w r x 0 -> pprReg w r <> comma <+> pprExt x + OpRegExt w r x i -> pprReg w r <> comma <+> pprExt x <> comma <+> char '#' <> int i + OpRegShift w r s i -> pprReg w r <> comma <+> pprShift s <> comma <+> char '#' <> int i + OpImm im -> pprIm plat im + OpImmShift im s i -> pprIm plat im <> comma <+> pprShift s <+> char '#' <> int i + -- TODO: Address compuation always use registers as 64bit -- is this correct? + OpAddr (AddrRegReg r1 r2) -> char '[' <+> pprReg W64 r1 <> comma <+> pprReg W64 r2 <+> char ']' + OpAddr (AddrRegImm r1 im) -> char '[' <+> pprReg W64 r1 <> comma <+> pprImm plat im <+> char ']' + OpAddr (AddrReg r1) -> char '[' <+> pprReg W64 r1 <+> char ']' + +pprReg :: Width -> Reg -> SDoc +pprReg w r = case r of + RegReal (RealRegSingle i) -> ppr_reg_no w i + RegReal (RealRegPair{}) -> panic "AArch64.pprReg: no reg pairs on this arch!" + -- virtual regs should not show up, but this is helpful for debugging. + RegVirtual (VirtualRegI u) -> text "%vI_" <> pprUniqueAlways u + RegVirtual (VirtualRegF u) -> text "%vF_" <> pprUniqueAlways u + RegVirtual (VirtualRegD u) -> text "%vD_" <> pprUniqueAlways u + _ -> pprPanic "AArch64.pprReg" (text $ show r) + + where + ppr_reg_no :: Width -> Int -> SDoc + ppr_reg_no w 31 + | w == W64 = text "sp" + | w == W32 = text "wsp" + + ppr_reg_no w i + | i < 0, w == W32 = text "wzr" + | i < 0, w == W64 = text "xzr" + | i < 0 = pprPanic "Invalid Zero Reg" (ppr w <+> int i) + -- General Purpose Registers + | i <= 31, w == W8 = text "w" <> int i -- there are no byte or half + | i <= 31, w == W16 = text "w" <> int i -- words... word will do. + | i <= 31, w == W32 = text "w" <> int i + | i <= 31, w == W64 = text "x" <> int i + | i <= 31 = pprPanic "Invalid Reg" (ppr w <+> int i) + -- Floating Point Registers + | i <= 63, w == W8 = text "b" <> int (i-32) + | i <= 63, w == W16 = text "h" <> int (i-32) + | i <= 63, w == W32 = text "s" <> int (i-32) + | i <= 63, w == W64 = text "d" <> int (i-32) + -- no support for 'q'uad in GHC's NCG yet. + | otherwise = text "very naughty powerpc register" + +isFloatOp :: Operand -> Bool +isFloatOp (OpReg _ (RegReal (RealRegSingle i))) | i > 31 = True +isFloatOp (OpReg _ (RegVirtual (VirtualRegF _))) = True +isFloatOp (OpReg _ (RegVirtual (VirtualRegD _))) = True +isFloatOp _ = False + +pprInstr :: Platform -> Instr -> SDoc +pprInstr platform instr = case instr of + -- Meta Instructions --------------------------------------------------------- + COMMENT s -> asmComment s + MULTILINE_COMMENT s -> asmMultilineComment s + ANN d i -> pprInstr platform i <+> asmDoubleslashComment d + LOCATION file line col _name + -> text "\t.loc" <+> ppr file <+> ppr line <+> ppr col + DELTA d -> asmComment $ text ("\tdelta = " ++ show d) + NEWBLOCK _ -> panic "PprInstr: NEWBLOCK" + LDATA _ _ -> panic "pprInstr: LDATA" + + -- Pseudo Instructions ------------------------------------------------------- + + PUSH_STACK_FRAME -> text "\tstp x29, x30, [sp, #-16]!" + $$ text "\tmov x29, sp" + + POP_STACK_FRAME -> text "\tldp x29, x30, [sp], #16" + -- =========================================================================== + -- AArch64 Instruction Set + -- 1. Arithmetic Instructions ------------------------------------------------ + ADD o1 o2 o3 + | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 -> text "\tfadd" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 + | otherwise -> text "\tadd" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 + CMN o1 o2 -> text "\tcmn" <+> pprOp platform o1 <> comma <+> pprOp platform o2 + CMP o1 o2 + | isFloatOp o1 && isFloatOp o2 -> text "\tfcmp" <+> pprOp platform o1 <> comma <+> pprOp platform o2 + | otherwise -> text "\tcmp" <+> pprOp platform o1 <> comma <+> pprOp platform o2 + MSUB o1 o2 o3 o4 -> text "\tmsub" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 <> comma <+> pprOp platform o4 + MUL o1 o2 o3 + | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 -> text "\tfmul" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 + | otherwise -> text "\tmul" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 + NEG o1 o2 + | isFloatOp o1 && isFloatOp o2 -> text "\tfneg" <+> pprOp platform o1 <> comma <+> pprOp platform o2 + | otherwise -> text "\tneg" <+> pprOp platform o1 <> comma <+> pprOp platform o2 + SDIV o1 o2 o3 | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 + -> text "\tfdiv" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 + SDIV o1 o2 o3 -> text "\tsdiv" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 + + SUB o1 o2 o3 + | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 -> text "\tfsub" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 + | otherwise -> text "\tsub" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 + UDIV o1 o2 o3 -> text "\tudiv" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 + + -- 2. Bit Manipulation Instructions ------------------------------------------ + SBFM o1 o2 o3 o4 -> text "\tsbfm" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 <> comma <+> pprOp platform o4 + UBFM o1 o2 o3 o4 -> text "\tubfm" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 <> comma <+> pprOp platform o4 + -- 3. Logical and Move Instructions ------------------------------------------ + AND o1 o2 o3 -> text "\tand" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 + ANDS o1 o2 o3 -> text "\tands" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 + ASR o1 o2 o3 -> text "\tasr" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 + BIC o1 o2 o3 -> text "\tbic" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 + BICS o1 o2 o3 -> text "\tbics" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 + EON o1 o2 o3 -> text "\teon" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 + EOR o1 o2 o3 -> text "\teor" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 + LSL o1 o2 o3 -> text "\tlsl" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 + LSR o1 o2 o3 -> text "\tlsr" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 + MOV o1 o2 + | isFloatOp o1 || isFloatOp o2 -> text "\tfmov" <+> pprOp platform o1 <> comma <+> pprOp platform o2 + | otherwise -> text "\tmov" <+> pprOp platform o1 <> comma <+> pprOp platform o2 + MOVK o1 o2 -> text "\tmovk" <+> pprOp platform o1 <> comma <+> pprOp platform o2 + MVN o1 o2 -> text "\tmvn" <+> pprOp platform o1 <> comma <+> pprOp platform o2 + ORN o1 o2 o3 -> text "\torn" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 + ORR o1 o2 o3 -> text "\torr" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 + ROR o1 o2 o3 -> text "\tror" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 + TST o1 o2 -> text "\ttst" <+> pprOp platform o1 <> comma <+> pprOp platform o2 + + -- 4. Branch Instructions ---------------------------------------------------- + J t -> pprInstr platform (B t) + B (TBlock bid) -> text "\tb" <+> pdoc platform (mkLocalBlockLabel (getUnique bid)) + B (TLabel lbl) -> text "\tb" <+> pdoc platform lbl + B (TReg r) -> text "\tbr" <+> pprReg W64 r + + BL (TBlock bid) _ _ -> text "\tbl" <+> pdoc platform (mkLocalBlockLabel (getUnique bid)) + BL (TLabel lbl) _ _ -> text "\tbl" <+> pdoc platform lbl + BL (TReg r) _ _ -> text "\tblr" <+> pprReg W64 r + + BCOND c (TBlock bid) -> text "\t" <> pprBcond c <+> pdoc platform (mkLocalBlockLabel (getUnique bid)) + BCOND c (TLabel lbl) -> text "\t" <> pprBcond c <+> pdoc platform lbl + BCOND _ (TReg _) -> panic "AArch64.ppr: No conditional branching to registers!" + + -- 5. Atomic Instructions ---------------------------------------------------- + -- 6. Conditional Instructions ----------------------------------------------- + CSET o c -> text "\tcset" <+> pprOp platform o <> comma <+> pprCond c + + CBZ o (TBlock bid) -> text "\tcbz" <+> pprOp platform o <> comma <+> pdoc platform (mkLocalBlockLabel (getUnique bid)) + CBZ o (TLabel lbl) -> text "\tcbz" <+> pprOp platform o <> comma <+> pdoc platform lbl + CBZ _ (TReg _) -> panic "AArch64.ppr: No conditional (cbz) branching to registers!" + + CBNZ o (TBlock bid) -> text "\tcbnz" <+> pprOp platform o <> comma <+> pdoc platform (mkLocalBlockLabel (getUnique bid)) + CBNZ o (TLabel lbl) -> text "\tcbnz" <+> pprOp platform o <> comma <+> pdoc platform lbl + CBNZ _ (TReg _) -> panic "AArch64.ppr: No conditional (cbnz) branching to registers!" + + -- 7. Load and Store Instructions -------------------------------------------- + -- NOTE: GHC may do whacky things where it only load the lower part of an + -- address. Not observing the correct size when loading will lead + -- inevitably to crashes. + STR _f o1@(OpReg W8 (RegReal (RealRegSingle i))) o2 | i < 32 -> + text "\tstrb" <+> pprOp platform o1 <> comma <+> pprOp platform o2 + STR _f o1@(OpReg W16 (RegReal (RealRegSingle i))) o2 | i < 32 -> + text "\tstrh" <+> pprOp platform o1 <> comma <+> pprOp platform o2 + STR _f o1 o2 -> text "\tstr" <+> pprOp platform o1 <> comma <+> pprOp platform o2 + +#if defined(darwin_HOST_OS) + LDR _f o1 (OpImm (ImmIndex lbl' off)) | Just (_info, lbl) <- dynamicLinkerLabelInfo lbl' -> + text "\tadrp" <+> pprOp platform o1 <> comma <+> pdoc platform lbl <> text "@gotpage" $$ + text "\tldr" <+> pprOp platform o1 <> comma <+> text "[" <> pprOp platform o1 <> comma <+> pdoc platform lbl <> text "@gotpageoff" <> text "]" $$ + text "\tadd" <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> char '#' <> int off -- TODO: check that off is in 12bits. + + LDR _f o1 (OpImm (ImmIndex lbl off)) | isForeignLabel lbl -> + text "\tadrp" <+> pprOp platform o1 <> comma <+> pdoc platform lbl <> text "@gotpage" $$ + text "\tldr" <+> pprOp platform o1 <> comma <+> text "[" <> pprOp platform o1 <> comma <+> pdoc platform lbl <> text "@gotpageoff" <> text "]" $$ + text "\tadd" <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> char '#' <> int off -- TODO: check that off is in 12bits. + + LDR _f o1 (OpImm (ImmIndex lbl off)) -> + text "\tadrp" <+> pprOp platform o1 <> comma <+> pdoc platform lbl <> text "@page" $$ + text "\tadd" <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> pdoc platform lbl <> text "@pageoff" $$ + text "\tadd" <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> char '#' <> int off -- TODO: check that off is in 12bits. + + LDR _f o1 (OpImm (ImmCLbl lbl')) | Just (_info, lbl) <- dynamicLinkerLabelInfo lbl' -> + text "\tadrp" <+> pprOp platform o1 <> comma <+> pdoc platform lbl <> text "@gotpage" $$ + text "\tldr" <+> pprOp platform o1 <> comma <+> text "[" <> pprOp platform o1 <> comma <+> pdoc platform lbl <> text "@gotpageoff" <> text "]" + + LDR _f o1 (OpImm (ImmCLbl lbl)) | isForeignLabel lbl -> + text "\tadrp" <+> pprOp platform o1 <> comma <+> pdoc platform lbl <> text "@gotpage" $$ + text "\tldr" <+> pprOp platform o1 <> comma <+> text "[" <> pprOp platform o1 <> comma <+> pdoc platform lbl <> text "@gotpageoff" <> text "]" + + LDR _f o1 (OpImm (ImmCLbl lbl)) -> + text "\tadrp" <+> pprOp platform o1 <> comma <+> pdoc platform lbl <> text "@page" $$ + text "\tadd" <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> pdoc platform lbl <> text "@pageoff" +#else + LDR _f o1 (OpImm (ImmIndex lbl' off)) | Just (_info, lbl) <- dynamicLinkerLabelInfo lbl' -> + text "\tadrp" <+> pprOp platform o1 <> comma <+> text ":got:" <> pdoc platform lbl $$ + text "\tldr" <+> pprOp platform o1 <> comma <+> text "[" <> pprOp platform o1 <> comma <+> text ":got_lo12:" <> pdoc platform lbl <> text "]" $$ + text "\tadd" <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> char '#' <> int off -- TODO: check that off is in 12bits. + + LDR _f o1 (OpImm (ImmIndex lbl off)) | isForeignLabel lbl -> + text "\tadrp" <+> pprOp platform o1 <> comma <+> text ":got:" <> pdoc platform lbl $$ + text "\tldr" <+> pprOp platform o1 <> comma <+> text "[" <> pprOp platform o1 <> comma <+> text ":got_lo12:" <> pdoc platform lbl <> text "]" $$ + text "\tadd" <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> char '#' <> int off -- TODO: check that off is in 12bits. + + LDR _f o1 (OpImm (ImmIndex lbl off)) -> + text "\tadrp" <+> pprOp platform o1 <> comma <+> pdoc platform lbl $$ + text "\tadd" <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> text ":lo12:" <> pdoc platform lbl $$ + text "\tadd" <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> char '#' <> int off -- TODO: check that off is in 12bits. + + LDR _f o1 (OpImm (ImmCLbl lbl')) | Just (_info, lbl) <- dynamicLinkerLabelInfo lbl' -> + text "\tadrp" <+> pprOp platform o1 <> comma <+> text ":got:" <> pdoc platform lbl $$ + text "\tldr" <+> pprOp platform o1 <> comma <+> text "[" <> pprOp platform o1 <> comma <+> text ":got_lo12:" <> pdoc platform lbl <> text "]" + + LDR _f o1 (OpImm (ImmCLbl lbl)) | isForeignLabel lbl -> + text "\tadrp" <+> pprOp platform o1 <> comma <+> text ":got:" <> pdoc platform lbl $$ + text "\tldr" <+> pprOp platform o1 <> comma <+> text "[" <> pprOp platform o1 <> comma <+> text ":got_lo12:" <> pdoc platform lbl <> text "]" + + LDR _f o1 (OpImm (ImmCLbl lbl)) -> + text "\tadrp" <+> pprOp platform o1 <> comma <+> pdoc platform lbl $$ + text "\tadd" <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> text ":lo12:" <> pdoc platform lbl +#endif + + LDR _f o1@(OpReg W8 (RegReal (RealRegSingle i))) o2 | i < 32 -> + text "\tldrsb" <+> pprOp platform o1 <> comma <+> pprOp platform o2 + LDR _f o1@(OpReg W16 (RegReal (RealRegSingle i))) o2 | i < 32 -> + text "\tldrsh" <+> pprOp platform o1 <> comma <+> pprOp platform o2 + LDR _f o1 o2 -> text "\tldr" <+> pprOp platform o1 <> comma <+> pprOp platform o2 + + STP _f o1 o2 o3 -> text "\tstp" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 + LDP _f o1 o2 o3 -> text "\tldp" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 + + -- 8. Synchronization Instructions ------------------------------------------- + DMBSY -> text "\tdmb sy" + -- 8. Synchronization Instructions ------------------------------------------- + FCVT o1 o2 -> text "\tfcvt" <+> pprOp platform o1 <> comma <+> pprOp platform o2 + SCVTF o1 o2 -> text "\tscvtf" <+> pprOp platform o1 <> comma <+> pprOp platform o2 + FCVTZS o1 o2 -> text "\tfcvtzs" <+> pprOp platform o1 <> comma <+> pprOp platform o2 + +pprBcond :: Cond -> SDoc +pprBcond c = text "b." <> pprCond c + +pprCond :: Cond -> SDoc +pprCond c = case c of + ALWAYS -> text "al" -- Always + EQ -> text "eq" -- Equal + NE -> text "ne" -- Not Equal + + SLT -> text "lt" -- Signed less than ; Less than, or unordered + SLE -> text "le" -- Signed less than or equal ; Less than or equal, or unordered + SGE -> text "ge" -- Signed greater than or equal ; Greater than or equal + SGT -> text "gt" -- Signed greater than ; Greater than + + ULT -> text "lo" -- Carry clear/ unsigned lower ; less than + ULE -> text "ls" -- Unsigned lower or same ; Less than or equal + UGE -> text "hs" -- Carry set/unsigned higher or same ; Greater than or equal, or unordered + UGT -> text "hi" -- Unsigned higher ; Greater than, or unordered + + NEVER -> text "nv" -- Never + VS -> text "vs" -- Overflow ; Unordered (at least one NaN operand) + VC -> text "vc" -- No overflow ; Not unordered + + -- Orderd variants. Respecting NaN. + OLT -> text "mi" + OLE -> text "ls" + OGE -> text "ge" + OGT -> text "gt" + + -- Unordered + UOLT -> text "lt" + UOLE -> text "le" + UOGE -> text "pl" + UOGT -> text "hi" diff --git a/compiler/GHC/CmmToAsm/AArch64/RegInfo.hs b/compiler/GHC/CmmToAsm/AArch64/RegInfo.hs new file mode 100644 index 0000000000..8c3d081e92 --- /dev/null +++ b/compiler/GHC/CmmToAsm/AArch64/RegInfo.hs @@ -0,0 +1,31 @@ +module GHC.CmmToAsm.AArch64.RegInfo where + +import GHC.Prelude + +import GHC.CmmToAsm.AArch64.Instr +import GHC.Cmm.BlockId +import GHC.Cmm + +import GHC.Utils.Outputable + +data JumpDest = DestBlockId BlockId + +-- Debug Instance +instance Outputable JumpDest where + ppr (DestBlockId bid) = text "jd<blk>:" <> ppr bid + +-- TODO: documen what this does. See Ticket 19914 +getJumpDestBlockId :: JumpDest -> Maybe BlockId +getJumpDestBlockId (DestBlockId bid) = Just bid + +-- TODO: document what this does. See Ticket 19914 +canShortcut :: Instr -> Maybe JumpDest +canShortcut _ = Nothing + +-- TODO: document what this does. See Ticket 19914 +shortcutStatics :: (BlockId -> Maybe JumpDest) -> RawCmmStatics -> RawCmmStatics +shortcutStatics _ other_static = other_static + +-- TODO: document what this does. See Ticket 19914 +shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr +shortcutJump _ other = other diff --git a/compiler/GHC/CmmToAsm/AArch64/Regs.hs b/compiler/GHC/CmmToAsm/AArch64/Regs.hs new file mode 100644 index 0000000000..fd1669eeac --- /dev/null +++ b/compiler/GHC/CmmToAsm/AArch64/Regs.hs @@ -0,0 +1,167 @@ +{-# OPTIONS_GHC -fno-warn-orphans #-} +module GHC.CmmToAsm.AArch64.Regs where + +import GHC.Prelude + +import GHC.Platform.Reg +import GHC.Platform.Reg.Class +import GHC.CmmToAsm.Format + +import GHC.Cmm +import GHC.Cmm.CLabel ( CLabel ) +import GHC.Types.Unique + +import GHC.Platform.Regs +import GHC.Utils.Outputable +import GHC.Utils.Panic +import GHC.Platform + +allMachRegNos :: [RegNo] +allMachRegNos = [0..31] ++ [32..63] +-- allocatableRegs is allMachRegNos with the fixed-use regs removed. +-- i.e., these are the regs for which we are prepared to allow the +-- register allocator to attempt to map VRegs to. +allocatableRegs :: Platform -> [RealReg] +allocatableRegs platform + = let isFree i = freeReg platform i + in map RealRegSingle $ filter isFree allMachRegNos + + +-- argRegs is the set of regs which are read for an n-argument call to C. +allGpArgRegs :: [Reg] +allGpArgRegs = map regSingle [0..7] +allFpArgRegs :: [Reg] +allFpArgRegs = map regSingle [32..39] + +-- STG: +-- 19: Base +-- 20: Sp +-- 21: Hp +-- 22-27: R1-R6 +-- 28: SpLim + +-- This is the STG Sp reg. +-- sp :: Reg +-- sp = regSingle 20 + +-- addressing modes ------------------------------------------------------------ + +data AddrMode + = AddrRegReg Reg Reg + | AddrRegImm Reg Imm + | AddrReg Reg + deriving (Eq, Show) + +-- ----------------------------------------------------------------------------- +-- Immediates + +data Imm + = ImmInt Int + | ImmInteger Integer -- Sigh. + | ImmCLbl CLabel -- AbstractC Label (with baggage) + | ImmLit SDoc -- Simple string + | ImmIndex CLabel Int + | ImmFloat Rational + | ImmDouble Rational + | ImmConstantSum Imm Imm + | ImmConstantDiff Imm Imm + deriving (Eq, Show) + +instance Show SDoc where + show = showPprUnsafe . ppr + +instance Eq SDoc where + lhs == rhs = show lhs == show rhs + +strImmLit :: String -> Imm +strImmLit s = ImmLit (text s) + + +litToImm :: CmmLit -> Imm +litToImm (CmmInt i w) = ImmInteger (narrowS w i) + -- narrow to the width: a CmmInt might be out of + -- range, but we assume that ImmInteger only contains + -- in-range values. A signed value should be fine here. +litToImm (CmmFloat f W32) = ImmFloat f +litToImm (CmmFloat f W64) = ImmDouble f +litToImm (CmmLabel l) = ImmCLbl l +litToImm (CmmLabelOff l off) = ImmIndex l off +litToImm (CmmLabelDiffOff l1 l2 off _) + = ImmConstantSum + (ImmConstantDiff (ImmCLbl l1) (ImmCLbl l2)) + (ImmInt off) +litToImm _ = panic "AArch64.Regs.litToImm: no match" + + +-- == To satisfy GHC.CmmToAsm.Reg.Target ======================================= + +-- squeese functions for the graph allocator ----------------------------------- +-- | regSqueeze_class reg +-- Calculate the maximum number of register colors that could be +-- denied to a node of this class due to having this reg +-- as a neighbour. +-- +{-# INLINE virtualRegSqueeze #-} +virtualRegSqueeze :: RegClass -> VirtualReg -> Int +virtualRegSqueeze cls vr + = case cls of + RcInteger + -> case vr of + VirtualRegI{} -> 1 + VirtualRegHi{} -> 1 + _other -> 0 + + RcDouble + -> case vr of + VirtualRegD{} -> 1 + VirtualRegF{} -> 0 + _other -> 0 + + _other -> 0 + +{-# INLINE realRegSqueeze #-} +realRegSqueeze :: RegClass -> RealReg -> Int +realRegSqueeze cls rr + = case cls of + RcInteger + -> case rr of + RealRegSingle regNo + | regNo < 32 -> 1 -- first fp reg is 32 + | otherwise -> 0 + + RealRegPair{} -> 0 + + RcDouble + -> case rr of + RealRegSingle regNo + | regNo < 32 -> 0 + | otherwise -> 1 + + RealRegPair{} -> 0 + + _other -> 0 + +mkVirtualReg :: Unique -> Format -> VirtualReg +mkVirtualReg u format + | not (isFloatFormat format) = VirtualRegI u + | otherwise + = case format of + FF32 -> VirtualRegD u + FF64 -> VirtualRegD u + _ -> panic "AArch64.mkVirtualReg" + +{-# INLINE classOfRealReg #-} +classOfRealReg :: RealReg -> RegClass +classOfRealReg (RealRegSingle i) + | i < 32 = RcInteger + | otherwise = RcDouble + +classOfRealReg (RealRegPair{}) + = panic "regClass(ppr): no reg pairs on this architecture" + +regDotColor :: RealReg -> SDoc +regDotColor reg + = case classOfRealReg reg of + RcInteger -> text "blue" + RcFloat -> text "red" + RcDouble -> text "green" diff --git a/compiler/GHC/CmmToAsm/Dwarf/Constants.hs b/compiler/GHC/CmmToAsm/Dwarf/Constants.hs index e9047256e8..b8fb5706cb 100644 --- a/compiler/GHC/CmmToAsm/Dwarf/Constants.hs +++ b/compiler/GHC/CmmToAsm/Dwarf/Constants.hs @@ -216,6 +216,7 @@ dwarfRegNo p r = case platformArch p of | r == xmm14 -> 31 | r == xmm15 -> 32 ArchPPC_64 _ -> fromIntegral $ toRegNo r + ArchAArch64 -> fromIntegral $ toRegNo r _other -> error "dwarfRegNo: Unsupported platform or unknown register!" -- | Virtual register number to use for return address. @@ -228,4 +229,5 @@ dwarfReturnRegNo p ArchX86 -> 8 -- eip ArchX86_64 -> 16 -- rip ArchPPC_64 ELF_V2 -> 65 -- lr (link register) + ArchAArch64-> 30 _other -> error "dwarfReturnRegNo: Unsupported platform!" diff --git a/compiler/GHC/CmmToAsm/Format.hs b/compiler/GHC/CmmToAsm/Format.hs index 207de095ae..390ef29bd2 100644 --- a/compiler/GHC/CmmToAsm/Format.hs +++ b/compiler/GHC/CmmToAsm/Format.hs @@ -12,6 +12,7 @@ module GHC.CmmToAsm.Format ( Format(..), intFormat, floatFormat, + isIntFormat, isFloatFormat, cmmTypeFormat, formatToWidth, @@ -73,6 +74,9 @@ floatFormat width other -> pprPanic "Format.floatFormat" (ppr other) +-- | Check if a format represent an integer value. +isIntFormat :: Format -> Bool +isIntFormat = not . isFloatFormat -- | Check if a format represents a floating point value. isFloatFormat :: Format -> Bool diff --git a/compiler/GHC/CmmToAsm/Instr.hs b/compiler/GHC/CmmToAsm/Instr.hs index 0a62c1d3bb..bc2e2969e6 100644 --- a/compiler/GHC/CmmToAsm/Instr.hs +++ b/compiler/GHC/CmmToAsm/Instr.hs @@ -31,6 +31,7 @@ data RegUsage reads :: [Reg], writes :: [Reg] } + deriving Show -- | No regs read or written to. noUsage :: RegUsage @@ -90,7 +91,7 @@ class Instruction instr where -> Reg -- ^ the reg to spill -> Int -- ^ the current stack delta -> Int -- ^ spill slot to use - -> instr + -> [instr] -- ^ instructions -- | An instruction to reload a register from a spill slot. @@ -99,7 +100,7 @@ class Instruction instr where -> Reg -- ^ the reg to reload. -> Int -- ^ the current stack delta -> Int -- ^ the spill slot to use - -> instr + -> [instr] -- ^ instructions -- | See if this instruction is telling us the current C stack delta takeDeltaInstr @@ -157,3 +158,6 @@ class Instruction instr where -- | Pretty-print an instruction pprInstr :: Platform -> instr -> SDoc + + -- Create a comment instruction + mkComment :: SDoc -> [instr] diff --git a/compiler/GHC/CmmToAsm/PIC.hs b/compiler/GHC/CmmToAsm/PIC.hs index 7fe90c3ec6..81ce9d34a9 100644 --- a/compiler/GHC/CmmToAsm/PIC.hs +++ b/compiler/GHC/CmmToAsm/PIC.hs @@ -122,6 +122,15 @@ cmmMakeDynamicReference config referenceKind lbl addImport stub return $ CmmLit $ CmmLabel stub + -- GOT relative loads work differently on AArch64. We don't do two + -- step loads. The got symbol is loaded directly, and not through an + -- additional load. Thus we do not need the CmmLoad decoration we have + -- on other platforms. + AccessViaSymbolPtr | ArchAArch64 <- platformArch platform -> do + let symbolPtr = mkDynamicLinkerLabel SymbolPtr lbl + addImport symbolPtr + return $ cmmMakePicReference config symbolPtr + AccessViaSymbolPtr -> do let symbolPtr = mkDynamicLinkerLabel SymbolPtr lbl addImport symbolPtr @@ -135,7 +144,6 @@ cmmMakeDynamicReference config referenceKind lbl -- so just jump there if it's a call or a jump _ -> return $ CmmLit $ CmmLabel lbl - -- ----------------------------------------------------------------------------- -- Create a position independent reference to a label. -- (but do not bother with dynamic linking). @@ -150,6 +158,11 @@ cmmMakePicReference config lbl | OSMinGW32 <- platformOS platform = CmmLit $ CmmLabel lbl + -- no pic base reg on AArch64, however indicate this symbol should go through + -- the global offset table (GOT). + | ArchAArch64 <- platformArch platform + = CmmLit $ CmmLabel lbl + | OSAIX <- platformOS platform = CmmMachOp (MO_Add W32) [ CmmReg (CmmGlobal PicBaseReg) @@ -241,6 +254,20 @@ howToAccessLabel config _arch OSMinGW32 _kind lbl | otherwise = AccessDirectly +-- On AArch64, relocations for JUMP and CALL will be emitted with 26bits, this +-- is enough for ~64MB of range. Anything else will need to go through a veneer, +-- which is the job of the linker to build. We might only want to lookup +-- Data References through the GOT. +howToAccessLabel config ArchAArch64 _os _kind lbl + | not (ncgExternalDynamicRefs config) + = AccessDirectly + + | labelDynamic config lbl + = AccessViaSymbolPtr + + | otherwise + = AccessDirectly + -- Mach-O (Darwin, Mac OS X) -- @@ -275,7 +302,7 @@ howToAccessLabel config arch OSDarwin JumpReference lbl -- dyld code stubs don't work for tailcalls because the -- stack alignment is only right for regular calls. -- Therefore, we have to go via a symbol pointer: - | arch == ArchX86 || arch == ArchX86_64 + | arch == ArchX86 || arch == ArchX86_64 || arch == ArchAArch64 , labelDynamic config lbl = AccessViaSymbolPtr @@ -283,15 +310,15 @@ howToAccessLabel config arch OSDarwin JumpReference lbl howToAccessLabel config arch OSDarwin _kind lbl -- Code stubs are the usual method of choice for imported code; -- not needed on x86_64 because Apple's new linker, ld64, generates - -- them automatically. + -- them automatically, neither on Aarch64 (arm64). | arch /= ArchX86_64 + , arch /= ArchAArch64 , labelDynamic config lbl = AccessViaStub | otherwise = AccessDirectly - ---------------------------------------------------------------------------- -- AIX @@ -616,7 +643,9 @@ pprImportedSymbol config importedLbl = case (arch,os) of | otherwise -> empty - (_, OSDarwin) -> empty + (ArchAArch64, OSDarwin) + -> empty + -- XCOFF / AIX diff --git a/compiler/GHC/CmmToAsm/PPC.hs b/compiler/GHC/CmmToAsm/PPC.hs index 148fd1b4b2..d38eb84c64 100644 --- a/compiler/GHC/CmmToAsm/PPC.hs +++ b/compiler/GHC/CmmToAsm/PPC.hs @@ -57,5 +57,4 @@ instance Instruction PPC.Instr where mkStackAllocInstr = PPC.mkStackAllocInstr mkStackDeallocInstr = PPC.mkStackDeallocInstr pprInstr = PPC.pprInstr - - + mkComment = const [] diff --git a/compiler/GHC/CmmToAsm/PPC/Instr.hs b/compiler/GHC/CmmToAsm/PPC/Instr.hs index 92567989ed..54a73f24a9 100644 --- a/compiler/GHC/CmmToAsm/PPC/Instr.hs +++ b/compiler/GHC/CmmToAsm/PPC/Instr.hs @@ -533,7 +533,7 @@ mkSpillInstr -> Reg -- register to spill -> Int -- current stack delta -> Int -- spill slot to use - -> Instr + -> [Instr] mkSpillInstr config reg delta slot = let platform = ncgPlatform config @@ -550,7 +550,7 @@ mkSpillInstr config reg delta slot Just _ -> ST Nothing -> STFAR -- pseudo instruction: 32 bit offsets - in instr fmt reg (AddrRegImm sp (ImmInt (off-delta))) + in [instr fmt reg (AddrRegImm sp (ImmInt (off-delta)))] mkLoadInstr @@ -558,7 +558,7 @@ mkLoadInstr -> Reg -- register to load -> Int -- current stack delta -> Int -- spill slot to use - -> Instr + -> [Instr] mkLoadInstr config reg delta slot = let platform = ncgPlatform config @@ -575,7 +575,7 @@ mkLoadInstr config reg delta slot Just _ -> LD Nothing -> LDFAR -- pseudo instruction: 32 bit offsets - in instr fmt reg (AddrRegImm sp (ImmInt (off-delta))) + in [instr fmt reg (AddrRegImm sp (ImmInt (off-delta)))] -- | The size of a minimal stackframe header including minimal diff --git a/compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs b/compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs index 1050fbaa96..83f581cac4 100644 --- a/compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs +++ b/compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs @@ -111,7 +111,10 @@ trivColorable platform virtualRegSqueeze realRegSqueeze RcInteger conflicts excl ArchSPARC64 -> panic "trivColorable ArchSPARC64" ArchPPC_64 _ -> 15 ArchARM _ _ _ -> panic "trivColorable ArchARM" - ArchAArch64 -> panic "trivColorable ArchAArch64" + -- We should be able to allocate *a lot* more in princple. + -- essentially all 32 - SP, so 31, we'd trash the link reg + -- as well as the platform and all others though. + ArchAArch64 -> 18 ArchAlpha -> panic "trivColorable ArchAlpha" ArchMipseb -> panic "trivColorable ArchMipseb" ArchMipsel -> panic "trivColorable ArchMipsel" @@ -143,7 +146,10 @@ trivColorable platform virtualRegSqueeze realRegSqueeze RcFloat conflicts exclus ArchSPARC64 -> panic "trivColorable ArchSPARC64" ArchPPC_64 _ -> 0 ArchARM _ _ _ -> panic "trivColorable ArchARM" - ArchAArch64 -> panic "trivColorable ArchAArch64" + -- we can in princple address all the float regs as + -- segments. So we could have 64 Float regs. Or + -- 128 Half regs, or even 256 Byte regs. + ArchAArch64 -> 0 ArchAlpha -> panic "trivColorable ArchAlpha" ArchMipseb -> panic "trivColorable ArchMipseb" ArchMipsel -> panic "trivColorable ArchMipsel" @@ -177,7 +183,7 @@ trivColorable platform virtualRegSqueeze realRegSqueeze RcDouble conflicts exclu ArchSPARC64 -> panic "trivColorable ArchSPARC64" ArchPPC_64 _ -> 20 ArchARM _ _ _ -> panic "trivColorable ArchARM" - ArchAArch64 -> panic "trivColorable ArchAArch64" + ArchAArch64 -> 32 ArchAlpha -> panic "trivColorable ArchAlpha" ArchMipseb -> panic "trivColorable ArchMipseb" ArchMipsel -> panic "trivColorable ArchMipsel" diff --git a/compiler/GHC/CmmToAsm/Reg/Linear.hs b/compiler/GHC/CmmToAsm/Reg/Linear.hs index 88fdcd6bce..a9a4545f62 100644 --- a/compiler/GHC/CmmToAsm/Reg/Linear.hs +++ b/compiler/GHC/CmmToAsm/Reg/Linear.hs @@ -110,10 +110,11 @@ import GHC.CmmToAsm.Reg.Linear.StackMap import GHC.CmmToAsm.Reg.Linear.FreeRegs import GHC.CmmToAsm.Reg.Linear.Stats import GHC.CmmToAsm.Reg.Linear.JoinToTargets -import qualified GHC.CmmToAsm.Reg.Linear.PPC as PPC -import qualified GHC.CmmToAsm.Reg.Linear.SPARC as SPARC -import qualified GHC.CmmToAsm.Reg.Linear.X86 as X86 -import qualified GHC.CmmToAsm.Reg.Linear.X86_64 as X86_64 +import qualified GHC.CmmToAsm.Reg.Linear.PPC as PPC +import qualified GHC.CmmToAsm.Reg.Linear.SPARC as SPARC +import qualified GHC.CmmToAsm.Reg.Linear.X86 as X86 +import qualified GHC.CmmToAsm.Reg.Linear.X86_64 as X86_64 +import qualified GHC.CmmToAsm.Reg.Linear.AArch64 as AArch64 import GHC.CmmToAsm.Reg.Target import GHC.CmmToAsm.Reg.Liveness import GHC.CmmToAsm.Reg.Utils @@ -121,6 +122,7 @@ import GHC.CmmToAsm.Instr import GHC.CmmToAsm.Config import GHC.CmmToAsm.Types import GHC.Platform.Reg +import GHC.Platform.Reg.Class (RegClass(..)) import GHC.Cmm.BlockId import GHC.Cmm.Dataflow.Collections @@ -202,7 +204,7 @@ regAlloc _ (CmmProc _ _ _ _) -- an entry in the block map or it is the first block. -- linearRegAlloc - :: forall instr. Instruction instr + :: forall instr. (Instruction instr) => NCGConfig -> [BlockId] -- ^ entry points -> BlockMap RegSet @@ -220,7 +222,7 @@ linearRegAlloc config entry_ids block_live sccs ArchSPARC64 -> panic "linearRegAlloc ArchSPARC64" ArchPPC -> go $ (frInitFreeRegs platform :: PPC.FreeRegs) ArchARM _ _ _ -> panic "linearRegAlloc ArchARM" - ArchAArch64 -> panic "linearRegAlloc ArchAArch64" + ArchAArch64 -> go $ (frInitFreeRegs platform :: AArch64.FreeRegs) ArchPPC_64 _ -> go $ (frInitFreeRegs platform :: PPC.FreeRegs) ArchAlpha -> panic "linearRegAlloc ArchAlpha" ArchMipseb -> panic "linearRegAlloc ArchMipseb" @@ -487,7 +489,7 @@ isInReg src assig | Just (InReg _) <- lookupUFM assig src = True genRaInsn :: forall freeRegs instr. - OutputableRegConstraint freeRegs instr + (OutputableRegConstraint freeRegs instr) => BlockMap RegSet -> [instr] -> BlockId @@ -497,7 +499,7 @@ genRaInsn :: forall freeRegs instr. -> RegM freeRegs ([instr], [NatBasicBlock instr]) genRaInsn block_live new_instrs block_id instr r_dying w_dying = do --- pprTraceM "genRaInsn" $ ppr (block_id, instr) +-- pprTraceM "genRaInsn" $ ppr (block_id, instr) platform <- getPlatform case regUsageOfInstr platform instr of { RU read written -> do @@ -509,19 +511,20 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying = do -- so using nub isn't a problem). let virt_read = nub [ vr | (RegVirtual vr) <- read ] :: [VirtualReg] - -- debugging -{- freeregs <- getFreeRegsR - assig <- getAssigR - pprDebugAndThen (defaultDynFlags Settings{ sTargetPlatform=platform } undefined) trace "genRaInsn" - (ppr instr - $$ text "r_dying = " <+> ppr r_dying - $$ text "w_dying = " <+> ppr w_dying - $$ text "virt_read = " <+> ppr virt_read - $$ text "virt_written = " <+> ppr virt_written - $$ text "freeregs = " <+> text (show freeregs) - $$ text "assig = " <+> ppr assig) - $ do --} +-- do +-- let real_read = nub [ rr | (RegReal rr) <- read] +-- freeregs <- getFreeRegsR +-- assig <- getAssigR + +-- pprTraceM "genRaInsn" +-- ( text "block = " <+> ppr block_id +-- $$ text "instruction = " <+> ppr instr +-- $$ text "r_dying = " <+> ppr r_dying +-- $$ text "w_dying = " <+> ppr w_dying +-- $$ text "read = " <+> ppr real_read <+> ppr virt_read +-- $$ text "written = " <+> ppr real_written <+> ppr virt_written +-- $$ text "freeregs = " <+> ppr freeregs +-- $$ text "assign = " <+> ppr assig) -- (a), (b) allocate real regs for all regs read by this instruction. (r_spills, r_allocd) <- @@ -580,7 +583,6 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying = do Nothing -> x Just y -> y - -- (j) free up stack slots for dead spilled regs -- TODO (can't be bothered right now) @@ -592,7 +594,32 @@ genRaInsn block_live new_instrs block_id instr r_dying w_dying = do | src == dst -> [] _ -> [patched_instr] - let code = concat [ squashed_instr, w_spills, reverse r_spills, clobber_saves, new_instrs ] + -- On the use of @reverse@ below. + -- Since we can have spills and reloads produce multiple instructions + -- we need to ensure they are emitted in the correct order. We used to only + -- emit single instructions in mkSpill/mkReload/mkRegRegMove. + -- As such order of spills and reloads didn't matter. However, with + -- mutliple instructions potentially issued by those functions we need to be + -- careful to not break execution order. Reversing the spills (clobber will + -- also spill), will ensure they are emitted in the right order. + -- + -- See also Ticket 19910 for changing the return type from [] to OrdList. + + -- For debugging, uncomment the follow line and the mkComment lines. + -- u <- getUniqueR + let code = concat [ -- mkComment (text "<genRaInsn(" <> ppr u <> text ")>") + -- ,mkComment (text "<genRaInsn(" <> ppr u <> text "):squashed>")] + squashed_instr + -- ,mkComment (text "<genRaInsn(" <> ppr u <> text "):w_spills>") + , reverse w_spills + -- ,mkComment (text "<genRaInsn(" <> ppr u <> text "):r_spills>") + , reverse r_spills + -- ,mkComment (text "<genRaInsn(" <> ppr u <> text "):clobber_saves>") + , reverse clobber_saves + -- ,mkComment (text "<genRaInsn(" <> ppr u <> text "):new_instrs>") + , new_instrs + -- ,mkComment (text "</genRaInsn(" <> ppr u <> text ")>") + ] -- pprTrace "patched-code" ((vcat $ map (docToSDoc . pprInstr) code)) $ do -- pprTrace "pached-fixup" ((ppr fixup_blocks)) $ do @@ -609,6 +636,7 @@ releaseRegs regs = do platform <- getPlatform assig <- getAssigR free <- getFreeRegsR + let loop assig !free [] = do setAssigR assig; setFreeRegsR free; return () loop assig !free (RegReal rr : rs) = loop assig (frReleaseReg platform rr free) rs loop assig !free (r:rs) = @@ -662,8 +690,9 @@ saveClobberedTemps clobbered dying (instrs,assig') <- clobber assig [] to_spill setAssigR assig' - return instrs - + return $ -- mkComment (text "<saveClobberedTemps>") ++ + instrs +-- ++ mkComment (text "</saveClobberedTemps>") where -- See Note [UniqFM and the register allocator] clobber :: RegMap Loc -> [instr] -> [(Unique,RealReg)] -> RegM freeRegs ([instr], RegMap Loc) @@ -700,7 +729,7 @@ saveClobberedTemps clobbered dying let new_assign = addToUFM_Directly assig temp (InBoth reg slot) - clobber new_assign (spill : instrs) rest + clobber new_assign (spill ++ instrs) rest @@ -714,7 +743,17 @@ clobberRegs [] clobberRegs clobbered = do platform <- getPlatform freeregs <- getFreeRegsR - setFreeRegsR $! foldl' (flip $ frAllocateReg platform) freeregs clobbered + + let gpRegs = frGetFreeRegs platform RcInteger freeregs :: [RealReg] + fltRegs = frGetFreeRegs platform RcFloat freeregs :: [RealReg] + dblRegs = frGetFreeRegs platform RcDouble freeregs :: [RealReg] + + let extra_clobbered = [ r | r <- clobbered + , r `elem` (gpRegs ++ fltRegs ++ dblRegs) ] + + setFreeRegsR $! foldl' (flip $ frAllocateReg platform) freeregs extra_clobbered + + -- setFreeRegsR $! foldl' (flip $ frAllocateReg platform) freeregs clobbered assig <- getAssigR setAssigR $! clobber assig (nonDetUFMToList assig) @@ -909,10 +948,7 @@ allocRegsAndSpill_spill reading keep spills alloc r rs assig spill_loc | (temp_to_push_out, (my_reg :: RealReg)) : _ <- candidates_inReg = do - (spill_insn, slot) <- spillR (RegReal my_reg) temp_to_push_out - let spill_store = (if reading then id else reverse) - [ -- COMMENT (fsLit "spill alloc") - spill_insn ] + (spill_store, slot) <- spillR (RegReal my_reg) temp_to_push_out -- record that this temp was spilled recordSpill (SpillAlloc temp_to_push_out) @@ -962,7 +998,7 @@ loadTemp vreg (ReadMem slot) hreg spills = do insn <- loadR (RegReal hreg) slot recordSpill (SpillLoad $ getUnique vreg) - return $ {- COMMENT (fsLit "spill load") : -} insn : spills + return $ {- mkComment (text "spill load") : -} insn ++ spills loadTemp _ _ _ spills = return spills diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/AArch64.hs b/compiler/GHC/CmmToAsm/Reg/Linear/AArch64.hs new file mode 100644 index 0000000000..50299c4e74 --- /dev/null +++ b/compiler/GHC/CmmToAsm/Reg/Linear/AArch64.hs @@ -0,0 +1,137 @@ +module GHC.CmmToAsm.Reg.Linear.AArch64 where + +import GHC.Prelude + +import GHC.CmmToAsm.AArch64.Regs +import GHC.Platform.Reg.Class +import GHC.Platform.Reg + +import GHC.Utils.Outputable +import GHC.Utils.Panic +import GHC.Platform + +import Data.Word + +import GHC.Stack +-- AArch64 has 32 64bit general purpose register r0..r30, and zr/sp +-- AArch64 has 32 128bit floating point registers v0..v31 as part of the NEON +-- extension in Armv8-A. +-- +-- Armv8-A is a fundamental change to the Arm architecture. It supports the +-- 64-bit Execution state called “AArch64”, and a new 64-bit instruction set +-- “A64”. To provide compatibility with the Armv7-A (32-bit architecture) +-- instruction set, a 32-bit variant of Armv8-A “AArch32” is provided. Most of +-- existing Armv7-A code can be run in the AArch32 execution state of Armv8-A. +-- +-- these can be addresses as q/d/s/h/b 0..31, or v.f<size>[idx] +-- where size is 64, 32, 16, 8, ... and the index i allows us +-- to access the given part. +-- +-- History of Arm Adv SIMD +-- .---------------------------------------------------------------------------. +-- | Armv6 | Armv7-A | Armv8-A AArch64 | +-- | SIMD extension | NEON | NEON | +-- |===========================================================================| +-- | - Operates on 32-bit | - Separate reg. bank, | - Separate reg. bank, | +-- | GP ARM registers | 32x64-bit NEON regs | 32x128-bit NEON regs | +-- | - 8-bit/16-bit integer | - 8/16/32/64-bit int | - 8/16/32/64-bit int | +-- | | - Single percision fp | - Single percision fp | +-- | | | - Double precision fp | +-- | | | - Single/Double fp are | +-- | | | IEEE compliant | +-- | - 2x16-bit/4x8-bit ops | - Up to 16x8-bit ops | - Up to 16x8-bit ops | +-- | per instruction | per instruction | per instruction | +-- '---------------------------------------------------------------------------' + +data FreeRegs = FreeRegs !Word32 !Word32 + +instance Show FreeRegs where + show (FreeRegs g f) = "FreeRegs: " ++ showBits g ++ "; " ++ showBits f + +instance Outputable FreeRegs where + ppr (FreeRegs g f) = text " " <+> foldr (\i x -> pad_int i <+> x) (text "") [0..31] + $$ text "GPR" <+> foldr (\i x -> show_bit g i <+> x) (text "") [0..31] + $$ text "FPR" <+> foldr (\i x -> show_bit f i <+> x) (text "") [0..31] + where pad_int i | i < 10 = char ' ' <> int i + pad_int i = int i + -- remember bit = 1 means it's available. + show_bit bits bit | testBit bits bit = text " " + show_bit _ _ = text " x" + +noFreeRegs :: FreeRegs +noFreeRegs = FreeRegs 0 0 + +showBits :: Word32 -> String +showBits w = map (\i -> if testBit w i then '1' else '0') [0..31] + +-- FR instance implementation (See Linear.FreeRegs) +allocateReg :: HasCallStack => RealReg -> FreeRegs -> FreeRegs +allocateReg (RealRegSingle r) (FreeRegs g f) + | r > 31 && testBit f (r - 32) = FreeRegs g (clearBit f (r - 32)) + | r < 32 && testBit g r = FreeRegs (clearBit g r) f + | r > 31 = panic $ "Linear.AArch64.allocReg: double allocation of float reg v" ++ show (r - 32) ++ "; " ++ showBits f + | otherwise = pprPanic "Linear.AArch64.allocReg" $ text ("double allocation of gp reg x" ++ show r ++ "; " ++ showBits g) +allocateReg _ _ = panic "Linear.AArch64.allocReg: bad reg" + +-- we start from 28 downwards... the logic is similar to the ppc logic. +-- 31 is Stack Pointer +-- 30 is Link Register +-- 29 is Stack Frame (by convention) +-- 19-28 are callee save +-- the lower ones are all caller save + +-- For this reason someone decided to give aarch64 only 6 regs for +-- STG: +-- 19: Base +-- 20: Sp +-- 21: Hp +-- 22-27: R1-R6 +-- 28: SpLim + +-- For LLVM code gen interop: +-- See https://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20150119/253722.html +-- and the current ghccc implementation here: +-- https://github.com/llvm/llvm-project/blob/161ae1f39816edf667aaa190bce702a86879c7bd/llvm/lib/Target/AArch64/AArch64CallingConvention.td#L324-L363 +-- and https://gitlab.haskell.org/ghc/ghc/-/wikis/commentary/compiler/generated-code +-- for the STG discussion. +{- For reference the ghcc from the link above: +let Entry = 1 in +def CC_AArch64_GHC : CallingConv<[ + CCIfType<[iPTR], CCBitConvertToType<i64>>, + + // Handle all vector types as either f64 or v2f64. + CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, f128], CCBitConvertToType<v2f64>>, + + CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>, + CCIfType<[f32], CCAssignToReg<[S8, S9, S10, S11]>>, + CCIfType<[f64], CCAssignToReg<[D12, D13, D14, D15]>>, + + // Promote i8/i16/i32 arguments to i64. + CCIfType<[i8, i16, i32], CCPromoteToType<i64>>, + + // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim + CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23, X24, X25, X26, X27, X28]>> +]>; +-} + +getFreeRegs :: RegClass -> FreeRegs -> [RealReg] +getFreeRegs cls (FreeRegs g f) + | RcFloat <- cls = [] -- For now we only support double and integer registers, floats will need to be promoted. + | RcDouble <- cls = go 32 f 31 + | RcInteger <- cls = go 0 g 18 + where + go _ _ i | i < 0 = [] + go off x i | testBit x i = RealRegSingle (off + i) : (go off x $! i - 1) + | otherwise = go off x $! i - 1 + +initFreeRegs :: Platform -> FreeRegs +initFreeRegs platform = foldl' (flip releaseReg) noFreeRegs (allocatableRegs platform) + +releaseReg :: HasCallStack => RealReg -> FreeRegs -> FreeRegs +releaseReg (RealRegSingle r) (FreeRegs g f) + | r > 31 && testBit f (r - 32) = pprPanic "Linear.AArch64.releaseReg" (text "can't release non-allocated reg v" <> int (r - 32)) + | r < 32 && testBit g r = pprPanic "Linear.AArch64.releaseReg" (text "can't release non-allocated reg x" <> int r) + | r > 31 = FreeRegs g (setBit f (r - 32)) + | otherwise = FreeRegs (setBit g r) f +releaseReg _ _ = pprPanic "Linear.AArch64.releaseReg" (text "bad reg") diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs b/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs index d501718c4a..3ae0fa140d 100644 --- a/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs +++ b/compiler/GHC/CmmToAsm/Reg/Linear/FreeRegs.hs @@ -25,14 +25,16 @@ import GHC.Platform -- getFreeRegs cls f = filter ( (==cls) . regClass . RealReg ) f -- allocateReg f r = filter (/= r) f -import qualified GHC.CmmToAsm.Reg.Linear.PPC as PPC -import qualified GHC.CmmToAsm.Reg.Linear.SPARC as SPARC -import qualified GHC.CmmToAsm.Reg.Linear.X86 as X86 -import qualified GHC.CmmToAsm.Reg.Linear.X86_64 as X86_64 +import qualified GHC.CmmToAsm.Reg.Linear.PPC as PPC +import qualified GHC.CmmToAsm.Reg.Linear.SPARC as SPARC +import qualified GHC.CmmToAsm.Reg.Linear.X86 as X86 +import qualified GHC.CmmToAsm.Reg.Linear.X86_64 as X86_64 +import qualified GHC.CmmToAsm.Reg.Linear.AArch64 as AArch64 -import qualified GHC.CmmToAsm.PPC.Instr as PPC.Instr -import qualified GHC.CmmToAsm.SPARC.Instr as SPARC.Instr -import qualified GHC.CmmToAsm.X86.Instr as X86.Instr +import qualified GHC.CmmToAsm.PPC.Instr as PPC.Instr +import qualified GHC.CmmToAsm.SPARC.Instr as SPARC.Instr +import qualified GHC.CmmToAsm.X86.Instr as X86.Instr +import qualified GHC.CmmToAsm.AArch64.Instr as AArch64.Instr class Show freeRegs => FR freeRegs where frAllocateReg :: Platform -> RealReg -> freeRegs -> freeRegs @@ -58,6 +60,12 @@ instance FR PPC.FreeRegs where frInitFreeRegs = PPC.initFreeRegs frReleaseReg = \_ -> PPC.releaseReg +instance FR AArch64.FreeRegs where + frAllocateReg = \_ -> AArch64.allocateReg + frGetFreeRegs = \_ -> AArch64.getFreeRegs + frInitFreeRegs = AArch64.initFreeRegs + frReleaseReg = \_ -> AArch64.releaseReg + instance FR SPARC.FreeRegs where frAllocateReg = SPARC.allocateReg frGetFreeRegs = \_ -> SPARC.getFreeRegs @@ -73,7 +81,7 @@ maxSpillSlots config = case platformArch (ncgPlatform config) of ArchSPARC -> SPARC.Instr.maxSpillSlots config ArchSPARC64 -> panic "maxSpillSlots ArchSPARC64" ArchARM _ _ _ -> panic "maxSpillSlots ArchARM" - ArchAArch64 -> panic "maxSpillSlots ArchAArch64" + ArchAArch64 -> AArch64.Instr.maxSpillSlots config ArchPPC_64 _ -> PPC.Instr.maxSpillSlots config ArchAlpha -> panic "maxSpillSlots ArchAlpha" ArchMipseb -> panic "maxSpillSlots ArchMipseb" diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs b/compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs index d0330a4f6a..cbdf5d030b 100644 --- a/compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs +++ b/compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs @@ -26,6 +26,7 @@ import GHC.Cmm.BlockId import GHC.Cmm.Dataflow.Collections import GHC.Data.Graph.Directed import GHC.Utils.Panic +import GHC.Utils.Monad (concatMapM) import GHC.Types.Unique import GHC.Types.Unique.FM import GHC.Types.Unique.Set @@ -306,7 +307,7 @@ handleComponent -- go via a spill slot. -- handleComponent delta _ (AcyclicSCC (DigraphNode vreg src dsts)) - = mapM (makeMove delta vreg src) dsts + = concatMapM (makeMove delta vreg src) dsts -- Handle some cyclic moves. @@ -340,7 +341,7 @@ handleComponent delta instr -- make sure to do all the reloads after all the spills, -- so we don't end up clobbering the source values. - return ([instrSpill] ++ concat remainingFixUps ++ [instrLoad]) + return (instrSpill ++ concat remainingFixUps ++ instrLoad) handleComponent _ _ (CyclicSCC _) = panic "Register Allocator: handleComponent cyclic" @@ -354,7 +355,7 @@ makeMove -> Unique -- ^ unique of the vreg that we're moving. -> Loc -- ^ source location. -> Loc -- ^ destination location. - -> RegM freeRegs instr -- ^ move instruction. + -> RegM freeRegs [instr] -- ^ move instruction. makeMove delta vreg src dst = do config <- getConfig @@ -363,7 +364,7 @@ makeMove delta vreg src dst case (src, dst) of (InReg s, InReg d) -> do recordSpill (SpillJoinRR vreg) - return $ mkRegRegMoveInstr platform (RegReal s) (RegReal d) + return $ [mkRegRegMoveInstr platform (RegReal s) (RegReal d)] (InMem s, InReg d) -> do recordSpill (SpillJoinRM vreg) return $ mkLoadInstr config (RegReal d) delta s @@ -377,4 +378,3 @@ makeMove delta vreg src dst panic ("makeMove " ++ show vreg ++ " (" ++ show src ++ ") (" ++ show dst ++ ")" ++ " we don't handle mem->mem moves.") - diff --git a/compiler/GHC/CmmToAsm/Reg/Linear/State.hs b/compiler/GHC/CmmToAsm/Reg/Linear/State.hs index 24a75121b8..ec1cd517ea 100644 --- a/compiler/GHC/CmmToAsm/Reg/Linear/State.hs +++ b/compiler/GHC/CmmToAsm/Reg/Linear/State.hs @@ -121,7 +121,7 @@ makeRAStats state spillR :: Instruction instr - => Reg -> Unique -> RegM freeRegs (instr, Int) + => Reg -> Unique -> RegM freeRegs ([instr], Int) spillR reg temp = mkRegM $ \s -> let (stack1,slot) = getStackSlotFor (ra_stack s) temp @@ -131,7 +131,7 @@ spillR reg temp = mkRegM $ \s -> loadR :: Instruction instr - => Reg -> Int -> RegM freeRegs instr + => Reg -> Int -> RegM freeRegs [instr] loadR reg slot = mkRegM $ \s -> RA_Result s (mkLoadInstr (ra_config s) reg (ra_delta s) slot) diff --git a/compiler/GHC/CmmToAsm/Reg/Liveness.hs b/compiler/GHC/CmmToAsm/Reg/Liveness.hs index 4d70533624..ad8190270f 100644 --- a/compiler/GHC/CmmToAsm/Reg/Liveness.hs +++ b/compiler/GHC/CmmToAsm/Reg/Liveness.hs @@ -174,6 +174,8 @@ instance Instruction instr => Instruction (InstrSR instr) where pprInstr platform i = ppr (fmap (pprInstr platform) i) + mkComment = fmap Instr . mkComment + -- | An instruction with liveness information. data LiveInstr instr @@ -565,16 +567,20 @@ stripLiveBlock config (BasicBlock i lis) where (instrs', _) = runState (spillNat [] lis) 0 + -- spillNat :: [instr] -> [LiveInstr instr] -> State Int [instr] + spillNat :: Instruction instr => [instr] -> [LiveInstr instr] -> State Int [instr] spillNat acc [] = return (reverse acc) + -- The SPILL/RELOAD cases do not appear to be exercised by our codegens + -- spillNat acc (LiveInstr (SPILL reg slot) _ : instrs) = do delta <- get - spillNat (mkSpillInstr config reg delta slot : acc) instrs + spillNat (mkSpillInstr config reg delta slot ++ acc) instrs spillNat acc (LiveInstr (RELOAD slot reg) _ : instrs) = do delta <- get - spillNat (mkLoadInstr config reg delta slot : acc) instrs + spillNat (mkLoadInstr config reg delta slot ++ acc) instrs spillNat acc (LiveInstr (Instr instr) _ : instrs) | Just i <- takeDeltaInstr instr diff --git a/compiler/GHC/CmmToAsm/Reg/Target.hs b/compiler/GHC/CmmToAsm/Reg/Target.hs index 4611c3a8e8..22b22e21cc 100644 --- a/compiler/GHC/CmmToAsm/Reg/Target.hs +++ b/compiler/GHC/CmmToAsm/Reg/Target.hs @@ -34,6 +34,8 @@ import qualified GHC.CmmToAsm.X86.Regs as X86 import qualified GHC.CmmToAsm.X86.RegInfo as X86 import qualified GHC.CmmToAsm.PPC.Regs as PPC import qualified GHC.CmmToAsm.SPARC.Regs as SPARC +import qualified GHC.CmmToAsm.AArch64.Regs as AArch64 + targetVirtualRegSqueeze :: Platform -> RegClass -> VirtualReg -> Int targetVirtualRegSqueeze platform @@ -46,7 +48,7 @@ targetVirtualRegSqueeze platform ArchSPARC64 -> panic "targetVirtualRegSqueeze ArchSPARC64" ArchPPC_64 _ -> PPC.virtualRegSqueeze ArchARM _ _ _ -> panic "targetVirtualRegSqueeze ArchARM" - ArchAArch64 -> panic "targetVirtualRegSqueeze ArchAArch64" + ArchAArch64 -> AArch64.virtualRegSqueeze ArchAlpha -> panic "targetVirtualRegSqueeze ArchAlpha" ArchMipseb -> panic "targetVirtualRegSqueeze ArchMipseb" ArchMipsel -> panic "targetVirtualRegSqueeze ArchMipsel" @@ -66,7 +68,7 @@ targetRealRegSqueeze platform ArchSPARC64 -> panic "targetRealRegSqueeze ArchSPARC64" ArchPPC_64 _ -> PPC.realRegSqueeze ArchARM _ _ _ -> panic "targetRealRegSqueeze ArchARM" - ArchAArch64 -> panic "targetRealRegSqueeze ArchAArch64" + ArchAArch64 -> AArch64.realRegSqueeze ArchAlpha -> panic "targetRealRegSqueeze ArchAlpha" ArchMipseb -> panic "targetRealRegSqueeze ArchMipseb" ArchMipsel -> panic "targetRealRegSqueeze ArchMipsel" @@ -85,7 +87,7 @@ targetClassOfRealReg platform ArchSPARC64 -> panic "targetClassOfRealReg ArchSPARC64" ArchPPC_64 _ -> PPC.classOfRealReg ArchARM _ _ _ -> panic "targetClassOfRealReg ArchARM" - ArchAArch64 -> panic "targetClassOfRealReg ArchAArch64" + ArchAArch64 -> AArch64.classOfRealReg ArchAlpha -> panic "targetClassOfRealReg ArchAlpha" ArchMipseb -> panic "targetClassOfRealReg ArchMipseb" ArchMipsel -> panic "targetClassOfRealReg ArchMipsel" @@ -104,7 +106,7 @@ targetMkVirtualReg platform ArchSPARC64 -> panic "targetMkVirtualReg ArchSPARC64" ArchPPC_64 _ -> PPC.mkVirtualReg ArchARM _ _ _ -> panic "targetMkVirtualReg ArchARM" - ArchAArch64 -> panic "targetMkVirtualReg ArchAArch64" + ArchAArch64 -> AArch64.mkVirtualReg ArchAlpha -> panic "targetMkVirtualReg ArchAlpha" ArchMipseb -> panic "targetMkVirtualReg ArchMipseb" ArchMipsel -> panic "targetMkVirtualReg ArchMipsel" @@ -123,7 +125,7 @@ targetRegDotColor platform ArchSPARC64 -> panic "targetRegDotColor ArchSPARC64" ArchPPC_64 _ -> PPC.regDotColor ArchARM _ _ _ -> panic "targetRegDotColor ArchARM" - ArchAArch64 -> panic "targetRegDotColor ArchAArch64" + ArchAArch64 -> AArch64.regDotColor ArchAlpha -> panic "targetRegDotColor ArchAlpha" ArchMipseb -> panic "targetRegDotColor ArchMipseb" ArchMipsel -> panic "targetRegDotColor ArchMipsel" diff --git a/compiler/GHC/CmmToAsm/SPARC.hs b/compiler/GHC/CmmToAsm/SPARC.hs index 7d9a671932..cac72de6d3 100644 --- a/compiler/GHC/CmmToAsm/SPARC.hs +++ b/compiler/GHC/CmmToAsm/SPARC.hs @@ -69,7 +69,6 @@ instance Instruction SPARC.Instr where takeRegRegMoveInstr = SPARC.takeRegRegMoveInstr mkJumpInstr = SPARC.mkJumpInstr pprInstr = SPARC.pprInstr + mkComment = const [] mkStackAllocInstr = panic "no sparc_mkStackAllocInstr" mkStackDeallocInstr = panic "no sparc_mkStackDeallocInstr" - - diff --git a/compiler/GHC/CmmToAsm/SPARC/Instr.hs b/compiler/GHC/CmmToAsm/SPARC/Instr.hs index 3aeeb4d976..a5c9e46936 100644 --- a/compiler/GHC/CmmToAsm/SPARC/Instr.hs +++ b/compiler/GHC/CmmToAsm/SPARC/Instr.hs @@ -362,7 +362,7 @@ mkSpillInstr -> Reg -- ^ register to spill -> Int -- ^ current stack delta -> Int -- ^ spill slot to use - -> Instr + -> [Instr] mkSpillInstr config reg _ slot = let platform = ncgPlatform config @@ -373,7 +373,7 @@ mkSpillInstr config reg _ slot RcFloat -> FF32 RcDouble -> FF64 - in ST fmt reg (fpRel (negate off_w)) + in [ST fmt reg (fpRel (negate off_w))] -- | Make a spill reload instruction. @@ -382,7 +382,7 @@ mkLoadInstr -> Reg -- ^ register to load into -> Int -- ^ current stack delta -> Int -- ^ spill slot to use - -> Instr + -> [Instr] mkLoadInstr config reg _ slot = let platform = ncgPlatform config @@ -393,7 +393,7 @@ mkLoadInstr config reg _ slot RcFloat -> FF32 RcDouble -> FF64 - in LD fmt (fpRel (- off_w)) reg + in [LD fmt (fpRel (- off_w)) reg] -------------------------------------------------------------------------------- diff --git a/compiler/GHC/CmmToAsm/X86.hs b/compiler/GHC/CmmToAsm/X86.hs index dbeeddc184..3d9780a99c 100644 --- a/compiler/GHC/CmmToAsm/X86.hs +++ b/compiler/GHC/CmmToAsm/X86.hs @@ -62,4 +62,4 @@ instance Instruction X86.Instr where mkStackAllocInstr = X86.mkStackAllocInstr mkStackDeallocInstr = X86.mkStackDeallocInstr pprInstr = X86.pprInstr - + mkComment = const [] diff --git a/compiler/GHC/CmmToAsm/X86/Instr.hs b/compiler/GHC/CmmToAsm/X86/Instr.hs index e48d0922d8..9410537ed8 100644 --- a/compiler/GHC/CmmToAsm/X86/Instr.hs +++ b/compiler/GHC/CmmToAsm/X86/Instr.hs @@ -672,15 +672,15 @@ mkSpillInstr -> Reg -- register to spill -> Int -- current stack delta -> Int -- spill slot to use - -> Instr + -> [Instr] mkSpillInstr config reg delta slot = let off = spillSlotToOffset platform slot - delta in case targetClassOfReg platform reg of - RcInteger -> MOV (archWordFormat is32Bit) - (OpReg reg) (OpAddr (spRel platform off)) - RcDouble -> MOV FF64 (OpReg reg) (OpAddr (spRel platform off)) + RcInteger -> [MOV (archWordFormat is32Bit) + (OpReg reg) (OpAddr (spRel platform off))] + RcDouble -> [MOV FF64 (OpReg reg) (OpAddr (spRel platform off))] _ -> panic "X86.mkSpillInstr: no match" where platform = ncgPlatform config is32Bit = target32Bit platform @@ -691,16 +691,16 @@ mkLoadInstr -> Reg -- register to load -> Int -- current stack delta -> Int -- spill slot to use - -> Instr + -> [Instr] mkLoadInstr config reg delta slot = let off = spillSlotToOffset platform slot - delta in case targetClassOfReg platform reg of - RcInteger -> MOV (archWordFormat is32Bit) - (OpAddr (spRel platform off)) (OpReg reg) - RcDouble -> MOV FF64 (OpAddr (spRel platform off)) (OpReg reg) - _ -> panic "X86.mkLoadInstr" + RcInteger -> ([MOV (archWordFormat is32Bit) + (OpAddr (spRel platform off)) (OpReg reg)]) + RcDouble -> ([MOV FF64 (OpAddr (spRel platform off)) (OpReg reg)]) + _ -> panic "X86.mkLoadInstr" where platform = ncgPlatform config is32Bit = target32Bit platform diff --git a/compiler/GHC/Driver/Backend.hs b/compiler/GHC/Driver/Backend.hs index 39789607d9..71cfdd9dec 100644 --- a/compiler/GHC/Driver/Backend.hs +++ b/compiler/GHC/Driver/Backend.hs @@ -104,6 +104,7 @@ platformNcgSupported platform = if ArchPPC -> True ArchPPC_64 {} -> True ArchSPARC -> True + ArchAArch64 -> True _ -> False -- | Will this backend produce an object file on the disk? diff --git a/compiler/GHC/Linker/Static.hs b/compiler/GHC/Linker/Static.hs index 4e2367f9e6..b0b8f1c541 100644 --- a/compiler/GHC/Linker/Static.hs +++ b/compiler/GHC/Linker/Static.hs @@ -222,7 +222,7 @@ linkBinary' staticLink logger tmpfs dflags unit_env o_files dep_units = do not staticLink && (platformOS platform == OSDarwin) && case platformArch platform of - ArchX86 -> True + ArchX86 -> True ArchX86_64 -> True ArchARM {} -> True ArchAArch64 -> True diff --git a/compiler/GHC/Platform.hs b/compiler/GHC/Platform.hs index b3ab1b4020..70c8f9b658 100644 --- a/compiler/GHC/Platform.hs +++ b/compiler/GHC/Platform.hs @@ -40,6 +40,7 @@ module GHC.Platform , platformSOName , platformHsSOName , platformSOExt + , genericPlatform ) where @@ -83,6 +84,21 @@ platformConstants platform = case platform_constants platform of Nothing -> panic "Platform constants not available!" Just c -> c +genericPlatform :: Platform +genericPlatform = Platform + { platformArchOS = ArchOS ArchX86_64 OSLinux + , platformWordSize = PW8 + , platformByteOrder = LittleEndian + , platformUnregisterised = False + , platformHasGnuNonexecStack = False + , platformHasIdentDirective = False + , platformHasSubsectionsViaSymbols= False + , platformIsCrossCompiling = False + , platformLeadingUnderscore = False + , platformTablesNextToCode = True + , platform_constants = Nothing + } + data PlatformWordSize = PW4 -- ^ A 32-bit platform | PW8 -- ^ A 64-bit platform @@ -223,7 +239,6 @@ data BmiVersion | BMI2 deriving (Eq, Ord) - -- | Platform-specific settings formerly hard-coded in Config.hs. -- -- These should probably be all be triaged whether they can be computed from diff --git a/compiler/GHC/Platform/Reg.hs b/compiler/GHC/Platform/Reg.hs index c011a59eb0..5edd39df51 100644 --- a/compiler/GHC/Platform/Reg.hs +++ b/compiler/GHC/Platform/Reg.hs @@ -181,7 +181,7 @@ realRegsAlias rr1 rr2 data Reg = RegVirtual !VirtualReg | RegReal !RealReg - deriving (Eq, Ord) + deriving (Eq, Ord, Show) regSingle :: RegNo -> Reg regSingle regNo = RegReal (realRegSingle regNo) diff --git a/compiler/GHC/StgToCmm/Foreign.hs b/compiler/GHC/StgToCmm/Foreign.hs index 39f25a7b86..78a01d06d6 100644 --- a/compiler/GHC/StgToCmm/Foreign.hs +++ b/compiler/GHC/StgToCmm/Foreign.hs @@ -84,6 +84,7 @@ cgForeignCall (CCall (CCallSpec target cconv safety)) typ stg_args res_ty arg_size (arg, _) = max (widthInBytes $ typeWidth $ cmmExprType platform arg) (platformWordSizeInBytes platform) ; cmm_args <- getFCallArgs stg_args typ + -- ; traceM $ show cmm_args ; (res_regs, res_hints) <- newUnboxedTupleRegs res_ty ; let ((call_args, arg_hints), cmm_target) = case target of diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in index 40cfde0d3a..5d8d1f9b22 100644 --- a/compiler/ghc.cabal.in +++ b/compiler/ghc.cabal.in @@ -203,6 +203,13 @@ Library GHC.Cmm.Switch.Implement GHC.CmmToAsm GHC.Cmm.LRegSet + GHC.CmmToAsm.AArch64 + GHC.CmmToAsm.AArch64.CodeGen + GHC.CmmToAsm.AArch64.Cond + GHC.CmmToAsm.AArch64.Instr + GHC.CmmToAsm.AArch64.Ppr + GHC.CmmToAsm.AArch64.RegInfo + GHC.CmmToAsm.AArch64.Regs GHC.CmmToAsm.BlockLayout GHC.CmmToAsm.CFG GHC.CmmToAsm.CFG.Dominators @@ -234,6 +241,7 @@ Library GHC.CmmToAsm.Reg.Graph.TrivColorable GHC.CmmToAsm.Reg.Graph.X86 GHC.CmmToAsm.Reg.Linear + GHC.CmmToAsm.Reg.Linear.AArch64 GHC.CmmToAsm.Reg.Linear.Base GHC.CmmToAsm.Reg.Linear.FreeRegs GHC.CmmToAsm.Reg.Linear.JoinToTargets diff --git a/includes/CodeGen.Platform.hs b/includes/CodeGen.Platform.hs index 8c942662e6..0dfac62a3f 100644 --- a/includes/CodeGen.Platform.hs +++ b/includes/CodeGen.Platform.hs @@ -1,7 +1,8 @@ import GHC.Cmm.Expr #if !(defined(MACHREGS_i386) || defined(MACHREGS_x86_64) \ - || defined(MACHREGS_sparc) || defined(MACHREGS_powerpc)) + || defined(MACHREGS_sparc) || defined(MACHREGS_powerpc) \ + || defined(MACHREGS_aarch64)) import GHC.Utils.Panic.Plain #endif import GHC.Platform.Reg @@ -1016,6 +1017,98 @@ freeReg REG_HpLim = False # endif freeReg _ = True +#elif defined(MACHREGS_aarch64) + +-- stack pointer / zero reg +freeReg 31 = False +-- link register +freeReg 30 = False +-- frame pointer +freeReg 29 = False +-- ip0 -- used for spill offset computations +freeReg 16 = False + +# if defined(REG_Base) +freeReg REG_Base = False +# endif +# if defined(REG_Sp) +freeReg REG_Sp = False +# endif +# if defined(REG_SpLim) +freeReg REG_SpLim = False +# endif +# if defined(REG_Hp) +freeReg REG_Hp = False +# endif +# if defined(REG_HpLim) +freeReg REG_HpLim = False +# endif + +# if defined(REG_R1) +freeReg REG_R1 = False +# endif +# if defined(REG_R2) +freeReg REG_R2 = False +# endif +# if defined(REG_R3) +freeReg REG_R3 = False +# endif +# if defined(REG_R4) +freeReg REG_R4 = False +# endif +# if defined(REG_R5) +freeReg REG_R5 = False +# endif +# if defined(REG_R6) +freeReg REG_R6 = False +# endif +# if defined(REG_R7) +freeReg REG_R7 = False +# endif +# if defined(REG_R8) +freeReg REG_R8 = False +# endif + +# if defined(REG_F1) +freeReg REG_F1 = False +# endif +# if defined(REG_F2) +freeReg REG_F2 = False +# endif +# if defined(REG_F3) +freeReg REG_F3 = False +# endif +# if defined(REG_F4) +freeReg REG_F4 = False +# endif +# if defined(REG_F5) +freeReg REG_F5 = False +# endif +# if defined(REG_F6) +freeReg REG_F6 = False +# endif + +# if defined(REG_D1) +freeReg REG_D1 = False +# endif +# if defined(REG_D2) +freeReg REG_D2 = False +# endif +# if defined(REG_D3) +freeReg REG_D3 = False +# endif +# if defined(REG_D4) +freeReg REG_D4 = False +# endif +# if defined(REG_D5) +freeReg REG_D5 = False +# endif +# if defined(REG_D6) +freeReg REG_D6 = False +# endif + +freeReg _ = True + #elif defined(MACHREGS_sparc) -- SPARC regs used by the OS / ABI diff --git a/includes/rts/storage/GC.h b/includes/rts/storage/GC.h index 12f0e32f0f..0258811728 100644 --- a/includes/rts/storage/GC.h +++ b/includes/rts/storage/GC.h @@ -208,7 +208,6 @@ void flushExec(W_ len, AdjustorExecutable exec_addr); #if defined(darwin_HOST_OS) AdjustorWritable execToWritable(AdjustorExecutable exec); #endif - #if RTS_LINKER_USE_MMAP AdjustorWritable allocateWrite(W_ bytes); void markExec(W_ bytes, AdjustorWritable writ); diff --git a/includes/stg/MachRegs.h b/includes/stg/MachRegs.h index 4b0991891e..d50969b66a 100644 --- a/includes/stg/MachRegs.h +++ b/includes/stg/MachRegs.h @@ -548,7 +548,7 @@ the stack. See Note [Overlapping global registers] for implications. r30 | LR | The Link Register r29 | FP | The Frame Pointer r19-r28 | | Callee-saved registers - r18 | | The Platform Register, if needed; + r18 | | The Platform Register, if needed; | | or temporary register r17 | IP1 | The second intra-procedure-call temporary register r16 | IP0 | The first intra-procedure-call scratch register diff --git a/mk/flavours/devel-cross-ncg.mk b/mk/flavours/devel-cross-ncg.mk new file mode 100644 index 0000000000..0d5325dd25 --- /dev/null +++ b/mk/flavours/devel-cross-ncg.mk @@ -0,0 +1,18 @@ +SRC_HC_OPTS = -O0 -H64m +GhcStage1HcOpts = -O2 -DDEBUG +GhcStage2HcOpts = -O0 +GhcLibHcOpts = -O +BUILD_PROF_LIBS = NO +SplitSections = NO +HADDOCK_DOCS = NO +BUILD_SPHINX_HTML = NO +BUILD_SPHINX_PDF = NO +BUILD_MAN = NO +WITH_TERMINFO = NO + +BIGNUM_BACKEND = native +Stage1Only = YES +DYNAMIC_BY_DEFAULT = NO +DYNAMIC_GHC_PROGRAMS = NO + +libraries/Cabal_dist-install_HC_OPTS += -O0 diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c index 2380ac7135..993af91528 100644 --- a/rts/RtsSymbols.c +++ b/rts/RtsSymbols.c @@ -1020,6 +1020,7 @@ SymI_HasProto(registerInfoProvList) \ SymI_HasProto(lookupIPE) \ RTS_USER_SIGNALS_SYMBOLS \ + RTS_LINKER_USE_MMAP_SYMBOLS \ RTS_INTCHAR_SYMBOLS diff --git a/rts/linker/Elf.c b/rts/linker/Elf.c index 84cb72bd6b..f6a1754257 100644 --- a/rts/linker/Elf.c +++ b/rts/linker/Elf.c @@ -774,9 +774,9 @@ ocGetNames_ELF ( ObjectCode* oc ) void * mem = mmapAnonForLinker(size+stub_space); - if( mem == NULL ) { - barf("failed to mmap allocated memory to load section %d. " - "errno = %d", i, errno); + if( mem == MAP_FAILED ) { + barf("failed to mmap allocated memory to load section %d. " + "errno = %d", i, errno); } /* copy only the image part over; we don't want to copy data diff --git a/testsuite/tests/ghci/linking/all.T b/testsuite/tests/ghci/linking/all.T index 1739129a6a..5802c907c2 100644 --- a/testsuite/tests/ghci/linking/all.T +++ b/testsuite/tests/ghci/linking/all.T @@ -14,6 +14,9 @@ test('ghcilink003', [ unless(doing_ghci, skip), # libstdc++ is GCC-specific on FreeBSD. FreeBSD has libc++ though. when(opsys('freebsd'), fragile(17739)), + # from Big Sur onwards, we can't dlopen libstdc++.dylib + # anymore. Will produce: + # dlopen(libstdc++.dylib, 5): image not found when(opsys('darwin'), fragile(16083)) ], makefile_test, ['ghcilink003']) @@ -34,6 +37,9 @@ test('ghcilink006', [ unless(doing_ghci, skip), # libstdc++ is GCC-specific on FreeBSD. FreeBSD has libc++ though. when(opsys('freebsd'), fragile(17739)), + # from Big Sur onwards, we can't dlopen libstdc++.dylib + # anymore. Will produce: + # dlopen(libstdc++.dylib, 5): image not found when(opsys('darwin'), fragile(16083)) ], makefile_test, ['ghcilink006']) diff --git a/testsuite/tests/llvm/should_compile/all.T b/testsuite/tests/llvm/should_compile/all.T index dca858dec8..4fb98dac7b 100644 --- a/testsuite/tests/llvm/should_compile/all.T +++ b/testsuite/tests/llvm/should_compile/all.T @@ -5,11 +5,19 @@ def f( name, opts ): setTestOpts(f) +# Apples LLVM Toolchain knows about a `vortex` cpu (and possibly others), that +# the stock LLVM toolchain doesn't know abotu and will warn about. Let's not +# have test fail just because of processor name differences due to different +# LLVM Toolchains. GHC tries to pass what apple expects (on darwin), but can +# be used with the stock LLVM toolchain as well. +def ignore_llvm_and_vortex( msg ): + return re.sub(r".* is not a recognized processor for this target.*\n",r"",msg) + # test('T5486', normal, compile, ['']) -test('T5681', normal, compile, ['']) +test('T5681', [normal, normalise_errmsg_fun(ignore_llvm_and_vortex)], compile, ['']) test('T6158', [reqlib('vector'), reqlib('primitive')], compile, ['-package vector -package primitive']) -test('T7571', cmm_src, compile, ['-no-hs-main']) +test('T7571', [cmm_src, normalise_errmsg_fun(ignore_llvm_and_vortex)], compile, ['-no-hs-main']) test('T7575', unless(wordsize(32), skip), compile, ['']) -test('T8131b', normal, compile, ['']) -test('T11649', normal, compile, ['']) +test('T8131b', [normal, normalise_errmsg_fun(ignore_llvm_and_vortex)], compile, ['']) +test('T11649', [normal, normalise_errmsg_fun(ignore_llvm_and_vortex)], compile, ['']) test('T17920fail', cmm_src, compile_fail, ['-no-hs-main']) diff --git a/testsuite/tests/llvm/should_run/subsections_via_symbols/all.T b/testsuite/tests/llvm/should_run/subsections_via_symbols/all.T index 16a30e6f0f..22d7cb2b42 100644 --- a/testsuite/tests/llvm/should_run/subsections_via_symbols/all.T +++ b/testsuite/tests/llvm/should_run/subsections_via_symbols/all.T @@ -3,9 +3,15 @@ # # Please refer to https://gitlab.haskell.org/ghc/ghc/issues/5019 # for the subsections_via_symbols.stderr +def ignore_llvm_and_vortex( msg ): + return re.sub(r"You are using an unsupported version of LLVM!.*\n",r"", + re.sub(r"Currently only [^ ]* is supported. System LLVM version: .*\n", r"", + re.sub(r"We will try though.*\n",r"", + re.sub(r".* is not a recognized processor for this target.*\n",r"",msg)))) test('subsections_via_symbols', [when(not opsys('darwin'), skip), only_ways(['optllvm', 'llvm', 'debugllvm']), - extra_files(['SubsectionsViaSymbols.hs'])], + extra_files(['SubsectionsViaSymbols.hs']), + normalise_errmsg_fun(ignore_llvm_and_vortex)], makefile_test, []) diff --git a/testsuite/tests/rts/all.T b/testsuite/tests/rts/all.T index 085e4b1f12..cd4a92dd7b 100644 --- a/testsuite/tests/rts/all.T +++ b/testsuite/tests/rts/all.T @@ -37,6 +37,7 @@ test('derefnull', # The output under OS X is too unstable to readily compare when(platform('i386-apple-darwin'), [ignore_stderr, exit_code(139)]), when(platform('x86_64-apple-darwin'), [ignore_stderr, exit_code(139)]), + when(platform('aarch64-apple-darwin'), [ignore_stderr, exit_code(139)]), when(opsys('mingw32'), [ignore_stderr, exit_code(11)]), when(opsys('mingw32'), [fragile(18548)]), # ThreadSanitizer changes the output |