diff options
author | Kavon Farvardin <kavon@farvard.in> | 2018-09-23 15:29:37 -0500 |
---|---|---|
committer | Kavon Farvardin <kavon@farvard.in> | 2018-09-23 15:29:37 -0500 |
commit | 84c2ad99582391005b5e873198b15e9e9eb4f78d (patch) | |
tree | caa8c2f2ec7e97fbb4977263c6817c9af5025cf4 /compiler/nativeGen/PPC | |
parent | 8ddb47cfcf5776e9a3c55fd37947c8a95e00fa12 (diff) | |
parent | e68b439fe5de61b9a2ca51af472185c62ccb8b46 (diff) | |
download | haskell-wip/T13904.tar.gz |
update to current master againwip/T13904
Diffstat (limited to 'compiler/nativeGen/PPC')
-rw-r--r-- | compiler/nativeGen/PPC/CodeGen.hs | 207 | ||||
-rw-r--r-- | compiler/nativeGen/PPC/Cond.hs | 2 | ||||
-rw-r--r-- | compiler/nativeGen/PPC/Instr.hs | 71 | ||||
-rw-r--r-- | compiler/nativeGen/PPC/Ppr.hs | 89 | ||||
-rw-r--r-- | compiler/nativeGen/PPC/RegInfo.hs | 12 | ||||
-rw-r--r-- | compiler/nativeGen/PPC/Regs.hs | 6 |
6 files changed, 285 insertions, 102 deletions
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs index 1e88a1d025..f246ec36f1 100644 --- a/compiler/nativeGen/PPC/CodeGen.hs +++ b/compiler/nativeGen/PPC/CodeGen.hs @@ -25,6 +25,8 @@ where #include "../includes/MachDeps.h" -- NCG stuff: +import GhcPrelude + import CodeGen.Platform import PPC.Instr import PPC.Cond @@ -52,7 +54,6 @@ import Hoopl.Graph -- The rest: import OrdList import Outputable -import Unique import DynFlags import Control.Monad ( mapAndUnzipM, when ) @@ -90,13 +91,23 @@ cmmTopCodeGen (CmmProc info lab live graph) = do case picBaseMb of Just picBase -> initializePicBase_ppc arch os picBase tops Nothing -> return tops - ArchPPC_64 ELF_V1 -> return tops + ArchPPC_64 ELF_V1 -> fixup_entry tops -- generating function descriptor is handled in -- pretty printer - ArchPPC_64 ELF_V2 -> return tops + ArchPPC_64 ELF_V2 -> fixup_entry tops -- generating function prologue is handled in -- pretty printer _ -> panic "PPC.cmmTopCodeGen: unknown arch" + where + fixup_entry (CmmProc info lab live (ListGraph (entry:blocks)) : statics) + = do + let BasicBlock bID insns = entry + bID' <- if lab == (blockLbl bID) + then newBlockId + else return bID + let b' = BasicBlock bID' insns + return (CmmProc info lab live (ListGraph (b':blocks)) : statics) + fixup_entry _ = panic "cmmTopCodegen: Broken CmmProc" cmmTopCodeGen (CmmData sec dat) = do return [CmmData sec dat] -- no translation, we just use CmmStatic @@ -161,8 +172,8 @@ stmtToInstrs stmt = do -> genCCall target result_regs args CmmBranch id -> genBranch id - CmmCondBranch arg true false _ -> do - b1 <- genCondJump true arg + CmmCondBranch arg true false prediction -> do + b1 <- genCondJump true arg prediction b2 <- genBranch false return (b1 `appOL` b2) CmmSwitch arg ids -> do dflags <- getDynFlags @@ -214,7 +225,7 @@ getRegisterReg platform (CmmGlobal mid) jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags)) jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel) - where blockLabel = mkAsmTempLabel (getUnique blockid) + where blockLabel = blockLbl blockid @@ -371,6 +382,14 @@ iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do mov_lo = MR rlo expr_reg return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi) rlo + +iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do + (expr_reg,expr_code) <- getSomeReg expr + (rlo, rhi) <- getNewRegPairNat II32 + let mov_hi = SRA II32 rhi expr_reg (RIImm (ImmInt 31)) + mov_lo = MR rlo expr_reg + return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi) + rlo iselExpr64 expr = pprPanic "iselExpr64(powerpc)" (pprExpr expr) @@ -719,7 +738,7 @@ data Amode = Amode AddrMode InstrBlock {- -Now, given a tree (the argument to an CmmLoad) that references memory, +Now, given a tree (the argument to a CmmLoad) that references memory, produce a suitable addressing mode. A Rule of the Game (tm) for Amodes: use of the addr bit must @@ -1070,11 +1089,12 @@ comparison to do. genCondJump :: BlockId -- the branch target -> CmmExpr -- the condition on which to branch + -> Maybe Bool -> NatM InstrBlock -genCondJump id bool = do +genCondJump id bool prediction = do CondCode _ cond code <- getCondCode bool - return (code `snocOL` BCC cond id) + return (code `snocOL` BCC cond id prediction) @@ -1098,6 +1118,90 @@ genCCall (PrimTarget MO_Touch) _ _ genCCall (PrimTarget (MO_Prefetch_Data _)) _ _ = return $ nilOL +genCCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] + = do dflags <- getDynFlags + let platform = targetPlatform dflags + fmt = intFormat width + reg_dst = getRegisterReg platform (CmmLocal dst) + (instr, n_code) <- case amop of + AMO_Add -> getSomeRegOrImm ADD True reg_dst + AMO_Sub -> case n of + CmmLit (CmmInt i _) + | Just imm <- makeImmediate width True (-i) + -> return (ADD reg_dst reg_dst (RIImm imm), nilOL) + _ + -> do + (n_reg, n_code) <- getSomeReg n + return (SUBF reg_dst n_reg reg_dst, n_code) + AMO_And -> getSomeRegOrImm AND False reg_dst + AMO_Nand -> do (n_reg, n_code) <- getSomeReg n + return (NAND reg_dst reg_dst n_reg, n_code) + AMO_Or -> getSomeRegOrImm OR False reg_dst + AMO_Xor -> getSomeRegOrImm XOR False reg_dst + Amode addr_reg addr_code <- getAmodeIndex addr + lbl_retry <- getBlockIdNat + return $ n_code `appOL` addr_code + `appOL` toOL [ HWSYNC + , BCC ALWAYS lbl_retry Nothing + + , NEWBLOCK lbl_retry + , LDR fmt reg_dst addr_reg + , instr + , STC fmt reg_dst addr_reg + , BCC NE lbl_retry (Just False) + , ISYNC + ] + where + getAmodeIndex (CmmMachOp (MO_Add _) [x, y]) + = do + (regX, codeX) <- getSomeReg x + (regY, codeY) <- getSomeReg y + return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY)) + getAmodeIndex other + = do + (reg, code) <- getSomeReg other + return (Amode (AddrRegReg r0 reg) code) -- NB: r0 is 0 here! + getSomeRegOrImm op sign dst + = case n of + CmmLit (CmmInt i _) | Just imm <- makeImmediate width sign i + -> return (op dst dst (RIImm imm), nilOL) + _ + -> do + (n_reg, n_code) <- getSomeReg n + return (op dst dst (RIReg n_reg), n_code) + +genCCall (PrimTarget (MO_AtomicRead width)) [dst] [addr] + = do dflags <- getDynFlags + let platform = targetPlatform dflags + fmt = intFormat width + reg_dst = getRegisterReg platform (CmmLocal dst) + form = if widthInBits width == 64 then DS else D + Amode addr_reg addr_code <- getAmode form addr + lbl_end <- getBlockIdNat + return $ addr_code `appOL` toOL [ HWSYNC + , LD fmt reg_dst addr_reg + , CMP fmt reg_dst (RIReg reg_dst) + , BCC NE lbl_end (Just False) + , BCC ALWAYS lbl_end Nothing + -- See Note [Seemingly useless cmp and bne] + , NEWBLOCK lbl_end + , ISYNC + ] + +-- Note [Seemingly useless cmp and bne] +-- In Power ISA, Book II, Section 4.4.1, Instruction Synchronize Instruction +-- the second paragraph says that isync may complete before storage accesses +-- "associated" with a preceding instruction have been performed. The cmp +-- operation and the following bne introduce a data and control dependency +-- on the load instruction (See also Power ISA, Book II, Appendix B.2.3, Safe +-- Fetch). +-- This is also what gcc does. + + +genCCall (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do + code <- assignMem_IntCode (intFormat width) addr val + return $ unitOL(HWSYNC) `appOL` code + genCCall (PrimTarget (MO_Clz width)) [dst] [src] = do dflags <- getDynFlags let platform = targetPlatform dflags @@ -1110,17 +1214,17 @@ genCCall (PrimTarget (MO_Clz width)) [dst] [src] lbl3 <- getBlockIdNat let vr_hi = getHiVRegFromLo vr_lo cntlz = toOL [ CMPL II32 vr_hi (RIImm (ImmInt 0)) - , BCC NE lbl2 - , BCC ALWAYS lbl1 + , BCC NE lbl2 Nothing + , BCC ALWAYS lbl1 Nothing , NEWBLOCK lbl1 , CNTLZ II32 reg_dst vr_lo , ADD reg_dst reg_dst (RIImm (ImmInt 32)) - , BCC ALWAYS lbl3 + , BCC ALWAYS lbl3 Nothing , NEWBLOCK lbl2 , CNTLZ II32 reg_dst vr_hi - , BCC ALWAYS lbl3 + , BCC ALWAYS lbl3 Nothing , NEWBLOCK lbl3 ] @@ -1167,8 +1271,8 @@ genCCall (PrimTarget (MO_Ctz width)) [dst] [src] cnttzlo <- cnttz format reg_dst vr_lo let vr_hi = getHiVRegFromLo vr_lo cnttz64 = toOL [ CMPL format vr_lo (RIImm (ImmInt 0)) - , BCC NE lbl2 - , BCC ALWAYS lbl1 + , BCC NE lbl2 Nothing + , BCC ALWAYS lbl1 Nothing , NEWBLOCK lbl1 , ADD x' vr_hi (RIImm (ImmInt (-1))) @@ -1176,12 +1280,12 @@ genCCall (PrimTarget (MO_Ctz width)) [dst] [src] , CNTLZ format r' x'' -- 32 + (32 - clz(x'')) , SUBFC reg_dst r' (RIImm (ImmInt 64)) - , BCC ALWAYS lbl3 + , BCC ALWAYS lbl3 Nothing , NEWBLOCK lbl2 ] `appOL` cnttzlo `appOL` - toOL [ BCC ALWAYS lbl3 + toOL [ BCC ALWAYS lbl3 Nothing , NEWBLOCK lbl3 ] @@ -1229,6 +1333,7 @@ genCCall target dest_regs argsAndHints PrimTarget (MO_U_Mul2 width) -> multOp2 platform width dest_regs argsAndHints PrimTarget (MO_Add2 _) -> add2Op platform dest_regs argsAndHints + PrimTarget (MO_AddWordC _) -> addcOp platform dest_regs argsAndHints PrimTarget (MO_SubWordC _) -> subcOp platform dest_regs argsAndHints PrimTarget (MO_AddIntC width) -> addSubCOp ADDO platform width dest_regs argsAndHints @@ -1315,21 +1420,21 @@ genCCall target dest_regs argsAndHints -- rhat = un32 - q1*vn1 , MULL fmt tmp q1 (RIReg vn1) , SUBF rhat tmp un32 - , BCC ALWAYS again1 + , BCC ALWAYS again1 Nothing , NEWBLOCK again1 -- if (q1 >= b || q1*vn0 > b*rhat + un1) , CMPL fmt q1 (RIReg b) - , BCC GEU then1 - , BCC ALWAYS no1 + , BCC GEU then1 Nothing + , BCC ALWAYS no1 Nothing , NEWBLOCK no1 , MULL fmt tmp q1 (RIReg vn0) , SL fmt tmp1 rhat (RIImm (ImmInt half)) , ADD tmp1 tmp1 (RIReg un1) , CMPL fmt tmp (RIReg tmp1) - , BCC LEU endif1 - , BCC ALWAYS then1 + , BCC LEU endif1 Nothing + , BCC ALWAYS then1 Nothing , NEWBLOCK then1 -- q1 = q1 - 1 @@ -1338,8 +1443,8 @@ genCCall target dest_regs argsAndHints , ADD rhat rhat (RIReg vn1) -- if (rhat < b) goto again1 , CMPL fmt rhat (RIReg b) - , BCC LTT again1 - , BCC ALWAYS endif1 + , BCC LTT again1 Nothing + , BCC ALWAYS endif1 Nothing , NEWBLOCK endif1 -- un21 = un32*b + un1 - q1*v @@ -1353,21 +1458,21 @@ genCCall target dest_regs argsAndHints -- rhat = un21- q0*vn1 , MULL fmt tmp q0 (RIReg vn1) , SUBF rhat tmp un21 - , BCC ALWAYS again2 + , BCC ALWAYS again2 Nothing , NEWBLOCK again2 -- if (q0>b || q0*vn0 > b*rhat + un0) , CMPL fmt q0 (RIReg b) - , BCC GEU then2 - , BCC ALWAYS no2 + , BCC GEU then2 Nothing + , BCC ALWAYS no2 Nothing , NEWBLOCK no2 , MULL fmt tmp q0 (RIReg vn0) , SL fmt tmp1 rhat (RIImm (ImmInt half)) , ADD tmp1 tmp1 (RIReg un0) , CMPL fmt tmp (RIReg tmp1) - , BCC LEU endif2 - , BCC ALWAYS then2 + , BCC LEU endif2 Nothing + , BCC ALWAYS then2 Nothing , NEWBLOCK then2 -- q0 = q0 - 1 @@ -1376,8 +1481,8 @@ genCCall target dest_regs argsAndHints , ADD rhat rhat (RIReg vn1) -- if (rhat<b) goto again2 , CMPL fmt rhat (RIReg b) - , BCC LTT again2 - , BCC ALWAYS endif2 + , BCC LTT again2 Nothing + , BCC ALWAYS endif2 Nothing , NEWBLOCK endif2 -- compute remainder @@ -1419,6 +1524,11 @@ genCCall target dest_regs argsAndHints add2Op _ _ _ = panic "genCCall: Wrong number of arguments/results for add2" + addcOp platform [res_r, res_c] [arg_x, arg_y] + = add2Op platform [res_c {-hi-}, res_r {-lo-}] [arg_x, arg_y] + addcOp _ _ _ + = panic "genCCall: Wrong number of arguments/results for addc" + -- PowerPC subfc sets the carry for rT = ~(rA) + rB + 1, -- which is 0 for borrow and 1 otherwise. We need 1 and 0 -- so xor with 1. @@ -1598,7 +1708,7 @@ genCCall' dflags gcp target dest_regs args uses_pic_base_implicitly = do -- See Note [implicit register in PPC PIC code] -- on why we claim to use PIC register here - when (gopt Opt_PIC dflags && target32Bit platform) $ do + when (positionIndependent dflags && target32Bit platform) $ do _ <- getPicBaseNat $ archWordFormat True return () @@ -1881,6 +1991,10 @@ genCCall' dflags gcp target dest_regs args MO_F32_Tanh -> (fsLit "tanh", True) MO_F32_Pwr -> (fsLit "pow", True) + MO_F32_Asinh -> (fsLit "asinh", True) + MO_F32_Acosh -> (fsLit "acosh", True) + MO_F32_Atanh -> (fsLit "atanh", True) + MO_F64_Exp -> (fsLit "exp", False) MO_F64_Log -> (fsLit "log", False) MO_F64_Sqrt -> (fsLit "sqrt", False) @@ -1899,32 +2013,40 @@ genCCall' dflags gcp target dest_regs args MO_F64_Tanh -> (fsLit "tanh", False) MO_F64_Pwr -> (fsLit "pow", False) + MO_F64_Asinh -> (fsLit "asinh", False) + MO_F64_Acosh -> (fsLit "acosh", False) + MO_F64_Atanh -> (fsLit "atanh", False) + MO_UF_Conv w -> (fsLit $ word2FloatLabel w, False) MO_Memcpy _ -> (fsLit "memcpy", False) MO_Memset _ -> (fsLit "memset", False) MO_Memmove _ -> (fsLit "memmove", False) + MO_Memcmp _ -> (fsLit "memcmp", False) MO_BSwap w -> (fsLit $ bSwapLabel w, False) MO_PopCnt w -> (fsLit $ popCntLabel w, False) - MO_Clz w -> (fsLit $ clzLabel w, False) - MO_Ctz w -> (fsLit $ ctzLabel w, False) - MO_AtomicRMW w amop -> (fsLit $ atomicRMWLabel w amop, False) + MO_Pdep w -> (fsLit $ pdepLabel w, False) + MO_Pext w -> (fsLit $ pextLabel w, False) + MO_Clz _ -> unsupported + MO_Ctz _ -> unsupported + MO_AtomicRMW {} -> unsupported MO_Cmpxchg w -> (fsLit $ cmpxchgLabel w, False) - MO_AtomicRead w -> (fsLit $ atomicReadLabel w, False) - MO_AtomicWrite w -> (fsLit $ atomicWriteLabel w, False) + MO_AtomicRead _ -> unsupported + MO_AtomicWrite _ -> unsupported MO_S_QuotRem {} -> unsupported MO_U_QuotRem {} -> unsupported MO_U_QuotRem2 {} -> unsupported MO_Add2 {} -> unsupported + MO_AddWordC {} -> unsupported MO_SubWordC {} -> unsupported MO_AddIntC {} -> unsupported MO_SubIntC {} -> unsupported MO_U_Mul2 {} -> unsupported MO_WriteBarrier -> unsupported MO_Touch -> unsupported - (MO_Prefetch_Data _ ) -> unsupported + MO_Prefetch_Data _ -> unsupported unsupported = panic ("outOfLineCmmOp: " ++ show mop ++ " not supported") @@ -1950,7 +2072,7 @@ genSwitch dflags expr targets ] return code - | (gopt Opt_PIC dflags) || (not $ target32Bit $ targetPlatform dflags) + | (positionIndependent dflags) || (not $ target32Bit $ targetPlatform dflags) = do (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset) let fmt = archWordFormat $ target32Bit $ targetPlatform dflags @@ -1988,15 +2110,16 @@ generateJumpTableForInstr :: DynFlags -> Instr -> Maybe (NatCmmDecl CmmStatics Instr) generateJumpTableForInstr dflags (BCTR ids (Just lbl)) = let jumpTable - | (gopt Opt_PIC dflags) + | (positionIndependent dflags) || (not $ target32Bit $ targetPlatform dflags) = map jumpTableEntryRel ids | otherwise = map (jumpTableEntry dflags) ids where jumpTableEntryRel Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags)) jumpTableEntryRel (Just blockid) - = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0) - where blockLabel = mkAsmTempLabel (getUnique blockid) + = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0 + (wordWidth dflags)) + where blockLabel = blockLbl blockid in Just (CmmData (Section ReadOnlyData lbl) (Statics lbl jumpTable)) generateJumpTableForInstr _ _ = Nothing diff --git a/compiler/nativeGen/PPC/Cond.hs b/compiler/nativeGen/PPC/Cond.hs index 0e4b1fd701..bd8bdee81a 100644 --- a/compiler/nativeGen/PPC/Cond.hs +++ b/compiler/nativeGen/PPC/Cond.hs @@ -8,6 +8,8 @@ module PPC.Cond ( where +import GhcPrelude + import Panic data Cond diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs index eb179c5a99..8eb5e8fa8d 100644 --- a/compiler/nativeGen/PPC/Instr.hs +++ b/compiler/nativeGen/PPC/Instr.hs @@ -23,6 +23,8 @@ module PPC.Instr ( where +import GhcPrelude + import PPC.Regs import PPC.Cond import Instruction @@ -75,19 +77,19 @@ instance Instruction Instr where mkStackDeallocInstr = ppc_mkStackDeallocInstr -ppc_mkStackAllocInstr :: Platform -> Int -> Instr +ppc_mkStackAllocInstr :: Platform -> Int -> [Instr] ppc_mkStackAllocInstr platform amount = ppc_mkStackAllocInstr' platform (-amount) -ppc_mkStackDeallocInstr :: Platform -> Int -> Instr +ppc_mkStackDeallocInstr :: Platform -> Int -> [Instr] ppc_mkStackDeallocInstr platform amount = ppc_mkStackAllocInstr' platform amount -ppc_mkStackAllocInstr' :: Platform -> Int -> Instr +ppc_mkStackAllocInstr' :: Platform -> Int -> [Instr] ppc_mkStackAllocInstr' platform amount = case platformArch platform of - ArchPPC -> UPDATE_SP II32 (ImmInt amount) - ArchPPC_64 _ -> UPDATE_SP II64 (ImmInt amount) + ArchPPC -> [UPDATE_SP II32 (ImmInt amount)] + ArchPPC_64 _ -> [UPDATE_SP II64 (ImmInt amount)] _ -> panic $ "ppc_mkStackAllocInstr' " ++ show (platformArch platform) @@ -124,7 +126,7 @@ allocMoreStack platform slots (CmmProc info lbl live (ListGraph code)) = do insert_stack_insns (BasicBlock id insns) | Just new_blockid <- mapLookup id new_blockmap - = [ BasicBlock id [alloc, BCC ALWAYS new_blockid] + = [ BasicBlock id $ alloc ++ [BCC ALWAYS new_blockid Nothing] , BasicBlock new_blockid block' ] | otherwise @@ -137,11 +139,11 @@ allocMoreStack platform slots (CmmProc info lbl live (ListGraph code)) = do -- "labeled-goto" we use JMP, and for "computed-goto" we -- use MTCTR followed by BCTR. See 'PPC.CodeGen.genJump'. = case insn of - JMP _ -> dealloc : insn : r - BCTR [] Nothing -> dealloc : insn : r + JMP _ -> dealloc ++ (insn : r) + BCTR [] Nothing -> dealloc ++ (insn : r) BCTR ids label -> BCTR (map (fmap retarget) ids) label : r - BCCFAR cond b -> BCCFAR cond (retarget b) : r - BCC cond b -> BCC cond (retarget b) : r + BCCFAR cond b p -> BCCFAR cond (retarget b) p : r + BCC cond b p -> BCC cond (retarget b) p : r _ -> insn : r -- BL and BCTRL are call-like instructions rather than -- jumps, and are used only for C calls. @@ -190,10 +192,12 @@ data Instr -- Loads and stores. | LD Format Reg AddrMode -- Load format, dst, src | LDFAR Format Reg AddrMode -- Load format, dst, src 32 bit offset + | LDR Format Reg AddrMode -- Load and reserve format, dst, src | LA Format Reg AddrMode -- Load arithmetic format, dst, src | ST Format Reg AddrMode -- Store format, src, dst | STFAR Format Reg AddrMode -- Store format, src, dst 32 bit offset | STU Format Reg AddrMode -- Store with Update format, src, dst + | STC Format Reg AddrMode -- Store conditional format, src, dst | LIS Reg Imm -- Load Immediate Shifted dst, src | LI Reg Imm -- Load Immediate dst, src | MR Reg Reg -- Move Register dst, src -- also for fmr @@ -201,8 +205,12 @@ data Instr | CMP Format Reg RI -- format, src1, src2 | CMPL Format Reg RI -- format, src1, src2 - | BCC Cond BlockId - | BCCFAR Cond BlockId + | BCC Cond BlockId (Maybe Bool) -- cond, block, hint + | BCCFAR Cond BlockId (Maybe Bool) -- cond, block, hint + -- hint: + -- Just True: branch likely taken + -- Just False: branch likely not taken + -- Nothing: no hint | JMP CLabel -- same as branch, -- but with CLabel instead of block ID | MTCTR Reg @@ -232,6 +240,7 @@ data Instr | DIV Format Bool Reg Reg Reg | AND Reg Reg RI -- dst, src1, src2 | ANDC Reg Reg Reg -- AND with complement, dst = src1 & ~ src2 + | NAND Reg Reg Reg -- dst, src1, src2 | OR Reg Reg RI -- dst, src1, src2 | ORIS Reg Reg Imm -- OR Immediate Shifted dst, src1, src2 | XOR Reg Reg RI -- dst, src1, src2 @@ -272,6 +281,8 @@ data Instr | MFLR Reg -- move from link register | FETCHPC Reg -- pseudo-instruction: -- bcl to next insn, mflr reg + | HWSYNC -- heavy weight sync + | ISYNC -- instruction synchronize | LWSYNC -- memory barrier | NOP -- no operation, PowerPC 64 bit -- needs this as place holder to @@ -290,17 +301,19 @@ ppc_regUsageOfInstr platform instr = case instr of LD _ reg addr -> usage (regAddr addr, [reg]) LDFAR _ reg addr -> usage (regAddr addr, [reg]) + LDR _ reg addr -> usage (regAddr addr, [reg]) LA _ reg addr -> usage (regAddr addr, [reg]) ST _ reg addr -> usage (reg : regAddr addr, []) STFAR _ reg addr -> usage (reg : regAddr addr, []) STU _ reg addr -> usage (reg : regAddr addr, []) + STC _ reg addr -> usage (reg : regAddr addr, []) LIS reg _ -> usage ([], [reg]) LI reg _ -> usage ([], [reg]) MR reg1 reg2 -> usage ([reg2], [reg1]) CMP _ reg ri -> usage (reg : regRI ri,[]) CMPL _ reg ri -> usage (reg : regRI ri,[]) - BCC _ _ -> noUsage - BCCFAR _ _ -> noUsage + BCC _ _ _ -> noUsage + BCCFAR _ _ _ -> noUsage MTCTR reg -> usage ([reg],[]) BCTR _ _ -> noUsage BL _ params -> usage (params, callClobberedRegs platform) @@ -325,6 +338,7 @@ ppc_regUsageOfInstr platform instr AND reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) ANDC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) + NAND reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1]) OR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) ORIS reg1 reg2 _ -> usage ([reg2], [reg1]) XOR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1]) @@ -380,17 +394,19 @@ ppc_patchRegsOfInstr instr env = case instr of LD fmt reg addr -> LD fmt (env reg) (fixAddr addr) LDFAR fmt reg addr -> LDFAR fmt (env reg) (fixAddr addr) + LDR fmt reg addr -> LDR fmt (env reg) (fixAddr addr) LA fmt reg addr -> LA fmt (env reg) (fixAddr addr) ST fmt reg addr -> ST fmt (env reg) (fixAddr addr) STFAR fmt reg addr -> STFAR fmt (env reg) (fixAddr addr) STU fmt reg addr -> STU fmt (env reg) (fixAddr addr) + STC fmt reg addr -> STC fmt (env reg) (fixAddr addr) LIS reg imm -> LIS (env reg) imm LI reg imm -> LI (env reg) imm MR reg1 reg2 -> MR (env reg1) (env reg2) CMP fmt reg ri -> CMP fmt (env reg) (fixRI ri) CMPL fmt reg ri -> CMPL fmt (env reg) (fixRI ri) - BCC cond lbl -> BCC cond lbl - BCCFAR cond lbl -> BCCFAR cond lbl + BCC cond lbl p -> BCC cond lbl p + BCCFAR cond lbl p -> BCCFAR cond lbl p MTCTR reg -> MTCTR (env reg) BCTR targets lbl -> BCTR targets lbl BL imm argRegs -> BL imm argRegs -- argument regs @@ -417,6 +433,7 @@ ppc_patchRegsOfInstr instr env AND reg1 reg2 ri -> AND (env reg1) (env reg2) (fixRI ri) ANDC reg1 reg2 reg3 -> ANDC (env reg1) (env reg2) (env reg3) + NAND reg1 reg2 reg3 -> NAND (env reg1) (env reg2) (env reg3) OR reg1 reg2 ri -> OR (env reg1) (env reg2) (fixRI ri) ORIS reg1 reg2 imm -> ORIS (env reg1) (env reg2) imm XOR reg1 reg2 ri -> XOR (env reg1) (env reg2) (fixRI ri) @@ -480,8 +497,8 @@ ppc_isJumpishInstr instr ppc_jumpDestsOfInstr :: Instr -> [BlockId] ppc_jumpDestsOfInstr insn = case insn of - BCC _ id -> [id] - BCCFAR _ id -> [id] + BCC _ id _ -> [id] + BCCFAR _ id _ -> [id] BCTR targets _ -> [id | Just id <- targets] _ -> [] @@ -492,8 +509,8 @@ ppc_jumpDestsOfInstr insn ppc_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr ppc_patchJumpInstr insn patchF = case insn of - BCC cc id -> BCC cc (patchF id) - BCCFAR cc id -> BCCFAR cc (patchF id) + BCC cc id p -> BCC cc (patchF id) p + BCCFAR cc id p -> BCCFAR cc (patchF id) p BCTR ids lbl -> BCTR (map (fmap patchF) ids) lbl _ -> insn @@ -631,16 +648,12 @@ ppc_mkRegRegMoveInstr src dst -- | Make an unconditional jump instruction. --- For architectures with branch delay slots, its ok to put --- a NOP after the jump. Don't fill the delay slot with an --- instruction that references regs or you'll confuse the --- linear allocator. ppc_mkJumpInstr :: BlockId -> [Instr] ppc_mkJumpInstr id - = [BCC ALWAYS id] + = [BCC ALWAYS id Nothing] -- | Take the source and destination from this reg -> reg move instruction @@ -669,12 +682,12 @@ makeFarBranches info_env blocks handleBlock addr (BasicBlock id instrs) = BasicBlock id (zipWith makeFar [addr..] instrs) - makeFar _ (BCC ALWAYS tgt) = BCC ALWAYS tgt - makeFar addr (BCC cond tgt) + makeFar _ (BCC ALWAYS tgt _) = BCC ALWAYS tgt Nothing + makeFar addr (BCC cond tgt p) | abs (addr - targetAddr) >= nearLimit - = BCCFAR cond tgt + = BCCFAR cond tgt p | otherwise - = BCC cond tgt + = BCC cond tgt p where Just targetAddr = lookupUFM blockAddressMap tgt makeFar _ other = other diff --git a/compiler/nativeGen/PPC/Ppr.hs b/compiler/nativeGen/PPC/Ppr.hs index 63d01c3913..2f64d82ee5 100644 --- a/compiler/nativeGen/PPC/Ppr.hs +++ b/compiler/nativeGen/PPC/Ppr.hs @@ -9,6 +9,8 @@ {-# OPTIONS_GHC -fno-warn-orphans #-} module PPC.Ppr (pprNatCmmDecl) where +import GhcPrelude + import PPC.Regs import PPC.Instr import PPC.Cond @@ -23,9 +25,10 @@ import Cmm hiding (topInfoTable) import Hoopl.Collections import Hoopl.Label +import BlockId import CLabel -import Unique ( pprUniqueAlways, Uniquable(..) ) +import Unique ( pprUniqueAlways, getUnique ) import Platform import FastString import Outputable @@ -78,19 +81,17 @@ pprNatCmmDecl proc@(CmmProc top_info lbl _ (ListGraph blocks)) = pprFunctionDescriptor :: CLabel -> SDoc pprFunctionDescriptor lab = pprGloblDecl lab - $$ text ".section \".opd\",\"aw\"" - $$ text ".align 3" + $$ text "\t.section \".opd\", \"aw\"" + $$ text "\t.align 3" $$ ppr lab <> char ':' - $$ text ".quad ." - <> ppr lab - <> text ",.TOC.@tocbase,0" - $$ text ".previous" - $$ text ".type " - <> ppr lab - <> text ", @function" - $$ char '.' - <> ppr lab - <> char ':' + $$ text "\t.quad ." + <> ppr lab + <> text ",.TOC.@tocbase,0" + $$ text "\t.previous" + $$ text "\t.type" + <+> ppr lab + <> text ", @function" + $$ char '.' <> ppr lab <> char ':' pprFunctionPrologue :: CLabel ->SDoc pprFunctionPrologue lab = pprGloblDecl lab @@ -108,7 +109,7 @@ pprFunctionPrologue lab = pprGloblDecl lab pprBasicBlock :: LabelMap CmmStatics -> NatBasicBlock Instr -> SDoc pprBasicBlock info_env (BasicBlock blockid instrs) = maybe_infotable $$ - pprLabel (mkAsmTempLabel (getUnique blockid)) $$ + pprLabel (blockLbl blockid) $$ vcat (map pprInstr instrs) where maybe_infotable = case mapLookup blockid info_env of @@ -310,11 +311,13 @@ pprImm (HIGHESTA i) pprAddr :: AddrMode -> SDoc pprAddr (AddrRegReg r1 r2) - = pprReg r1 <+> text ", " <+> pprReg r2 - -pprAddr (AddrRegImm r1 (ImmInt i)) = hcat [ int i, char '(', pprReg r1, char ')' ] -pprAddr (AddrRegImm r1 (ImmInteger i)) = hcat [ integer i, char '(', pprReg r1, char ')' ] -pprAddr (AddrRegImm r1 imm) = hcat [ pprImm imm, char '(', pprReg r1, char ')' ] + = pprReg r1 <> char ',' <+> pprReg r2 +pprAddr (AddrRegImm r1 (ImmInt i)) + = hcat [ int i, char '(', pprReg r1, char ')' ] +pprAddr (AddrRegImm r1 (ImmInteger i)) + = hcat [ integer i, char '(', pprReg r1, char ')' ] +pprAddr (AddrRegImm r1 imm) + = hcat [ pprImm imm, char '(', pprReg r1, char ')' ] pprSectionAlign :: Section -> SDoc @@ -450,15 +453,27 @@ pprInstr (LD fmt reg addr) = hcat [ text ", ", pprAddr addr ] + pprInstr (LDFAR fmt reg (AddrRegImm source off)) = sdocWithPlatform $ \platform -> vcat [ pprInstr (ADDIS (tmpReg platform) source (HA off)), pprInstr (LD fmt reg (AddrRegImm (tmpReg platform) (LO off))) ] - pprInstr (LDFAR _ _ _) = panic "PPC.Ppr.pprInstr LDFAR: no match" +pprInstr (LDR fmt reg1 addr) = hcat [ + text "\tl", + case fmt of + II32 -> char 'w' + II64 -> char 'd' + _ -> panic "PPC.Ppr.Instr LDR: no match", + text "arx\t", + pprReg reg1, + text ", ", + pprAddr addr + ] + pprInstr (LA fmt reg addr) = hcat [ char '\t', text "l", @@ -508,6 +523,17 @@ pprInstr (STU fmt reg addr) = hcat [ text ", ", pprAddr addr ] +pprInstr (STC fmt reg1 addr) = hcat [ + text "\tst", + case fmt of + II32 -> char 'w' + II64 -> char 'd' + _ -> panic "PPC.Ppr.Instr STC: no match", + text "cx.\t", + pprReg reg1, + text ", ", + pprAddr addr + ] pprInstr (LIS reg imm) = hcat [ char '\t', text "lis", @@ -569,19 +595,25 @@ pprInstr (CMPL fmt reg ri) = hcat [ RIReg _ -> empty RIImm _ -> char 'i' ] -pprInstr (BCC cond blockid) = hcat [ +pprInstr (BCC cond blockid prediction) = hcat [ char '\t', text "b", pprCond cond, + pprPrediction prediction, char '\t', ppr lbl ] - where lbl = mkAsmTempLabel (getUnique blockid) + where lbl = mkLocalBlockLabel (getUnique blockid) + pprPrediction p = case p of + Nothing -> empty + Just True -> char '+' + Just False -> char '-' -pprInstr (BCCFAR cond blockid) = vcat [ +pprInstr (BCCFAR cond blockid prediction) = vcat [ hcat [ text "\tb", pprCond (condNegate cond), + neg_prediction, text "\t$+8" ], hcat [ @@ -589,7 +621,11 @@ pprInstr (BCCFAR cond blockid) = vcat [ ppr lbl ] ] - where lbl = mkAsmTempLabel (getUnique blockid) + where lbl = mkLocalBlockLabel (getUnique blockid) + neg_prediction = case prediction of + Nothing -> empty + Just True -> char '-' + Just False -> char '+' pprInstr (JMP lbl) -- We never jump to ForeignLabels; if we ever do, c.f. handling for "BL" @@ -741,6 +777,7 @@ pprInstr (AND reg1 reg2 (RIImm imm)) = hcat [ ] pprInstr (AND reg1 reg2 ri) = pprLogic (sLit "and") reg1 reg2 ri pprInstr (ANDC reg1 reg2 reg3) = pprLogic (sLit "andc") reg1 reg2 (RIReg reg3) +pprInstr (NAND reg1 reg2 reg3) = pprLogic (sLit "nand") reg1 reg2 (RIReg reg3) pprInstr (OR reg1 reg2 ri) = pprLogic (sLit "or") reg1 reg2 ri pprInstr (XOR reg1 reg2 ri) = pprLogic (sLit "xor") reg1 reg2 ri @@ -922,6 +959,10 @@ pprInstr (FETCHPC reg) = vcat [ hcat [ text "1:\tmflr\t", pprReg reg ] ] +pprInstr HWSYNC = text "\tsync" + +pprInstr ISYNC = text "\tisync" + pprInstr LWSYNC = text "\tlwsync" pprInstr NOP = text "\tnop" diff --git a/compiler/nativeGen/PPC/RegInfo.hs b/compiler/nativeGen/PPC/RegInfo.hs index c4724d4193..30a07b9440 100644 --- a/compiler/nativeGen/PPC/RegInfo.hs +++ b/compiler/nativeGen/PPC/RegInfo.hs @@ -20,6 +20,8 @@ where #include "nativeGen/NCG.h" #include "HsVersions.h" +import GhcPrelude + import PPC.Instr import BlockId @@ -49,14 +51,14 @@ shortcutStatics fn (Statics lbl statics) shortcutLabel :: (BlockId -> Maybe JumpDest) -> CLabel -> CLabel shortcutLabel fn lab - | Just uq <- maybeAsmTemp lab = shortBlockId fn (mkBlockId uq) - | otherwise = lab + | Just blkId <- maybeLocalBlockLabel lab = shortBlockId fn blkId + | otherwise = lab shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic shortcutStatic fn (CmmStaticLit (CmmLabel lab)) = CmmStaticLit (CmmLabel (shortcutLabel fn lab)) -shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off)) - = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off) +shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off w)) + = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off w) -- slightly dodgy, we're ignoring the second label, but this -- works with the way we use CmmLabelDiffOff for jump tables now. shortcutStatic _ other_static @@ -69,6 +71,6 @@ shortBlockId shortBlockId fn blockid = case fn blockid of - Nothing -> mkAsmTempLabel uq + Nothing -> mkLocalBlockLabel uq Just (DestBlockId blockid') -> shortBlockId fn blockid' where uq = getUnique blockid diff --git a/compiler/nativeGen/PPC/Regs.hs b/compiler/nativeGen/PPC/Regs.hs index a1befc7837..227517be88 100644 --- a/compiler/nativeGen/PPC/Regs.hs +++ b/compiler/nativeGen/PPC/Regs.hs @@ -50,6 +50,8 @@ where #include "nativeGen/NCG.h" #include "HsVersions.h" +import GhcPrelude + import Reg import RegClass import Format @@ -70,7 +72,7 @@ import Data.Int ( Int8, Int16, Int32, Int64 ) -- squeese functions for the graph allocator ----------------------------------- -- | regSqueeze_class reg --- Calculuate the maximum number of register colors that could be +-- Calculate the maximum number of register colors that could be -- denied to a node of this class due to having this reg -- as a neighbour. -- @@ -163,7 +165,7 @@ litToImm (CmmFloat f W32) = ImmFloat f litToImm (CmmFloat f W64) = ImmDouble f litToImm (CmmLabel l) = ImmCLbl l litToImm (CmmLabelOff l off) = ImmIndex l off -litToImm (CmmLabelDiffOff l1 l2 off) +litToImm (CmmLabelDiffOff l1 l2 off _) = ImmConstantSum (ImmConstantDiff (ImmCLbl l1) (ImmCLbl l2)) (ImmInt off) |