summaryrefslogtreecommitdiff
path: root/compiler/nativeGen/PPC
diff options
context:
space:
mode:
authorKavon Farvardin <kavon@farvard.in>2018-09-23 15:29:37 -0500
committerKavon Farvardin <kavon@farvard.in>2018-09-23 15:29:37 -0500
commit84c2ad99582391005b5e873198b15e9e9eb4f78d (patch)
treecaa8c2f2ec7e97fbb4977263c6817c9af5025cf4 /compiler/nativeGen/PPC
parent8ddb47cfcf5776e9a3c55fd37947c8a95e00fa12 (diff)
parente68b439fe5de61b9a2ca51af472185c62ccb8b46 (diff)
downloadhaskell-wip/T13904.tar.gz
update to current master againwip/T13904
Diffstat (limited to 'compiler/nativeGen/PPC')
-rw-r--r--compiler/nativeGen/PPC/CodeGen.hs207
-rw-r--r--compiler/nativeGen/PPC/Cond.hs2
-rw-r--r--compiler/nativeGen/PPC/Instr.hs71
-rw-r--r--compiler/nativeGen/PPC/Ppr.hs89
-rw-r--r--compiler/nativeGen/PPC/RegInfo.hs12
-rw-r--r--compiler/nativeGen/PPC/Regs.hs6
6 files changed, 285 insertions, 102 deletions
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
index 1e88a1d025..f246ec36f1 100644
--- a/compiler/nativeGen/PPC/CodeGen.hs
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -25,6 +25,8 @@ where
#include "../includes/MachDeps.h"
-- NCG stuff:
+import GhcPrelude
+
import CodeGen.Platform
import PPC.Instr
import PPC.Cond
@@ -52,7 +54,6 @@ import Hoopl.Graph
-- The rest:
import OrdList
import Outputable
-import Unique
import DynFlags
import Control.Monad ( mapAndUnzipM, when )
@@ -90,13 +91,23 @@ cmmTopCodeGen (CmmProc info lab live graph) = do
case picBaseMb of
Just picBase -> initializePicBase_ppc arch os picBase tops
Nothing -> return tops
- ArchPPC_64 ELF_V1 -> return tops
+ ArchPPC_64 ELF_V1 -> fixup_entry tops
-- generating function descriptor is handled in
-- pretty printer
- ArchPPC_64 ELF_V2 -> return tops
+ ArchPPC_64 ELF_V2 -> fixup_entry tops
-- generating function prologue is handled in
-- pretty printer
_ -> panic "PPC.cmmTopCodeGen: unknown arch"
+ where
+ fixup_entry (CmmProc info lab live (ListGraph (entry:blocks)) : statics)
+ = do
+ let BasicBlock bID insns = entry
+ bID' <- if lab == (blockLbl bID)
+ then newBlockId
+ else return bID
+ let b' = BasicBlock bID' insns
+ return (CmmProc info lab live (ListGraph (b':blocks)) : statics)
+ fixup_entry _ = panic "cmmTopCodegen: Broken CmmProc"
cmmTopCodeGen (CmmData sec dat) = do
return [CmmData sec dat] -- no translation, we just use CmmStatic
@@ -161,8 +172,8 @@ stmtToInstrs stmt = do
-> genCCall target result_regs args
CmmBranch id -> genBranch id
- CmmCondBranch arg true false _ -> do
- b1 <- genCondJump true arg
+ CmmCondBranch arg true false prediction -> do
+ b1 <- genCondJump true arg prediction
b2 <- genBranch false
return (b1 `appOL` b2)
CmmSwitch arg ids -> do dflags <- getDynFlags
@@ -214,7 +225,7 @@ getRegisterReg platform (CmmGlobal mid)
jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
- where blockLabel = mkAsmTempLabel (getUnique blockid)
+ where blockLabel = blockLbl blockid
@@ -371,6 +382,14 @@ iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do
mov_lo = MR rlo expr_reg
return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
rlo
+
+iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do
+ (expr_reg,expr_code) <- getSomeReg expr
+ (rlo, rhi) <- getNewRegPairNat II32
+ let mov_hi = SRA II32 rhi expr_reg (RIImm (ImmInt 31))
+ mov_lo = MR rlo expr_reg
+ return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi)
+ rlo
iselExpr64 expr
= pprPanic "iselExpr64(powerpc)" (pprExpr expr)
@@ -719,7 +738,7 @@ data Amode
= Amode AddrMode InstrBlock
{-
-Now, given a tree (the argument to an CmmLoad) that references memory,
+Now, given a tree (the argument to a CmmLoad) that references memory,
produce a suitable addressing mode.
A Rule of the Game (tm) for Amodes: use of the addr bit must
@@ -1070,11 +1089,12 @@ comparison to do.
genCondJump
:: BlockId -- the branch target
-> CmmExpr -- the condition on which to branch
+ -> Maybe Bool
-> NatM InstrBlock
-genCondJump id bool = do
+genCondJump id bool prediction = do
CondCode _ cond code <- getCondCode bool
- return (code `snocOL` BCC cond id)
+ return (code `snocOL` BCC cond id prediction)
@@ -1098,6 +1118,90 @@ genCCall (PrimTarget MO_Touch) _ _
genCCall (PrimTarget (MO_Prefetch_Data _)) _ _
= return $ nilOL
+genCCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n]
+ = do dflags <- getDynFlags
+ let platform = targetPlatform dflags
+ fmt = intFormat width
+ reg_dst = getRegisterReg platform (CmmLocal dst)
+ (instr, n_code) <- case amop of
+ AMO_Add -> getSomeRegOrImm ADD True reg_dst
+ AMO_Sub -> case n of
+ CmmLit (CmmInt i _)
+ | Just imm <- makeImmediate width True (-i)
+ -> return (ADD reg_dst reg_dst (RIImm imm), nilOL)
+ _
+ -> do
+ (n_reg, n_code) <- getSomeReg n
+ return (SUBF reg_dst n_reg reg_dst, n_code)
+ AMO_And -> getSomeRegOrImm AND False reg_dst
+ AMO_Nand -> do (n_reg, n_code) <- getSomeReg n
+ return (NAND reg_dst reg_dst n_reg, n_code)
+ AMO_Or -> getSomeRegOrImm OR False reg_dst
+ AMO_Xor -> getSomeRegOrImm XOR False reg_dst
+ Amode addr_reg addr_code <- getAmodeIndex addr
+ lbl_retry <- getBlockIdNat
+ return $ n_code `appOL` addr_code
+ `appOL` toOL [ HWSYNC
+ , BCC ALWAYS lbl_retry Nothing
+
+ , NEWBLOCK lbl_retry
+ , LDR fmt reg_dst addr_reg
+ , instr
+ , STC fmt reg_dst addr_reg
+ , BCC NE lbl_retry (Just False)
+ , ISYNC
+ ]
+ where
+ getAmodeIndex (CmmMachOp (MO_Add _) [x, y])
+ = do
+ (regX, codeX) <- getSomeReg x
+ (regY, codeY) <- getSomeReg y
+ return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY))
+ getAmodeIndex other
+ = do
+ (reg, code) <- getSomeReg other
+ return (Amode (AddrRegReg r0 reg) code) -- NB: r0 is 0 here!
+ getSomeRegOrImm op sign dst
+ = case n of
+ CmmLit (CmmInt i _) | Just imm <- makeImmediate width sign i
+ -> return (op dst dst (RIImm imm), nilOL)
+ _
+ -> do
+ (n_reg, n_code) <- getSomeReg n
+ return (op dst dst (RIReg n_reg), n_code)
+
+genCCall (PrimTarget (MO_AtomicRead width)) [dst] [addr]
+ = do dflags <- getDynFlags
+ let platform = targetPlatform dflags
+ fmt = intFormat width
+ reg_dst = getRegisterReg platform (CmmLocal dst)
+ form = if widthInBits width == 64 then DS else D
+ Amode addr_reg addr_code <- getAmode form addr
+ lbl_end <- getBlockIdNat
+ return $ addr_code `appOL` toOL [ HWSYNC
+ , LD fmt reg_dst addr_reg
+ , CMP fmt reg_dst (RIReg reg_dst)
+ , BCC NE lbl_end (Just False)
+ , BCC ALWAYS lbl_end Nothing
+ -- See Note [Seemingly useless cmp and bne]
+ , NEWBLOCK lbl_end
+ , ISYNC
+ ]
+
+-- Note [Seemingly useless cmp and bne]
+-- In Power ISA, Book II, Section 4.4.1, Instruction Synchronize Instruction
+-- the second paragraph says that isync may complete before storage accesses
+-- "associated" with a preceding instruction have been performed. The cmp
+-- operation and the following bne introduce a data and control dependency
+-- on the load instruction (See also Power ISA, Book II, Appendix B.2.3, Safe
+-- Fetch).
+-- This is also what gcc does.
+
+
+genCCall (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do
+ code <- assignMem_IntCode (intFormat width) addr val
+ return $ unitOL(HWSYNC) `appOL` code
+
genCCall (PrimTarget (MO_Clz width)) [dst] [src]
= do dflags <- getDynFlags
let platform = targetPlatform dflags
@@ -1110,17 +1214,17 @@ genCCall (PrimTarget (MO_Clz width)) [dst] [src]
lbl3 <- getBlockIdNat
let vr_hi = getHiVRegFromLo vr_lo
cntlz = toOL [ CMPL II32 vr_hi (RIImm (ImmInt 0))
- , BCC NE lbl2
- , BCC ALWAYS lbl1
+ , BCC NE lbl2 Nothing
+ , BCC ALWAYS lbl1 Nothing
, NEWBLOCK lbl1
, CNTLZ II32 reg_dst vr_lo
, ADD reg_dst reg_dst (RIImm (ImmInt 32))
- , BCC ALWAYS lbl3
+ , BCC ALWAYS lbl3 Nothing
, NEWBLOCK lbl2
, CNTLZ II32 reg_dst vr_hi
- , BCC ALWAYS lbl3
+ , BCC ALWAYS lbl3 Nothing
, NEWBLOCK lbl3
]
@@ -1167,8 +1271,8 @@ genCCall (PrimTarget (MO_Ctz width)) [dst] [src]
cnttzlo <- cnttz format reg_dst vr_lo
let vr_hi = getHiVRegFromLo vr_lo
cnttz64 = toOL [ CMPL format vr_lo (RIImm (ImmInt 0))
- , BCC NE lbl2
- , BCC ALWAYS lbl1
+ , BCC NE lbl2 Nothing
+ , BCC ALWAYS lbl1 Nothing
, NEWBLOCK lbl1
, ADD x' vr_hi (RIImm (ImmInt (-1)))
@@ -1176,12 +1280,12 @@ genCCall (PrimTarget (MO_Ctz width)) [dst] [src]
, CNTLZ format r' x''
-- 32 + (32 - clz(x''))
, SUBFC reg_dst r' (RIImm (ImmInt 64))
- , BCC ALWAYS lbl3
+ , BCC ALWAYS lbl3 Nothing
, NEWBLOCK lbl2
]
`appOL` cnttzlo `appOL`
- toOL [ BCC ALWAYS lbl3
+ toOL [ BCC ALWAYS lbl3 Nothing
, NEWBLOCK lbl3
]
@@ -1229,6 +1333,7 @@ genCCall target dest_regs argsAndHints
PrimTarget (MO_U_Mul2 width) -> multOp2 platform width dest_regs
argsAndHints
PrimTarget (MO_Add2 _) -> add2Op platform dest_regs argsAndHints
+ PrimTarget (MO_AddWordC _) -> addcOp platform dest_regs argsAndHints
PrimTarget (MO_SubWordC _) -> subcOp platform dest_regs argsAndHints
PrimTarget (MO_AddIntC width) -> addSubCOp ADDO platform width
dest_regs argsAndHints
@@ -1315,21 +1420,21 @@ genCCall target dest_regs argsAndHints
-- rhat = un32 - q1*vn1
, MULL fmt tmp q1 (RIReg vn1)
, SUBF rhat tmp un32
- , BCC ALWAYS again1
+ , BCC ALWAYS again1 Nothing
, NEWBLOCK again1
-- if (q1 >= b || q1*vn0 > b*rhat + un1)
, CMPL fmt q1 (RIReg b)
- , BCC GEU then1
- , BCC ALWAYS no1
+ , BCC GEU then1 Nothing
+ , BCC ALWAYS no1 Nothing
, NEWBLOCK no1
, MULL fmt tmp q1 (RIReg vn0)
, SL fmt tmp1 rhat (RIImm (ImmInt half))
, ADD tmp1 tmp1 (RIReg un1)
, CMPL fmt tmp (RIReg tmp1)
- , BCC LEU endif1
- , BCC ALWAYS then1
+ , BCC LEU endif1 Nothing
+ , BCC ALWAYS then1 Nothing
, NEWBLOCK then1
-- q1 = q1 - 1
@@ -1338,8 +1443,8 @@ genCCall target dest_regs argsAndHints
, ADD rhat rhat (RIReg vn1)
-- if (rhat < b) goto again1
, CMPL fmt rhat (RIReg b)
- , BCC LTT again1
- , BCC ALWAYS endif1
+ , BCC LTT again1 Nothing
+ , BCC ALWAYS endif1 Nothing
, NEWBLOCK endif1
-- un21 = un32*b + un1 - q1*v
@@ -1353,21 +1458,21 @@ genCCall target dest_regs argsAndHints
-- rhat = un21- q0*vn1
, MULL fmt tmp q0 (RIReg vn1)
, SUBF rhat tmp un21
- , BCC ALWAYS again2
+ , BCC ALWAYS again2 Nothing
, NEWBLOCK again2
-- if (q0>b || q0*vn0 > b*rhat + un0)
, CMPL fmt q0 (RIReg b)
- , BCC GEU then2
- , BCC ALWAYS no2
+ , BCC GEU then2 Nothing
+ , BCC ALWAYS no2 Nothing
, NEWBLOCK no2
, MULL fmt tmp q0 (RIReg vn0)
, SL fmt tmp1 rhat (RIImm (ImmInt half))
, ADD tmp1 tmp1 (RIReg un0)
, CMPL fmt tmp (RIReg tmp1)
- , BCC LEU endif2
- , BCC ALWAYS then2
+ , BCC LEU endif2 Nothing
+ , BCC ALWAYS then2 Nothing
, NEWBLOCK then2
-- q0 = q0 - 1
@@ -1376,8 +1481,8 @@ genCCall target dest_regs argsAndHints
, ADD rhat rhat (RIReg vn1)
-- if (rhat<b) goto again2
, CMPL fmt rhat (RIReg b)
- , BCC LTT again2
- , BCC ALWAYS endif2
+ , BCC LTT again2 Nothing
+ , BCC ALWAYS endif2 Nothing
, NEWBLOCK endif2
-- compute remainder
@@ -1419,6 +1524,11 @@ genCCall target dest_regs argsAndHints
add2Op _ _ _
= panic "genCCall: Wrong number of arguments/results for add2"
+ addcOp platform [res_r, res_c] [arg_x, arg_y]
+ = add2Op platform [res_c {-hi-}, res_r {-lo-}] [arg_x, arg_y]
+ addcOp _ _ _
+ = panic "genCCall: Wrong number of arguments/results for addc"
+
-- PowerPC subfc sets the carry for rT = ~(rA) + rB + 1,
-- which is 0 for borrow and 1 otherwise. We need 1 and 0
-- so xor with 1.
@@ -1598,7 +1708,7 @@ genCCall' dflags gcp target dest_regs args
uses_pic_base_implicitly = do
-- See Note [implicit register in PPC PIC code]
-- on why we claim to use PIC register here
- when (gopt Opt_PIC dflags && target32Bit platform) $ do
+ when (positionIndependent dflags && target32Bit platform) $ do
_ <- getPicBaseNat $ archWordFormat True
return ()
@@ -1881,6 +1991,10 @@ genCCall' dflags gcp target dest_regs args
MO_F32_Tanh -> (fsLit "tanh", True)
MO_F32_Pwr -> (fsLit "pow", True)
+ MO_F32_Asinh -> (fsLit "asinh", True)
+ MO_F32_Acosh -> (fsLit "acosh", True)
+ MO_F32_Atanh -> (fsLit "atanh", True)
+
MO_F64_Exp -> (fsLit "exp", False)
MO_F64_Log -> (fsLit "log", False)
MO_F64_Sqrt -> (fsLit "sqrt", False)
@@ -1899,32 +2013,40 @@ genCCall' dflags gcp target dest_regs args
MO_F64_Tanh -> (fsLit "tanh", False)
MO_F64_Pwr -> (fsLit "pow", False)
+ MO_F64_Asinh -> (fsLit "asinh", False)
+ MO_F64_Acosh -> (fsLit "acosh", False)
+ MO_F64_Atanh -> (fsLit "atanh", False)
+
MO_UF_Conv w -> (fsLit $ word2FloatLabel w, False)
MO_Memcpy _ -> (fsLit "memcpy", False)
MO_Memset _ -> (fsLit "memset", False)
MO_Memmove _ -> (fsLit "memmove", False)
+ MO_Memcmp _ -> (fsLit "memcmp", False)
MO_BSwap w -> (fsLit $ bSwapLabel w, False)
MO_PopCnt w -> (fsLit $ popCntLabel w, False)
- MO_Clz w -> (fsLit $ clzLabel w, False)
- MO_Ctz w -> (fsLit $ ctzLabel w, False)
- MO_AtomicRMW w amop -> (fsLit $ atomicRMWLabel w amop, False)
+ MO_Pdep w -> (fsLit $ pdepLabel w, False)
+ MO_Pext w -> (fsLit $ pextLabel w, False)
+ MO_Clz _ -> unsupported
+ MO_Ctz _ -> unsupported
+ MO_AtomicRMW {} -> unsupported
MO_Cmpxchg w -> (fsLit $ cmpxchgLabel w, False)
- MO_AtomicRead w -> (fsLit $ atomicReadLabel w, False)
- MO_AtomicWrite w -> (fsLit $ atomicWriteLabel w, False)
+ MO_AtomicRead _ -> unsupported
+ MO_AtomicWrite _ -> unsupported
MO_S_QuotRem {} -> unsupported
MO_U_QuotRem {} -> unsupported
MO_U_QuotRem2 {} -> unsupported
MO_Add2 {} -> unsupported
+ MO_AddWordC {} -> unsupported
MO_SubWordC {} -> unsupported
MO_AddIntC {} -> unsupported
MO_SubIntC {} -> unsupported
MO_U_Mul2 {} -> unsupported
MO_WriteBarrier -> unsupported
MO_Touch -> unsupported
- (MO_Prefetch_Data _ ) -> unsupported
+ MO_Prefetch_Data _ -> unsupported
unsupported = panic ("outOfLineCmmOp: " ++ show mop
++ " not supported")
@@ -1950,7 +2072,7 @@ genSwitch dflags expr targets
]
return code
- | (gopt Opt_PIC dflags) || (not $ target32Bit $ targetPlatform dflags)
+ | (positionIndependent dflags) || (not $ target32Bit $ targetPlatform dflags)
= do
(reg,e_code) <- getSomeReg (cmmOffset dflags expr offset)
let fmt = archWordFormat $ target32Bit $ targetPlatform dflags
@@ -1988,15 +2110,16 @@ generateJumpTableForInstr :: DynFlags -> Instr
-> Maybe (NatCmmDecl CmmStatics Instr)
generateJumpTableForInstr dflags (BCTR ids (Just lbl)) =
let jumpTable
- | (gopt Opt_PIC dflags)
+ | (positionIndependent dflags)
|| (not $ target32Bit $ targetPlatform dflags)
= map jumpTableEntryRel ids
| otherwise = map (jumpTableEntry dflags) ids
where jumpTableEntryRel Nothing
= CmmStaticLit (CmmInt 0 (wordWidth dflags))
jumpTableEntryRel (Just blockid)
- = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0)
- where blockLabel = mkAsmTempLabel (getUnique blockid)
+ = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0
+ (wordWidth dflags))
+ where blockLabel = blockLbl blockid
in Just (CmmData (Section ReadOnlyData lbl) (Statics lbl jumpTable))
generateJumpTableForInstr _ _ = Nothing
diff --git a/compiler/nativeGen/PPC/Cond.hs b/compiler/nativeGen/PPC/Cond.hs
index 0e4b1fd701..bd8bdee81a 100644
--- a/compiler/nativeGen/PPC/Cond.hs
+++ b/compiler/nativeGen/PPC/Cond.hs
@@ -8,6 +8,8 @@ module PPC.Cond (
where
+import GhcPrelude
+
import Panic
data Cond
diff --git a/compiler/nativeGen/PPC/Instr.hs b/compiler/nativeGen/PPC/Instr.hs
index eb179c5a99..8eb5e8fa8d 100644
--- a/compiler/nativeGen/PPC/Instr.hs
+++ b/compiler/nativeGen/PPC/Instr.hs
@@ -23,6 +23,8 @@ module PPC.Instr (
where
+import GhcPrelude
+
import PPC.Regs
import PPC.Cond
import Instruction
@@ -75,19 +77,19 @@ instance Instruction Instr where
mkStackDeallocInstr = ppc_mkStackDeallocInstr
-ppc_mkStackAllocInstr :: Platform -> Int -> Instr
+ppc_mkStackAllocInstr :: Platform -> Int -> [Instr]
ppc_mkStackAllocInstr platform amount
= ppc_mkStackAllocInstr' platform (-amount)
-ppc_mkStackDeallocInstr :: Platform -> Int -> Instr
+ppc_mkStackDeallocInstr :: Platform -> Int -> [Instr]
ppc_mkStackDeallocInstr platform amount
= ppc_mkStackAllocInstr' platform amount
-ppc_mkStackAllocInstr' :: Platform -> Int -> Instr
+ppc_mkStackAllocInstr' :: Platform -> Int -> [Instr]
ppc_mkStackAllocInstr' platform amount
= case platformArch platform of
- ArchPPC -> UPDATE_SP II32 (ImmInt amount)
- ArchPPC_64 _ -> UPDATE_SP II64 (ImmInt amount)
+ ArchPPC -> [UPDATE_SP II32 (ImmInt amount)]
+ ArchPPC_64 _ -> [UPDATE_SP II64 (ImmInt amount)]
_ -> panic $ "ppc_mkStackAllocInstr' "
++ show (platformArch platform)
@@ -124,7 +126,7 @@ allocMoreStack platform slots (CmmProc info lbl live (ListGraph code)) = do
insert_stack_insns (BasicBlock id insns)
| Just new_blockid <- mapLookup id new_blockmap
- = [ BasicBlock id [alloc, BCC ALWAYS new_blockid]
+ = [ BasicBlock id $ alloc ++ [BCC ALWAYS new_blockid Nothing]
, BasicBlock new_blockid block'
]
| otherwise
@@ -137,11 +139,11 @@ allocMoreStack platform slots (CmmProc info lbl live (ListGraph code)) = do
-- "labeled-goto" we use JMP, and for "computed-goto" we
-- use MTCTR followed by BCTR. See 'PPC.CodeGen.genJump'.
= case insn of
- JMP _ -> dealloc : insn : r
- BCTR [] Nothing -> dealloc : insn : r
+ JMP _ -> dealloc ++ (insn : r)
+ BCTR [] Nothing -> dealloc ++ (insn : r)
BCTR ids label -> BCTR (map (fmap retarget) ids) label : r
- BCCFAR cond b -> BCCFAR cond (retarget b) : r
- BCC cond b -> BCC cond (retarget b) : r
+ BCCFAR cond b p -> BCCFAR cond (retarget b) p : r
+ BCC cond b p -> BCC cond (retarget b) p : r
_ -> insn : r
-- BL and BCTRL are call-like instructions rather than
-- jumps, and are used only for C calls.
@@ -190,10 +192,12 @@ data Instr
-- Loads and stores.
| LD Format Reg AddrMode -- Load format, dst, src
| LDFAR Format Reg AddrMode -- Load format, dst, src 32 bit offset
+ | LDR Format Reg AddrMode -- Load and reserve format, dst, src
| LA Format Reg AddrMode -- Load arithmetic format, dst, src
| ST Format Reg AddrMode -- Store format, src, dst
| STFAR Format Reg AddrMode -- Store format, src, dst 32 bit offset
| STU Format Reg AddrMode -- Store with Update format, src, dst
+ | STC Format Reg AddrMode -- Store conditional format, src, dst
| LIS Reg Imm -- Load Immediate Shifted dst, src
| LI Reg Imm -- Load Immediate dst, src
| MR Reg Reg -- Move Register dst, src -- also for fmr
@@ -201,8 +205,12 @@ data Instr
| CMP Format Reg RI -- format, src1, src2
| CMPL Format Reg RI -- format, src1, src2
- | BCC Cond BlockId
- | BCCFAR Cond BlockId
+ | BCC Cond BlockId (Maybe Bool) -- cond, block, hint
+ | BCCFAR Cond BlockId (Maybe Bool) -- cond, block, hint
+ -- hint:
+ -- Just True: branch likely taken
+ -- Just False: branch likely not taken
+ -- Nothing: no hint
| JMP CLabel -- same as branch,
-- but with CLabel instead of block ID
| MTCTR Reg
@@ -232,6 +240,7 @@ data Instr
| DIV Format Bool Reg Reg Reg
| AND Reg Reg RI -- dst, src1, src2
| ANDC Reg Reg Reg -- AND with complement, dst = src1 & ~ src2
+ | NAND Reg Reg Reg -- dst, src1, src2
| OR Reg Reg RI -- dst, src1, src2
| ORIS Reg Reg Imm -- OR Immediate Shifted dst, src1, src2
| XOR Reg Reg RI -- dst, src1, src2
@@ -272,6 +281,8 @@ data Instr
| MFLR Reg -- move from link register
| FETCHPC Reg -- pseudo-instruction:
-- bcl to next insn, mflr reg
+ | HWSYNC -- heavy weight sync
+ | ISYNC -- instruction synchronize
| LWSYNC -- memory barrier
| NOP -- no operation, PowerPC 64 bit
-- needs this as place holder to
@@ -290,17 +301,19 @@ ppc_regUsageOfInstr platform instr
= case instr of
LD _ reg addr -> usage (regAddr addr, [reg])
LDFAR _ reg addr -> usage (regAddr addr, [reg])
+ LDR _ reg addr -> usage (regAddr addr, [reg])
LA _ reg addr -> usage (regAddr addr, [reg])
ST _ reg addr -> usage (reg : regAddr addr, [])
STFAR _ reg addr -> usage (reg : regAddr addr, [])
STU _ reg addr -> usage (reg : regAddr addr, [])
+ STC _ reg addr -> usage (reg : regAddr addr, [])
LIS reg _ -> usage ([], [reg])
LI reg _ -> usage ([], [reg])
MR reg1 reg2 -> usage ([reg2], [reg1])
CMP _ reg ri -> usage (reg : regRI ri,[])
CMPL _ reg ri -> usage (reg : regRI ri,[])
- BCC _ _ -> noUsage
- BCCFAR _ _ -> noUsage
+ BCC _ _ _ -> noUsage
+ BCCFAR _ _ _ -> noUsage
MTCTR reg -> usage ([reg],[])
BCTR _ _ -> noUsage
BL _ params -> usage (params, callClobberedRegs platform)
@@ -325,6 +338,7 @@ ppc_regUsageOfInstr platform instr
AND reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
ANDC reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
+ NAND reg1 reg2 reg3 -> usage ([reg2,reg3], [reg1])
OR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
ORIS reg1 reg2 _ -> usage ([reg2], [reg1])
XOR reg1 reg2 ri -> usage (reg2 : regRI ri, [reg1])
@@ -380,17 +394,19 @@ ppc_patchRegsOfInstr instr env
= case instr of
LD fmt reg addr -> LD fmt (env reg) (fixAddr addr)
LDFAR fmt reg addr -> LDFAR fmt (env reg) (fixAddr addr)
+ LDR fmt reg addr -> LDR fmt (env reg) (fixAddr addr)
LA fmt reg addr -> LA fmt (env reg) (fixAddr addr)
ST fmt reg addr -> ST fmt (env reg) (fixAddr addr)
STFAR fmt reg addr -> STFAR fmt (env reg) (fixAddr addr)
STU fmt reg addr -> STU fmt (env reg) (fixAddr addr)
+ STC fmt reg addr -> STC fmt (env reg) (fixAddr addr)
LIS reg imm -> LIS (env reg) imm
LI reg imm -> LI (env reg) imm
MR reg1 reg2 -> MR (env reg1) (env reg2)
CMP fmt reg ri -> CMP fmt (env reg) (fixRI ri)
CMPL fmt reg ri -> CMPL fmt (env reg) (fixRI ri)
- BCC cond lbl -> BCC cond lbl
- BCCFAR cond lbl -> BCCFAR cond lbl
+ BCC cond lbl p -> BCC cond lbl p
+ BCCFAR cond lbl p -> BCCFAR cond lbl p
MTCTR reg -> MTCTR (env reg)
BCTR targets lbl -> BCTR targets lbl
BL imm argRegs -> BL imm argRegs -- argument regs
@@ -417,6 +433,7 @@ ppc_patchRegsOfInstr instr env
AND reg1 reg2 ri -> AND (env reg1) (env reg2) (fixRI ri)
ANDC reg1 reg2 reg3 -> ANDC (env reg1) (env reg2) (env reg3)
+ NAND reg1 reg2 reg3 -> NAND (env reg1) (env reg2) (env reg3)
OR reg1 reg2 ri -> OR (env reg1) (env reg2) (fixRI ri)
ORIS reg1 reg2 imm -> ORIS (env reg1) (env reg2) imm
XOR reg1 reg2 ri -> XOR (env reg1) (env reg2) (fixRI ri)
@@ -480,8 +497,8 @@ ppc_isJumpishInstr instr
ppc_jumpDestsOfInstr :: Instr -> [BlockId]
ppc_jumpDestsOfInstr insn
= case insn of
- BCC _ id -> [id]
- BCCFAR _ id -> [id]
+ BCC _ id _ -> [id]
+ BCCFAR _ id _ -> [id]
BCTR targets _ -> [id | Just id <- targets]
_ -> []
@@ -492,8 +509,8 @@ ppc_jumpDestsOfInstr insn
ppc_patchJumpInstr :: Instr -> (BlockId -> BlockId) -> Instr
ppc_patchJumpInstr insn patchF
= case insn of
- BCC cc id -> BCC cc (patchF id)
- BCCFAR cc id -> BCCFAR cc (patchF id)
+ BCC cc id p -> BCC cc (patchF id) p
+ BCCFAR cc id p -> BCCFAR cc (patchF id) p
BCTR ids lbl -> BCTR (map (fmap patchF) ids) lbl
_ -> insn
@@ -631,16 +648,12 @@ ppc_mkRegRegMoveInstr src dst
-- | Make an unconditional jump instruction.
--- For architectures with branch delay slots, its ok to put
--- a NOP after the jump. Don't fill the delay slot with an
--- instruction that references regs or you'll confuse the
--- linear allocator.
ppc_mkJumpInstr
:: BlockId
-> [Instr]
ppc_mkJumpInstr id
- = [BCC ALWAYS id]
+ = [BCC ALWAYS id Nothing]
-- | Take the source and destination from this reg -> reg move instruction
@@ -669,12 +682,12 @@ makeFarBranches info_env blocks
handleBlock addr (BasicBlock id instrs)
= BasicBlock id (zipWith makeFar [addr..] instrs)
- makeFar _ (BCC ALWAYS tgt) = BCC ALWAYS tgt
- makeFar addr (BCC cond tgt)
+ makeFar _ (BCC ALWAYS tgt _) = BCC ALWAYS tgt Nothing
+ makeFar addr (BCC cond tgt p)
| abs (addr - targetAddr) >= nearLimit
- = BCCFAR cond tgt
+ = BCCFAR cond tgt p
| otherwise
- = BCC cond tgt
+ = BCC cond tgt p
where Just targetAddr = lookupUFM blockAddressMap tgt
makeFar _ other = other
diff --git a/compiler/nativeGen/PPC/Ppr.hs b/compiler/nativeGen/PPC/Ppr.hs
index 63d01c3913..2f64d82ee5 100644
--- a/compiler/nativeGen/PPC/Ppr.hs
+++ b/compiler/nativeGen/PPC/Ppr.hs
@@ -9,6 +9,8 @@
{-# OPTIONS_GHC -fno-warn-orphans #-}
module PPC.Ppr (pprNatCmmDecl) where
+import GhcPrelude
+
import PPC.Regs
import PPC.Instr
import PPC.Cond
@@ -23,9 +25,10 @@ import Cmm hiding (topInfoTable)
import Hoopl.Collections
import Hoopl.Label
+import BlockId
import CLabel
-import Unique ( pprUniqueAlways, Uniquable(..) )
+import Unique ( pprUniqueAlways, getUnique )
import Platform
import FastString
import Outputable
@@ -78,19 +81,17 @@ pprNatCmmDecl proc@(CmmProc top_info lbl _ (ListGraph blocks)) =
pprFunctionDescriptor :: CLabel -> SDoc
pprFunctionDescriptor lab = pprGloblDecl lab
- $$ text ".section \".opd\",\"aw\""
- $$ text ".align 3"
+ $$ text "\t.section \".opd\", \"aw\""
+ $$ text "\t.align 3"
$$ ppr lab <> char ':'
- $$ text ".quad ."
- <> ppr lab
- <> text ",.TOC.@tocbase,0"
- $$ text ".previous"
- $$ text ".type "
- <> ppr lab
- <> text ", @function"
- $$ char '.'
- <> ppr lab
- <> char ':'
+ $$ text "\t.quad ."
+ <> ppr lab
+ <> text ",.TOC.@tocbase,0"
+ $$ text "\t.previous"
+ $$ text "\t.type"
+ <+> ppr lab
+ <> text ", @function"
+ $$ char '.' <> ppr lab <> char ':'
pprFunctionPrologue :: CLabel ->SDoc
pprFunctionPrologue lab = pprGloblDecl lab
@@ -108,7 +109,7 @@ pprFunctionPrologue lab = pprGloblDecl lab
pprBasicBlock :: LabelMap CmmStatics -> NatBasicBlock Instr -> SDoc
pprBasicBlock info_env (BasicBlock blockid instrs)
= maybe_infotable $$
- pprLabel (mkAsmTempLabel (getUnique blockid)) $$
+ pprLabel (blockLbl blockid) $$
vcat (map pprInstr instrs)
where
maybe_infotable = case mapLookup blockid info_env of
@@ -310,11 +311,13 @@ pprImm (HIGHESTA i)
pprAddr :: AddrMode -> SDoc
pprAddr (AddrRegReg r1 r2)
- = pprReg r1 <+> text ", " <+> pprReg r2
-
-pprAddr (AddrRegImm r1 (ImmInt i)) = hcat [ int i, char '(', pprReg r1, char ')' ]
-pprAddr (AddrRegImm r1 (ImmInteger i)) = hcat [ integer i, char '(', pprReg r1, char ')' ]
-pprAddr (AddrRegImm r1 imm) = hcat [ pprImm imm, char '(', pprReg r1, char ')' ]
+ = pprReg r1 <> char ',' <+> pprReg r2
+pprAddr (AddrRegImm r1 (ImmInt i))
+ = hcat [ int i, char '(', pprReg r1, char ')' ]
+pprAddr (AddrRegImm r1 (ImmInteger i))
+ = hcat [ integer i, char '(', pprReg r1, char ')' ]
+pprAddr (AddrRegImm r1 imm)
+ = hcat [ pprImm imm, char '(', pprReg r1, char ')' ]
pprSectionAlign :: Section -> SDoc
@@ -450,15 +453,27 @@ pprInstr (LD fmt reg addr) = hcat [
text ", ",
pprAddr addr
]
+
pprInstr (LDFAR fmt reg (AddrRegImm source off)) =
sdocWithPlatform $ \platform -> vcat [
pprInstr (ADDIS (tmpReg platform) source (HA off)),
pprInstr (LD fmt reg (AddrRegImm (tmpReg platform) (LO off)))
]
-
pprInstr (LDFAR _ _ _) =
panic "PPC.Ppr.pprInstr LDFAR: no match"
+pprInstr (LDR fmt reg1 addr) = hcat [
+ text "\tl",
+ case fmt of
+ II32 -> char 'w'
+ II64 -> char 'd'
+ _ -> panic "PPC.Ppr.Instr LDR: no match",
+ text "arx\t",
+ pprReg reg1,
+ text ", ",
+ pprAddr addr
+ ]
+
pprInstr (LA fmt reg addr) = hcat [
char '\t',
text "l",
@@ -508,6 +523,17 @@ pprInstr (STU fmt reg addr) = hcat [
text ", ",
pprAddr addr
]
+pprInstr (STC fmt reg1 addr) = hcat [
+ text "\tst",
+ case fmt of
+ II32 -> char 'w'
+ II64 -> char 'd'
+ _ -> panic "PPC.Ppr.Instr STC: no match",
+ text "cx.\t",
+ pprReg reg1,
+ text ", ",
+ pprAddr addr
+ ]
pprInstr (LIS reg imm) = hcat [
char '\t',
text "lis",
@@ -569,19 +595,25 @@ pprInstr (CMPL fmt reg ri) = hcat [
RIReg _ -> empty
RIImm _ -> char 'i'
]
-pprInstr (BCC cond blockid) = hcat [
+pprInstr (BCC cond blockid prediction) = hcat [
char '\t',
text "b",
pprCond cond,
+ pprPrediction prediction,
char '\t',
ppr lbl
]
- where lbl = mkAsmTempLabel (getUnique blockid)
+ where lbl = mkLocalBlockLabel (getUnique blockid)
+ pprPrediction p = case p of
+ Nothing -> empty
+ Just True -> char '+'
+ Just False -> char '-'
-pprInstr (BCCFAR cond blockid) = vcat [
+pprInstr (BCCFAR cond blockid prediction) = vcat [
hcat [
text "\tb",
pprCond (condNegate cond),
+ neg_prediction,
text "\t$+8"
],
hcat [
@@ -589,7 +621,11 @@ pprInstr (BCCFAR cond blockid) = vcat [
ppr lbl
]
]
- where lbl = mkAsmTempLabel (getUnique blockid)
+ where lbl = mkLocalBlockLabel (getUnique blockid)
+ neg_prediction = case prediction of
+ Nothing -> empty
+ Just True -> char '-'
+ Just False -> char '+'
pprInstr (JMP lbl)
-- We never jump to ForeignLabels; if we ever do, c.f. handling for "BL"
@@ -741,6 +777,7 @@ pprInstr (AND reg1 reg2 (RIImm imm)) = hcat [
]
pprInstr (AND reg1 reg2 ri) = pprLogic (sLit "and") reg1 reg2 ri
pprInstr (ANDC reg1 reg2 reg3) = pprLogic (sLit "andc") reg1 reg2 (RIReg reg3)
+pprInstr (NAND reg1 reg2 reg3) = pprLogic (sLit "nand") reg1 reg2 (RIReg reg3)
pprInstr (OR reg1 reg2 ri) = pprLogic (sLit "or") reg1 reg2 ri
pprInstr (XOR reg1 reg2 ri) = pprLogic (sLit "xor") reg1 reg2 ri
@@ -922,6 +959,10 @@ pprInstr (FETCHPC reg) = vcat [
hcat [ text "1:\tmflr\t", pprReg reg ]
]
+pprInstr HWSYNC = text "\tsync"
+
+pprInstr ISYNC = text "\tisync"
+
pprInstr LWSYNC = text "\tlwsync"
pprInstr NOP = text "\tnop"
diff --git a/compiler/nativeGen/PPC/RegInfo.hs b/compiler/nativeGen/PPC/RegInfo.hs
index c4724d4193..30a07b9440 100644
--- a/compiler/nativeGen/PPC/RegInfo.hs
+++ b/compiler/nativeGen/PPC/RegInfo.hs
@@ -20,6 +20,8 @@ where
#include "nativeGen/NCG.h"
#include "HsVersions.h"
+import GhcPrelude
+
import PPC.Instr
import BlockId
@@ -49,14 +51,14 @@ shortcutStatics fn (Statics lbl statics)
shortcutLabel :: (BlockId -> Maybe JumpDest) -> CLabel -> CLabel
shortcutLabel fn lab
- | Just uq <- maybeAsmTemp lab = shortBlockId fn (mkBlockId uq)
- | otherwise = lab
+ | Just blkId <- maybeLocalBlockLabel lab = shortBlockId fn blkId
+ | otherwise = lab
shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
shortcutStatic fn (CmmStaticLit (CmmLabel lab))
= CmmStaticLit (CmmLabel (shortcutLabel fn lab))
-shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
- = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off)
+shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off w))
+ = CmmStaticLit (CmmLabelDiffOff (shortcutLabel fn lbl1) lbl2 off w)
-- slightly dodgy, we're ignoring the second label, but this
-- works with the way we use CmmLabelDiffOff for jump tables now.
shortcutStatic _ other_static
@@ -69,6 +71,6 @@ shortBlockId
shortBlockId fn blockid =
case fn blockid of
- Nothing -> mkAsmTempLabel uq
+ Nothing -> mkLocalBlockLabel uq
Just (DestBlockId blockid') -> shortBlockId fn blockid'
where uq = getUnique blockid
diff --git a/compiler/nativeGen/PPC/Regs.hs b/compiler/nativeGen/PPC/Regs.hs
index a1befc7837..227517be88 100644
--- a/compiler/nativeGen/PPC/Regs.hs
+++ b/compiler/nativeGen/PPC/Regs.hs
@@ -50,6 +50,8 @@ where
#include "nativeGen/NCG.h"
#include "HsVersions.h"
+import GhcPrelude
+
import Reg
import RegClass
import Format
@@ -70,7 +72,7 @@ import Data.Int ( Int8, Int16, Int32, Int64 )
-- squeese functions for the graph allocator -----------------------------------
-- | regSqueeze_class reg
--- Calculuate the maximum number of register colors that could be
+-- Calculate the maximum number of register colors that could be
-- denied to a node of this class due to having this reg
-- as a neighbour.
--
@@ -163,7 +165,7 @@ litToImm (CmmFloat f W32) = ImmFloat f
litToImm (CmmFloat f W64) = ImmDouble f
litToImm (CmmLabel l) = ImmCLbl l
litToImm (CmmLabelOff l off) = ImmIndex l off
-litToImm (CmmLabelDiffOff l1 l2 off)
+litToImm (CmmLabelDiffOff l1 l2 off _)
= ImmConstantSum
(ImmConstantDiff (ImmCLbl l1) (ImmCLbl l2))
(ImmInt off)