diff options
Diffstat (limited to 'compiler/nativeGen/PPC/CodeGen.hs')
-rw-r--r-- | compiler/nativeGen/PPC/CodeGen.hs | 2453 |
1 files changed, 0 insertions, 2453 deletions
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs deleted file mode 100644 index ad47501981..0000000000 --- a/compiler/nativeGen/PPC/CodeGen.hs +++ /dev/null @@ -1,2453 +0,0 @@ -{-# LANGUAGE CPP, GADTs #-} - ------------------------------------------------------------------------------ --- --- Generating machine code (instruction selection) --- --- (c) The University of Glasgow 1996-2004 --- ------------------------------------------------------------------------------ - --- This is a big module, but, if you pay attention to --- (a) the sectioning, and (b) the type signatures, --- the structure should not be too overwhelming. - -module PPC.CodeGen ( - cmmTopCodeGen, - generateJumpTableForInstr, - InstrBlock -) - -where - -#include "HsVersions.h" - --- NCG stuff: -import GhcPrelude - -import GHC.Platform.Regs -import PPC.Instr -import PPC.Cond -import PPC.Regs -import CPrim -import NCGMonad ( NatM, getNewRegNat, getNewLabelNat - , getBlockIdNat, getPicBaseNat, getNewRegPairNat - , getPicBaseMaybeNat ) -import Instruction -import PIC -import Format -import RegClass -import Reg -import TargetReg -import GHC.Platform - --- Our intermediate code: -import GHC.Cmm.BlockId -import GHC.Cmm.Ppr ( pprExpr ) -import GHC.Cmm -import GHC.Cmm.Utils -import GHC.Cmm.Switch -import GHC.Cmm.CLabel -import GHC.Cmm.Dataflow.Block -import GHC.Cmm.Dataflow.Graph - --- The rest: -import OrdList -import Outputable -import GHC.Driver.Session - -import Control.Monad ( mapAndUnzipM, when ) -import Data.Bits -import Data.Word - -import BasicTypes -import FastString -import Util - --- ----------------------------------------------------------------------------- --- Top-level of the instruction selector - --- | 'InstrBlock's are the insn sequences generated by the insn selectors. --- They are really trees of insns to facilitate fast appending, where a --- left-to-right traversal (pre-order?) yields the insns in the correct --- order. - -cmmTopCodeGen - :: RawCmmDecl - -> NatM [NatCmmDecl RawCmmStatics Instr] - -cmmTopCodeGen (CmmProc info lab live graph) = do - let blocks = toBlockListEntryFirst graph - (nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks - dflags <- getDynFlags - let proc = CmmProc info lab live (ListGraph $ concat nat_blocks) - tops = proc : concat statics - os = platformOS $ targetPlatform dflags - arch = platformArch $ targetPlatform dflags - case arch of - ArchPPC | os == OSAIX -> return tops - | otherwise -> do - picBaseMb <- getPicBaseMaybeNat - case picBaseMb of - Just picBase -> initializePicBase_ppc arch os picBase tops - Nothing -> return tops - ArchPPC_64 ELF_V1 -> fixup_entry tops - -- generating function descriptor is handled in - -- pretty printer - ArchPPC_64 ELF_V2 -> fixup_entry tops - -- generating function prologue is handled in - -- pretty printer - _ -> panic "PPC.cmmTopCodeGen: unknown arch" - where - fixup_entry (CmmProc info lab live (ListGraph (entry:blocks)) : statics) - = do - let BasicBlock bID insns = entry - bID' <- if lab == (blockLbl bID) - then newBlockId - else return bID - let b' = BasicBlock bID' insns - return (CmmProc info lab live (ListGraph (b':blocks)) : statics) - fixup_entry _ = panic "cmmTopCodegen: Broken CmmProc" - -cmmTopCodeGen (CmmData sec dat) = do - return [CmmData sec dat] -- no translation, we just use CmmStatic - -basicBlockCodeGen - :: Block CmmNode C C - -> NatM ( [NatBasicBlock Instr] - , [NatCmmDecl RawCmmStatics Instr]) - -basicBlockCodeGen block = do - let (_, nodes, tail) = blockSplit block - id = entryLabel block - stmts = blockToList nodes - mid_instrs <- stmtsToInstrs stmts - tail_instrs <- stmtToInstrs tail - let instrs = mid_instrs `appOL` tail_instrs - -- code generation may introduce new basic block boundaries, which - -- are indicated by the NEWBLOCK instruction. We must split up the - -- instruction stream into basic blocks again. Also, we extract - -- LDATAs here too. - let - (top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs - - mkBlocks (NEWBLOCK id) (instrs,blocks,statics) - = ([], BasicBlock id instrs : blocks, statics) - mkBlocks (LDATA sec dat) (instrs,blocks,statics) - = (instrs, blocks, CmmData sec dat:statics) - mkBlocks instr (instrs,blocks,statics) - = (instr:instrs, blocks, statics) - return (BasicBlock id top : other_blocks, statics) - -stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock -stmtsToInstrs stmts - = do instrss <- mapM stmtToInstrs stmts - return (concatOL instrss) - -stmtToInstrs :: CmmNode e x -> NatM InstrBlock -stmtToInstrs stmt = do - dflags <- getDynFlags - case stmt of - CmmComment s -> return (unitOL (COMMENT s)) - CmmTick {} -> return nilOL - CmmUnwind {} -> return nilOL - - CmmAssign reg src - | isFloatType ty -> assignReg_FltCode format reg src - | target32Bit (targetPlatform dflags) && - isWord64 ty -> assignReg_I64Code reg src - | otherwise -> assignReg_IntCode format reg src - where ty = cmmRegType dflags reg - format = cmmTypeFormat ty - - CmmStore addr src - | isFloatType ty -> assignMem_FltCode format addr src - | target32Bit (targetPlatform dflags) && - isWord64 ty -> assignMem_I64Code addr src - | otherwise -> assignMem_IntCode format addr src - where ty = cmmExprType dflags src - format = cmmTypeFormat ty - - CmmUnsafeForeignCall target result_regs args - -> genCCall target result_regs args - - CmmBranch id -> genBranch id - CmmCondBranch arg true false prediction -> do - b1 <- genCondJump true arg prediction - b2 <- genBranch false - return (b1 `appOL` b2) - CmmSwitch arg ids -> do dflags <- getDynFlags - genSwitch dflags arg ids - CmmCall { cml_target = arg - , cml_args_regs = gregs } -> do - dflags <- getDynFlags - genJump arg (jumpRegs dflags gregs) - _ -> - panic "stmtToInstrs: statement should have been cps'd away" - -jumpRegs :: DynFlags -> [GlobalReg] -> [Reg] -jumpRegs dflags gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ] - where platform = targetPlatform dflags - --------------------------------------------------------------------------------- --- | 'InstrBlock's are the insn sequences generated by the insn selectors. --- They are really trees of insns to facilitate fast appending, where a --- left-to-right traversal yields the insns in the correct order. --- -type InstrBlock - = OrdList Instr - - --- | Register's passed up the tree. If the stix code forces the register --- to live in a pre-decided machine register, it comes out as @Fixed@; --- otherwise, it comes out as @Any@, and the parent can decide which --- register to put it in. --- -data Register - = Fixed Format Reg InstrBlock - | Any Format (Reg -> InstrBlock) - - -swizzleRegisterRep :: Register -> Format -> Register -swizzleRegisterRep (Fixed _ reg code) format = Fixed format reg code -swizzleRegisterRep (Any _ codefn) format = Any format codefn - - --- | Grab the Reg for a CmmReg -getRegisterReg :: Platform -> CmmReg -> Reg - -getRegisterReg _ (CmmLocal (LocalReg u pk)) - = RegVirtual $ mkVirtualReg u (cmmTypeFormat pk) - -getRegisterReg platform (CmmGlobal mid) - = case globalRegMaybe platform mid of - Just reg -> RegReal reg - Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid) - -- By this stage, the only MagicIds remaining should be the - -- ones which map to a real machine register on this - -- platform. Hence ... - --- | Convert a BlockId to some CmmStatic data -jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic -jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags)) -jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel) - where blockLabel = blockLbl blockid - - - --- ----------------------------------------------------------------------------- --- General things for putting together code sequences - --- Expand CmmRegOff. ToDo: should we do it this way around, or convert --- CmmExprs into CmmRegOff? -mangleIndexTree :: DynFlags -> CmmExpr -> CmmExpr -mangleIndexTree dflags (CmmRegOff reg off) - = CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)] - where width = typeWidth (cmmRegType dflags reg) - -mangleIndexTree _ _ - = panic "PPC.CodeGen.mangleIndexTree: no match" - --- ----------------------------------------------------------------------------- --- Code gen for 64-bit arithmetic on 32-bit platforms - -{- -Simple support for generating 64-bit code (ie, 64 bit values and 64 -bit assignments) on 32-bit platforms. Unlike the main code generator -we merely shoot for generating working code as simply as possible, and -pay little attention to code quality. Specifically, there is no -attempt to deal cleverly with the fixed-vs-floating register -distinction; all values are generated into (pairs of) floating -registers, even if this would mean some redundant reg-reg moves as a -result. Only one of the VRegUniques is returned, since it will be -of the VRegUniqueLo form, and the upper-half VReg can be determined -by applying getHiVRegFromLo to it. --} - -data ChildCode64 -- a.k.a "Register64" - = ChildCode64 - InstrBlock -- code - Reg -- the lower 32-bit temporary which contains the - -- result; use getHiVRegFromLo to find the other - -- VRegUnique. Rules of this simplified insn - -- selection game are therefore that the returned - -- Reg may be modified - - --- | Compute an expression into a register, but --- we don't mind which one it is. -getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock) -getSomeReg expr = do - r <- getRegister expr - case r of - Any rep code -> do - tmp <- getNewRegNat rep - return (tmp, code tmp) - Fixed _ reg code -> - return (reg, code) - -getI64Amodes :: CmmExpr -> NatM (AddrMode, AddrMode, InstrBlock) -getI64Amodes addrTree = do - Amode hi_addr addr_code <- getAmode D addrTree - case addrOffset hi_addr 4 of - Just lo_addr -> return (hi_addr, lo_addr, addr_code) - Nothing -> do (hi_ptr, code) <- getSomeReg addrTree - return (AddrRegImm hi_ptr (ImmInt 0), - AddrRegImm hi_ptr (ImmInt 4), - code) - - -assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock -assignMem_I64Code addrTree valueTree = do - (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree - ChildCode64 vcode rlo <- iselExpr64 valueTree - let - rhi = getHiVRegFromLo rlo - - -- Big-endian store - mov_hi = ST II32 rhi hi_addr - mov_lo = ST II32 rlo lo_addr - return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi) - - -assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock -assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do - ChildCode64 vcode r_src_lo <- iselExpr64 valueTree - let - r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32 - r_dst_hi = getHiVRegFromLo r_dst_lo - r_src_hi = getHiVRegFromLo r_src_lo - mov_lo = MR r_dst_lo r_src_lo - mov_hi = MR r_dst_hi r_src_hi - return ( - vcode `snocOL` mov_lo `snocOL` mov_hi - ) - -assignReg_I64Code _ _ - = panic "assignReg_I64Code(powerpc): invalid lvalue" - - -iselExpr64 :: CmmExpr -> NatM ChildCode64 -iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do - (hi_addr, lo_addr, addr_code) <- getI64Amodes addrTree - (rlo, rhi) <- getNewRegPairNat II32 - let mov_hi = LD II32 rhi hi_addr - mov_lo = LD II32 rlo lo_addr - return $ ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi) - rlo - -iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty - = return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32)) - -iselExpr64 (CmmLit (CmmInt i _)) = do - (rlo,rhi) <- getNewRegPairNat II32 - let - half0 = fromIntegral (fromIntegral i :: Word16) - half1 = fromIntegral (fromIntegral (i `shiftR` 16) :: Word16) - half2 = fromIntegral (fromIntegral (i `shiftR` 32) :: Word16) - half3 = fromIntegral (fromIntegral (i `shiftR` 48) :: Word16) - - code = toOL [ - LIS rlo (ImmInt half1), - OR rlo rlo (RIImm $ ImmInt half0), - LIS rhi (ImmInt half3), - OR rhi rhi (RIImm $ ImmInt half2) - ] - return (ChildCode64 code rlo) - -iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do - ChildCode64 code1 r1lo <- iselExpr64 e1 - ChildCode64 code2 r2lo <- iselExpr64 e2 - (rlo,rhi) <- getNewRegPairNat II32 - let - r1hi = getHiVRegFromLo r1lo - r2hi = getHiVRegFromLo r2lo - code = code1 `appOL` - code2 `appOL` - toOL [ ADDC rlo r1lo r2lo, - ADDE rhi r1hi r2hi ] - return (ChildCode64 code rlo) - -iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do - ChildCode64 code1 r1lo <- iselExpr64 e1 - ChildCode64 code2 r2lo <- iselExpr64 e2 - (rlo,rhi) <- getNewRegPairNat II32 - let - r1hi = getHiVRegFromLo r1lo - r2hi = getHiVRegFromLo r2lo - code = code1 `appOL` - code2 `appOL` - toOL [ SUBFC rlo r2lo (RIReg r1lo), - SUBFE rhi r2hi r1hi ] - return (ChildCode64 code rlo) - -iselExpr64 (CmmMachOp (MO_UU_Conv W32 W64) [expr]) = do - (expr_reg,expr_code) <- getSomeReg expr - (rlo, rhi) <- getNewRegPairNat II32 - let mov_hi = LI rhi (ImmInt 0) - mov_lo = MR rlo expr_reg - return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi) - rlo - -iselExpr64 (CmmMachOp (MO_SS_Conv W32 W64) [expr]) = do - (expr_reg,expr_code) <- getSomeReg expr - (rlo, rhi) <- getNewRegPairNat II32 - let mov_hi = SRA II32 rhi expr_reg (RIImm (ImmInt 31)) - mov_lo = MR rlo expr_reg - return $ ChildCode64 (expr_code `snocOL` mov_lo `snocOL` mov_hi) - rlo -iselExpr64 expr - = pprPanic "iselExpr64(powerpc)" (pprExpr expr) - - - -getRegister :: CmmExpr -> NatM Register -getRegister e = do dflags <- getDynFlags - getRegister' dflags e - -getRegister' :: DynFlags -> CmmExpr -> NatM Register - -getRegister' dflags (CmmReg (CmmGlobal PicBaseReg)) - | OSAIX <- platformOS (targetPlatform dflags) = do - let code dst = toOL [ LD II32 dst tocAddr ] - tocAddr = AddrRegImm toc (ImmLit (text "ghc_toc_table[TC]")) - return (Any II32 code) - | target32Bit (targetPlatform dflags) = do - reg <- getPicBaseNat $ archWordFormat (target32Bit (targetPlatform dflags)) - return (Fixed (archWordFormat (target32Bit (targetPlatform dflags))) - reg nilOL) - | otherwise = return (Fixed II64 toc nilOL) - -getRegister' dflags (CmmReg reg) - = return (Fixed (cmmTypeFormat (cmmRegType dflags reg)) - (getRegisterReg (targetPlatform dflags) reg) nilOL) - -getRegister' dflags tree@(CmmRegOff _ _) - = getRegister' dflags (mangleIndexTree dflags tree) - - -- for 32-bit architectures, support some 64 -> 32 bit conversions: - -- TO_W_(x), TO_W_(x >> 32) - -getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32) - [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) - | target32Bit (targetPlatform dflags) = do - ChildCode64 code rlo <- iselExpr64 x - return $ Fixed II32 (getHiVRegFromLo rlo) code - -getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32) - [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) - | target32Bit (targetPlatform dflags) = do - ChildCode64 code rlo <- iselExpr64 x - return $ Fixed II32 (getHiVRegFromLo rlo) code - -getRegister' dflags (CmmMachOp (MO_UU_Conv W64 W32) [x]) - | target32Bit (targetPlatform dflags) = do - ChildCode64 code rlo <- iselExpr64 x - return $ Fixed II32 rlo code - -getRegister' dflags (CmmMachOp (MO_SS_Conv W64 W32) [x]) - | target32Bit (targetPlatform dflags) = do - ChildCode64 code rlo <- iselExpr64 x - return $ Fixed II32 rlo code - -getRegister' dflags (CmmLoad mem pk) - | not (isWord64 pk) = do - let platform = targetPlatform dflags - Amode addr addr_code <- getAmode D mem - let code dst = ASSERT((targetClassOfReg platform dst == RcDouble) == isFloatType pk) - addr_code `snocOL` LD format dst addr - return (Any format code) - | not (target32Bit (targetPlatform dflags)) = do - Amode addr addr_code <- getAmode DS mem - let code dst = addr_code `snocOL` LD II64 dst addr - return (Any II64 code) - - where format = cmmTypeFormat pk - --- catch simple cases of zero- or sign-extended load -getRegister' _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad mem _]) = do - Amode addr addr_code <- getAmode D mem - return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr)) - -getRegister' _ (CmmMachOp (MO_XX_Conv W8 W32) [CmmLoad mem _]) = do - Amode addr addr_code <- getAmode D mem - return (Any II32 (\dst -> addr_code `snocOL` LD II8 dst addr)) - -getRegister' _ (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad mem _]) = do - Amode addr addr_code <- getAmode D mem - return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr)) - -getRegister' _ (CmmMachOp (MO_XX_Conv W8 W64) [CmmLoad mem _]) = do - Amode addr addr_code <- getAmode D mem - return (Any II64 (\dst -> addr_code `snocOL` LD II8 dst addr)) - --- Note: there is no Load Byte Arithmetic instruction, so no signed case here - -getRegister' _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad mem _]) = do - Amode addr addr_code <- getAmode D mem - return (Any II32 (\dst -> addr_code `snocOL` LD II16 dst addr)) - -getRegister' _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad mem _]) = do - Amode addr addr_code <- getAmode D mem - return (Any II32 (\dst -> addr_code `snocOL` LA II16 dst addr)) - -getRegister' _ (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad mem _]) = do - Amode addr addr_code <- getAmode D mem - return (Any II64 (\dst -> addr_code `snocOL` LD II16 dst addr)) - -getRegister' _ (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad mem _]) = do - Amode addr addr_code <- getAmode D mem - return (Any II64 (\dst -> addr_code `snocOL` LA II16 dst addr)) - -getRegister' _ (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad mem _]) = do - Amode addr addr_code <- getAmode D mem - return (Any II64 (\dst -> addr_code `snocOL` LD II32 dst addr)) - -getRegister' _ (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad mem _]) = do - -- lwa is DS-form. See Note [Power instruction format] - Amode addr addr_code <- getAmode DS mem - return (Any II64 (\dst -> addr_code `snocOL` LA II32 dst addr)) - -getRegister' dflags (CmmMachOp mop [x]) -- unary MachOps - = case mop of - MO_Not rep -> triv_ucode_int rep NOT - - MO_F_Neg w -> triv_ucode_float w FNEG - MO_S_Neg w -> triv_ucode_int w NEG - - MO_FF_Conv W64 W32 -> trivialUCode FF32 FRSP x - MO_FF_Conv W32 W64 -> conversionNop FF64 x - - MO_FS_Conv from to -> coerceFP2Int from to x - MO_SF_Conv from to -> coerceInt2FP from to x - - MO_SS_Conv from to - | from >= to -> conversionNop (intFormat to) x - | otherwise -> triv_ucode_int to (EXTS (intFormat from)) - - MO_UU_Conv from to - | from >= to -> conversionNop (intFormat to) x - | otherwise -> clearLeft from to - - MO_XX_Conv _ to -> conversionNop (intFormat to) x - - _ -> panic "PPC.CodeGen.getRegister: no match" - - where - triv_ucode_int width instr = trivialUCode (intFormat width) instr x - triv_ucode_float width instr = trivialUCode (floatFormat width) instr x - - conversionNop new_format expr - = do e_code <- getRegister' dflags expr - return (swizzleRegisterRep e_code new_format) - - clearLeft from to - = do (src1, code1) <- getSomeReg x - let arch_fmt = intFormat (wordWidth dflags) - arch_bits = widthInBits (wordWidth dflags) - size = widthInBits from - code dst = code1 `snocOL` - CLRLI arch_fmt dst src1 (arch_bits - size) - return (Any (intFormat to) code) - -getRegister' _ (CmmMachOp mop [x, y]) -- dyadic PrimOps - = case mop of - MO_F_Eq _ -> condFltReg EQQ x y - MO_F_Ne _ -> condFltReg NE x y - MO_F_Gt _ -> condFltReg GTT x y - MO_F_Ge _ -> condFltReg GE x y - MO_F_Lt _ -> condFltReg LTT x y - MO_F_Le _ -> condFltReg LE x y - - MO_Eq rep -> condIntReg EQQ rep x y - MO_Ne rep -> condIntReg NE rep x y - - MO_S_Gt rep -> condIntReg GTT rep x y - MO_S_Ge rep -> condIntReg GE rep x y - MO_S_Lt rep -> condIntReg LTT rep x y - MO_S_Le rep -> condIntReg LE rep x y - - MO_U_Gt rep -> condIntReg GU rep x y - MO_U_Ge rep -> condIntReg GEU rep x y - MO_U_Lt rep -> condIntReg LU rep x y - MO_U_Le rep -> condIntReg LEU rep x y - - MO_F_Add w -> triv_float w FADD - MO_F_Sub w -> triv_float w FSUB - MO_F_Mul w -> triv_float w FMUL - MO_F_Quot w -> triv_float w FDIV - - -- optimize addition with 32-bit immediate - -- (needed for PIC) - MO_Add W32 -> - case y of - CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate W32 True imm - -> trivialCode W32 True ADD x (CmmLit $ CmmInt imm immrep) - CmmLit lit - -> do - (src, srcCode) <- getSomeReg x - let imm = litToImm lit - code dst = srcCode `appOL` toOL [ - ADDIS dst src (HA imm), - ADD dst dst (RIImm (LO imm)) - ] - return (Any II32 code) - _ -> trivialCode W32 True ADD x y - - MO_Add rep -> trivialCode rep True ADD x y - MO_Sub rep -> - case y of - CmmLit (CmmInt imm immrep) | Just _ <- makeImmediate rep True (-imm) - -> trivialCode rep True ADD x (CmmLit $ CmmInt (-imm) immrep) - _ -> case x of - CmmLit (CmmInt imm _) - | Just _ <- makeImmediate rep True imm - -- subfi ('subtract from' with immediate) doesn't exist - -> trivialCode rep True SUBFC y x - _ -> trivialCodeNoImm' (intFormat rep) SUBF y x - - MO_Mul rep -> shiftMulCode rep True MULL x y - MO_S_MulMayOflo rep -> do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - let - format = intFormat rep - code dst = code1 `appOL` code2 - `appOL` toOL [ MULLO format dst src1 src2 - , MFOV format dst - ] - return (Any format code) - - MO_S_Quot rep -> divCode rep True x y - MO_U_Quot rep -> divCode rep False x y - - MO_S_Rem rep -> remainder rep True x y - MO_U_Rem rep -> remainder rep False x y - - MO_And rep -> case y of - (CmmLit (CmmInt imm _)) | imm == -8 || imm == -4 - -> do - (src, srcCode) <- getSomeReg x - let clear_mask = if imm == -4 then 2 else 3 - fmt = intFormat rep - code dst = srcCode - `appOL` unitOL (CLRRI fmt dst src clear_mask) - return (Any fmt code) - _ -> trivialCode rep False AND x y - MO_Or rep -> trivialCode rep False OR x y - MO_Xor rep -> trivialCode rep False XOR x y - - MO_Shl rep -> shiftMulCode rep False SL x y - MO_S_Shr rep -> srCode rep True SRA x y - MO_U_Shr rep -> srCode rep False SR x y - _ -> panic "PPC.CodeGen.getRegister: no match" - - where - triv_float :: Width -> (Format -> Reg -> Reg -> Reg -> Instr) -> NatM Register - triv_float width instr = trivialCodeNoImm (floatFormat width) instr x y - - remainder :: Width -> Bool -> CmmExpr -> CmmExpr -> NatM Register - remainder rep sgn x y = do - let fmt = intFormat rep - tmp <- getNewRegNat fmt - code <- remainderCode rep sgn tmp x y - return (Any fmt code) - - -getRegister' _ (CmmLit (CmmInt i rep)) - | Just imm <- makeImmediate rep True i - = let - code dst = unitOL (LI dst imm) - in - return (Any (intFormat rep) code) - -getRegister' _ (CmmLit (CmmFloat f frep)) = do - lbl <- getNewLabelNat - dflags <- getDynFlags - dynRef <- cmmMakeDynamicReference dflags DataReference lbl - Amode addr addr_code <- getAmode D dynRef - let format = floatFormat frep - code dst = - LDATA (Section ReadOnlyData lbl) - (RawCmmStatics lbl [CmmStaticLit (CmmFloat f frep)]) - `consOL` (addr_code `snocOL` LD format dst addr) - return (Any format code) - -getRegister' dflags (CmmLit lit) - | target32Bit (targetPlatform dflags) - = let rep = cmmLitType dflags lit - imm = litToImm lit - code dst = toOL [ - LIS dst (HA imm), - ADD dst dst (RIImm (LO imm)) - ] - in return (Any (cmmTypeFormat rep) code) - | otherwise - = do lbl <- getNewLabelNat - dflags <- getDynFlags - dynRef <- cmmMakeDynamicReference dflags DataReference lbl - Amode addr addr_code <- getAmode D dynRef - let rep = cmmLitType dflags lit - format = cmmTypeFormat rep - code dst = - LDATA (Section ReadOnlyData lbl) (RawCmmStatics lbl [CmmStaticLit lit]) - `consOL` (addr_code `snocOL` LD format dst addr) - return (Any format code) - -getRegister' _ other = pprPanic "getRegister(ppc)" (pprExpr other) - - -- extend?Rep: wrap integer expression of type `from` - -- in a conversion to `to` -extendSExpr :: Width -> Width -> CmmExpr -> CmmExpr -extendSExpr from to x = CmmMachOp (MO_SS_Conv from to) [x] - -extendUExpr :: Width -> Width -> CmmExpr -> CmmExpr -extendUExpr from to x = CmmMachOp (MO_UU_Conv from to) [x] - --- ----------------------------------------------------------------------------- --- The 'Amode' type: Memory addressing modes passed up the tree. - -data Amode - = Amode AddrMode InstrBlock - -{- -Now, given a tree (the argument to a CmmLoad) that references memory, -produce a suitable addressing mode. - -A Rule of the Game (tm) for Amodes: use of the addr bit must -immediately follow use of the code part, since the code part puts -values in registers which the addr then refers to. So you can't put -anything in between, lest it overwrite some of those registers. If -you need to do some other computation between the code part and use of -the addr bit, first store the effective address from the amode in a -temporary, then do the other computation, and then use the temporary: - - code - LEA amode, tmp - ... other computation ... - ... (tmp) ... --} - -{- Note [Power instruction format] -In some instructions the 16 bit offset must be a multiple of 4, i.e. -the two least significant bits must be zero. The "Power ISA" specification -calls these instruction formats "DS-FORM" and the instructions with -arbitrary 16 bit offsets are "D-FORM". - -The Power ISA specification document can be obtained from www.power.org. --} -data InstrForm = D | DS - -getAmode :: InstrForm -> CmmExpr -> NatM Amode -getAmode inf tree@(CmmRegOff _ _) - = do dflags <- getDynFlags - getAmode inf (mangleIndexTree dflags tree) - -getAmode _ (CmmMachOp (MO_Sub W32) [x, CmmLit (CmmInt i _)]) - | Just off <- makeImmediate W32 True (-i) - = do - (reg, code) <- getSomeReg x - return (Amode (AddrRegImm reg off) code) - - -getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit (CmmInt i _)]) - | Just off <- makeImmediate W32 True i - = do - (reg, code) <- getSomeReg x - return (Amode (AddrRegImm reg off) code) - -getAmode D (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)]) - | Just off <- makeImmediate W64 True (-i) - = do - (reg, code) <- getSomeReg x - return (Amode (AddrRegImm reg off) code) - - -getAmode D (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)]) - | Just off <- makeImmediate W64 True i - = do - (reg, code) <- getSomeReg x - return (Amode (AddrRegImm reg off) code) - -getAmode DS (CmmMachOp (MO_Sub W64) [x, CmmLit (CmmInt i _)]) - | Just off <- makeImmediate W64 True (-i) - = do - (reg, code) <- getSomeReg x - (reg', off', code') <- - if i `mod` 4 == 0 - then do return (reg, off, code) - else do - tmp <- getNewRegNat II64 - return (tmp, ImmInt 0, - code `snocOL` ADD tmp reg (RIImm off)) - return (Amode (AddrRegImm reg' off') code') - -getAmode DS (CmmMachOp (MO_Add W64) [x, CmmLit (CmmInt i _)]) - | Just off <- makeImmediate W64 True i - = do - (reg, code) <- getSomeReg x - (reg', off', code') <- - if i `mod` 4 == 0 - then do return (reg, off, code) - else do - tmp <- getNewRegNat II64 - return (tmp, ImmInt 0, - code `snocOL` ADD tmp reg (RIImm off)) - return (Amode (AddrRegImm reg' off') code') - - -- optimize addition with 32-bit immediate - -- (needed for PIC) -getAmode _ (CmmMachOp (MO_Add W32) [x, CmmLit lit]) - = do - dflags <- getDynFlags - (src, srcCode) <- getSomeReg x - let imm = litToImm lit - case () of - _ | OSAIX <- platformOS (targetPlatform dflags) - , isCmmLabelType lit -> - -- HA16/LO16 relocations on labels not supported on AIX - return (Amode (AddrRegImm src imm) srcCode) - | otherwise -> do - tmp <- getNewRegNat II32 - let code = srcCode `snocOL` ADDIS tmp src (HA imm) - return (Amode (AddrRegImm tmp (LO imm)) code) - where - isCmmLabelType (CmmLabel {}) = True - isCmmLabelType (CmmLabelOff {}) = True - isCmmLabelType (CmmLabelDiffOff {}) = True - isCmmLabelType _ = False - -getAmode _ (CmmLit lit) - = do - dflags <- getDynFlags - case platformArch $ targetPlatform dflags of - ArchPPC -> do - tmp <- getNewRegNat II32 - let imm = litToImm lit - code = unitOL (LIS tmp (HA imm)) - return (Amode (AddrRegImm tmp (LO imm)) code) - _ -> do -- TODO: Load from TOC, - -- see getRegister' _ (CmmLit lit) - tmp <- getNewRegNat II64 - let imm = litToImm lit - code = toOL [ - LIS tmp (HIGHESTA imm), - OR tmp tmp (RIImm (HIGHERA imm)), - SL II64 tmp tmp (RIImm (ImmInt 32)), - ORIS tmp tmp (HA imm) - ] - return (Amode (AddrRegImm tmp (LO imm)) code) - -getAmode _ (CmmMachOp (MO_Add W32) [x, y]) - = do - (regX, codeX) <- getSomeReg x - (regY, codeY) <- getSomeReg y - return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY)) - -getAmode _ (CmmMachOp (MO_Add W64) [x, y]) - = do - (regX, codeX) <- getSomeReg x - (regY, codeY) <- getSomeReg y - return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY)) - -getAmode _ other - = do - (reg, code) <- getSomeReg other - let - off = ImmInt 0 - return (Amode (AddrRegImm reg off) code) - - --- The 'CondCode' type: Condition codes passed up the tree. -data CondCode - = CondCode Bool Cond InstrBlock - --- Set up a condition code for a conditional branch. - -getCondCode :: CmmExpr -> NatM CondCode - --- almost the same as everywhere else - but we need to --- extend small integers to 32 bit or 64 bit first - -getCondCode (CmmMachOp mop [x, y]) - = do - case mop of - MO_F_Eq W32 -> condFltCode EQQ x y - MO_F_Ne W32 -> condFltCode NE x y - MO_F_Gt W32 -> condFltCode GTT x y - MO_F_Ge W32 -> condFltCode GE x y - MO_F_Lt W32 -> condFltCode LTT x y - MO_F_Le W32 -> condFltCode LE x y - - MO_F_Eq W64 -> condFltCode EQQ x y - MO_F_Ne W64 -> condFltCode NE x y - MO_F_Gt W64 -> condFltCode GTT x y - MO_F_Ge W64 -> condFltCode GE x y - MO_F_Lt W64 -> condFltCode LTT x y - MO_F_Le W64 -> condFltCode LE x y - - MO_Eq rep -> condIntCode EQQ rep x y - MO_Ne rep -> condIntCode NE rep x y - - MO_S_Gt rep -> condIntCode GTT rep x y - MO_S_Ge rep -> condIntCode GE rep x y - MO_S_Lt rep -> condIntCode LTT rep x y - MO_S_Le rep -> condIntCode LE rep x y - - MO_U_Gt rep -> condIntCode GU rep x y - MO_U_Ge rep -> condIntCode GEU rep x y - MO_U_Lt rep -> condIntCode LU rep x y - MO_U_Le rep -> condIntCode LEU rep x y - - _ -> pprPanic "getCondCode(powerpc)" (pprMachOp mop) - -getCondCode _ = panic "getCondCode(2)(powerpc)" - - --- @cond(Int|Flt)Code@: Turn a boolean expression into a condition, to be --- passed back up the tree. - -condIntCode :: Cond -> Width -> CmmExpr -> CmmExpr -> NatM CondCode -condIntCode cond width x y = do - dflags <- getDynFlags - condIntCode' (target32Bit (targetPlatform dflags)) cond width x y - -condIntCode' :: Bool -> Cond -> Width -> CmmExpr -> CmmExpr -> NatM CondCode - --- simple code for 64-bit on 32-bit platforms -condIntCode' True cond W64 x y - | condUnsigned cond - = do - ChildCode64 code_x x_lo <- iselExpr64 x - ChildCode64 code_y y_lo <- iselExpr64 y - let x_hi = getHiVRegFromLo x_lo - y_hi = getHiVRegFromLo y_lo - end_lbl <- getBlockIdNat - let code = code_x `appOL` code_y `appOL` toOL - [ CMPL II32 x_hi (RIReg y_hi) - , BCC NE end_lbl Nothing - , CMPL II32 x_lo (RIReg y_lo) - , BCC ALWAYS end_lbl Nothing - - , NEWBLOCK end_lbl - ] - return (CondCode False cond code) - | otherwise - = do - ChildCode64 code_x x_lo <- iselExpr64 x - ChildCode64 code_y y_lo <- iselExpr64 y - let x_hi = getHiVRegFromLo x_lo - y_hi = getHiVRegFromLo y_lo - end_lbl <- getBlockIdNat - cmp_lo <- getBlockIdNat - let code = code_x `appOL` code_y `appOL` toOL - [ CMP II32 x_hi (RIReg y_hi) - , BCC NE end_lbl Nothing - , CMP II32 x_hi (RIImm (ImmInt 0)) - , BCC LE cmp_lo Nothing - , CMPL II32 x_lo (RIReg y_lo) - , BCC ALWAYS end_lbl Nothing - , NEWBLOCK cmp_lo - , CMPL II32 y_lo (RIReg x_lo) - , BCC ALWAYS end_lbl Nothing - - , NEWBLOCK end_lbl - ] - return (CondCode False cond code) - --- optimize pointer tag checks. Operation andi. sets condition register --- so cmpi ..., 0 is redundant. -condIntCode' _ cond _ (CmmMachOp (MO_And _) [x, CmmLit (CmmInt imm rep)]) - (CmmLit (CmmInt 0 _)) - | not $ condUnsigned cond, - Just src2 <- makeImmediate rep False imm - = do - (src1, code) <- getSomeReg x - let code' = code `snocOL` AND r0 src1 (RIImm src2) - return (CondCode False cond code') - -condIntCode' _ cond width x (CmmLit (CmmInt y rep)) - | Just src2 <- makeImmediate rep (not $ condUnsigned cond) y - = do - let op_len = max W32 width - let extend = extendSExpr width op_len - (src1, code) <- getSomeReg (extend x) - let format = intFormat op_len - code' = code `snocOL` - (if condUnsigned cond then CMPL else CMP) format src1 (RIImm src2) - return (CondCode False cond code') - -condIntCode' _ cond width x y = do - let op_len = max W32 width - let extend = if condUnsigned cond then extendUExpr width op_len - else extendSExpr width op_len - (src1, code1) <- getSomeReg (extend x) - (src2, code2) <- getSomeReg (extend y) - let format = intFormat op_len - code' = code1 `appOL` code2 `snocOL` - (if condUnsigned cond then CMPL else CMP) format src1 (RIReg src2) - return (CondCode False cond code') - -condFltCode :: Cond -> CmmExpr -> CmmExpr -> NatM CondCode -condFltCode cond x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - let - code' = code1 `appOL` code2 `snocOL` FCMP src1 src2 - code'' = case cond of -- twiddle CR to handle unordered case - GE -> code' `snocOL` CRNOR ltbit eqbit gtbit - LE -> code' `snocOL` CRNOR gtbit eqbit ltbit - _ -> code' - where - ltbit = 0 ; eqbit = 2 ; gtbit = 1 - return (CondCode True cond code'') - - - --- ----------------------------------------------------------------------------- --- Generating assignments - --- Assignments are really at the heart of the whole code generation --- business. Almost all top-level nodes of any real importance are --- assignments, which correspond to loads, stores, or register --- transfers. If we're really lucky, some of the register transfers --- will go away, because we can use the destination register to --- complete the code generation for the right hand side. This only --- fails when the right hand side is forced into a fixed register --- (e.g. the result of a call). - -assignMem_IntCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock -assignReg_IntCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock - -assignMem_FltCode :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock -assignReg_FltCode :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock - -assignMem_IntCode pk addr src = do - (srcReg, code) <- getSomeReg src - Amode dstAddr addr_code <- case pk of - II64 -> getAmode DS addr - _ -> getAmode D addr - return $ code `appOL` addr_code `snocOL` ST pk srcReg dstAddr - --- dst is a reg, but src could be anything -assignReg_IntCode _ reg src - = do - dflags <- getDynFlags - let dst = getRegisterReg (targetPlatform dflags) reg - r <- getRegister src - return $ case r of - Any _ code -> code dst - Fixed _ freg fcode -> fcode `snocOL` MR dst freg - - - --- Easy, isn't it? -assignMem_FltCode = assignMem_IntCode -assignReg_FltCode = assignReg_IntCode - - - -genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock - -genJump (CmmLit (CmmLabel lbl)) regs - = return (unitOL $ JMP lbl regs) - -genJump tree gregs - = do - dflags <- getDynFlags - genJump' tree (platformToGCP (targetPlatform dflags)) gregs - -genJump' :: CmmExpr -> GenCCallPlatform -> [Reg] -> NatM InstrBlock - -genJump' tree (GCP64ELF 1) regs - = do - (target,code) <- getSomeReg tree - return (code - `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 0)) - `snocOL` LD II64 toc (AddrRegImm target (ImmInt 8)) - `snocOL` MTCTR r11 - `snocOL` LD II64 r11 (AddrRegImm target (ImmInt 16)) - `snocOL` BCTR [] Nothing regs) - -genJump' tree (GCP64ELF 2) regs - = do - (target,code) <- getSomeReg tree - return (code - `snocOL` MR r12 target - `snocOL` MTCTR r12 - `snocOL` BCTR [] Nothing regs) - -genJump' tree _ regs - = do - (target,code) <- getSomeReg tree - return (code `snocOL` MTCTR target `snocOL` BCTR [] Nothing regs) - --- ----------------------------------------------------------------------------- --- Unconditional branches -genBranch :: BlockId -> NatM InstrBlock -genBranch = return . toOL . mkJumpInstr - - --- ----------------------------------------------------------------------------- --- Conditional jumps - -{- -Conditional jumps are always to local labels, so we can use branch -instructions. We peek at the arguments to decide what kind of -comparison to do. --} - - -genCondJump - :: BlockId -- the branch target - -> CmmExpr -- the condition on which to branch - -> Maybe Bool - -> NatM InstrBlock - -genCondJump id bool prediction = do - CondCode _ cond code <- getCondCode bool - return (code `snocOL` BCC cond id prediction) - - - --- ----------------------------------------------------------------------------- --- Generating C calls - --- Now the biggest nightmare---calls. Most of the nastiness is buried in --- @get_arg@, which moves the arguments to the correct registers/stack --- locations. Apart from that, the code is easy. - -genCCall :: ForeignTarget -- function to call - -> [CmmFormal] -- where to put the result - -> [CmmActual] -- arguments (of mixed type) - -> NatM InstrBlock -genCCall (PrimTarget MO_ReadBarrier) _ _ - = return $ unitOL LWSYNC -genCCall (PrimTarget MO_WriteBarrier) _ _ - = return $ unitOL LWSYNC - -genCCall (PrimTarget MO_Touch) _ _ - = return $ nilOL - -genCCall (PrimTarget (MO_Prefetch_Data _)) _ _ - = return $ nilOL - -genCCall (PrimTarget (MO_AtomicRMW width amop)) [dst] [addr, n] - = do dflags <- getDynFlags - let platform = targetPlatform dflags - fmt = intFormat width - reg_dst = getRegisterReg platform (CmmLocal dst) - (instr, n_code) <- case amop of - AMO_Add -> getSomeRegOrImm ADD True reg_dst - AMO_Sub -> case n of - CmmLit (CmmInt i _) - | Just imm <- makeImmediate width True (-i) - -> return (ADD reg_dst reg_dst (RIImm imm), nilOL) - _ - -> do - (n_reg, n_code) <- getSomeReg n - return (SUBF reg_dst n_reg reg_dst, n_code) - AMO_And -> getSomeRegOrImm AND False reg_dst - AMO_Nand -> do (n_reg, n_code) <- getSomeReg n - return (NAND reg_dst reg_dst n_reg, n_code) - AMO_Or -> getSomeRegOrImm OR False reg_dst - AMO_Xor -> getSomeRegOrImm XOR False reg_dst - Amode addr_reg addr_code <- getAmodeIndex addr - lbl_retry <- getBlockIdNat - return $ n_code `appOL` addr_code - `appOL` toOL [ HWSYNC - , BCC ALWAYS lbl_retry Nothing - - , NEWBLOCK lbl_retry - , LDR fmt reg_dst addr_reg - , instr - , STC fmt reg_dst addr_reg - , BCC NE lbl_retry (Just False) - , ISYNC - ] - where - getAmodeIndex (CmmMachOp (MO_Add _) [x, y]) - = do - (regX, codeX) <- getSomeReg x - (regY, codeY) <- getSomeReg y - return (Amode (AddrRegReg regX regY) (codeX `appOL` codeY)) - getAmodeIndex other - = do - (reg, code) <- getSomeReg other - return (Amode (AddrRegReg r0 reg) code) -- NB: r0 is 0 here! - getSomeRegOrImm op sign dst - = case n of - CmmLit (CmmInt i _) | Just imm <- makeImmediate width sign i - -> return (op dst dst (RIImm imm), nilOL) - _ - -> do - (n_reg, n_code) <- getSomeReg n - return (op dst dst (RIReg n_reg), n_code) - -genCCall (PrimTarget (MO_AtomicRead width)) [dst] [addr] - = do dflags <- getDynFlags - let platform = targetPlatform dflags - fmt = intFormat width - reg_dst = getRegisterReg platform (CmmLocal dst) - form = if widthInBits width == 64 then DS else D - Amode addr_reg addr_code <- getAmode form addr - lbl_end <- getBlockIdNat - return $ addr_code `appOL` toOL [ HWSYNC - , LD fmt reg_dst addr_reg - , CMP fmt reg_dst (RIReg reg_dst) - , BCC NE lbl_end (Just False) - , BCC ALWAYS lbl_end Nothing - -- See Note [Seemingly useless cmp and bne] - , NEWBLOCK lbl_end - , ISYNC - ] - --- Note [Seemingly useless cmp and bne] --- In Power ISA, Book II, Section 4.4.1, Instruction Synchronize Instruction --- the second paragraph says that isync may complete before storage accesses --- "associated" with a preceding instruction have been performed. The cmp --- operation and the following bne introduce a data and control dependency --- on the load instruction (See also Power ISA, Book II, Appendix B.2.3, Safe --- Fetch). --- This is also what gcc does. - - -genCCall (PrimTarget (MO_AtomicWrite width)) [] [addr, val] = do - code <- assignMem_IntCode (intFormat width) addr val - return $ unitOL(HWSYNC) `appOL` code - -genCCall (PrimTarget (MO_Clz width)) [dst] [src] - = do dflags <- getDynFlags - let platform = targetPlatform dflags - reg_dst = getRegisterReg platform (CmmLocal dst) - if target32Bit platform && width == W64 - then do - ChildCode64 code vr_lo <- iselExpr64 src - lbl1 <- getBlockIdNat - lbl2 <- getBlockIdNat - lbl3 <- getBlockIdNat - let vr_hi = getHiVRegFromLo vr_lo - cntlz = toOL [ CMPL II32 vr_hi (RIImm (ImmInt 0)) - , BCC NE lbl2 Nothing - , BCC ALWAYS lbl1 Nothing - - , NEWBLOCK lbl1 - , CNTLZ II32 reg_dst vr_lo - , ADD reg_dst reg_dst (RIImm (ImmInt 32)) - , BCC ALWAYS lbl3 Nothing - - , NEWBLOCK lbl2 - , CNTLZ II32 reg_dst vr_hi - , BCC ALWAYS lbl3 Nothing - - , NEWBLOCK lbl3 - ] - return $ code `appOL` cntlz - else do - let format = if width == W64 then II64 else II32 - (s_reg, s_code) <- getSomeReg src - (pre, reg , post) <- - case width of - W64 -> return (nilOL, s_reg, nilOL) - W32 -> return (nilOL, s_reg, nilOL) - W16 -> do - reg_tmp <- getNewRegNat format - return - ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 65535)) - , reg_tmp - , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-16))) - ) - W8 -> do - reg_tmp <- getNewRegNat format - return - ( unitOL $ AND reg_tmp s_reg (RIImm (ImmInt 255)) - , reg_tmp - , unitOL $ ADD reg_dst reg_dst (RIImm (ImmInt (-24))) - ) - _ -> panic "genCall: Clz wrong format" - let cntlz = unitOL (CNTLZ format reg_dst reg) - return $ s_code `appOL` pre `appOL` cntlz `appOL` post - -genCCall (PrimTarget (MO_Ctz width)) [dst] [src] - = do dflags <- getDynFlags - let platform = targetPlatform dflags - reg_dst = getRegisterReg platform (CmmLocal dst) - if target32Bit platform && width == W64 - then do - let format = II32 - ChildCode64 code vr_lo <- iselExpr64 src - lbl1 <- getBlockIdNat - lbl2 <- getBlockIdNat - lbl3 <- getBlockIdNat - x' <- getNewRegNat format - x'' <- getNewRegNat format - r' <- getNewRegNat format - cnttzlo <- cnttz format reg_dst vr_lo - let vr_hi = getHiVRegFromLo vr_lo - cnttz64 = toOL [ CMPL format vr_lo (RIImm (ImmInt 0)) - , BCC NE lbl2 Nothing - , BCC ALWAYS lbl1 Nothing - - , NEWBLOCK lbl1 - , ADD x' vr_hi (RIImm (ImmInt (-1))) - , ANDC x'' x' vr_hi - , CNTLZ format r' x'' - -- 32 + (32 - clz(x'')) - , SUBFC reg_dst r' (RIImm (ImmInt 64)) - , BCC ALWAYS lbl3 Nothing - - , NEWBLOCK lbl2 - ] - `appOL` cnttzlo `appOL` - toOL [ BCC ALWAYS lbl3 Nothing - - , NEWBLOCK lbl3 - ] - return $ code `appOL` cnttz64 - else do - let format = if width == W64 then II64 else II32 - (s_reg, s_code) <- getSomeReg src - (reg_ctz, pre_code) <- - case width of - W64 -> return (s_reg, nilOL) - W32 -> return (s_reg, nilOL) - W16 -> do - reg_tmp <- getNewRegNat format - return (reg_tmp, unitOL $ ORIS reg_tmp s_reg (ImmInt 1)) - W8 -> do - reg_tmp <- getNewRegNat format - return (reg_tmp, unitOL $ OR reg_tmp s_reg (RIImm (ImmInt 256))) - _ -> panic "genCall: Ctz wrong format" - ctz_code <- cnttz format reg_dst reg_ctz - return $ s_code `appOL` pre_code `appOL` ctz_code - where - -- cnttz(x) = sizeof(x) - cntlz(~x & (x - 1)) - -- see Henry S. Warren, Hacker's Delight, p 107 - cnttz format dst src = do - let format_bits = 8 * formatInBytes format - x' <- getNewRegNat format - x'' <- getNewRegNat format - r' <- getNewRegNat format - return $ toOL [ ADD x' src (RIImm (ImmInt (-1))) - , ANDC x'' x' src - , CNTLZ format r' x'' - , SUBFC dst r' (RIImm (ImmInt (format_bits))) - ] - -genCCall target dest_regs argsAndHints - = do dflags <- getDynFlags - let platform = targetPlatform dflags - case target of - PrimTarget (MO_S_QuotRem width) -> divOp1 platform True width - dest_regs argsAndHints - PrimTarget (MO_U_QuotRem width) -> divOp1 platform False width - dest_regs argsAndHints - PrimTarget (MO_U_QuotRem2 width) -> divOp2 platform width dest_regs - argsAndHints - PrimTarget (MO_U_Mul2 width) -> multOp2 platform width dest_regs - argsAndHints - PrimTarget (MO_Add2 _) -> add2Op platform dest_regs argsAndHints - PrimTarget (MO_AddWordC _) -> addcOp platform dest_regs argsAndHints - PrimTarget (MO_SubWordC _) -> subcOp platform dest_regs argsAndHints - PrimTarget (MO_AddIntC width) -> addSubCOp ADDO platform width - dest_regs argsAndHints - PrimTarget (MO_SubIntC width) -> addSubCOp SUBFO platform width - dest_regs argsAndHints - PrimTarget MO_F64_Fabs -> fabs platform dest_regs argsAndHints - PrimTarget MO_F32_Fabs -> fabs platform dest_regs argsAndHints - _ -> genCCall' dflags (platformToGCP platform) - target dest_regs argsAndHints - where divOp1 platform signed width [res_q, res_r] [arg_x, arg_y] - = do let reg_q = getRegisterReg platform (CmmLocal res_q) - reg_r = getRegisterReg platform (CmmLocal res_r) - remainderCode width signed reg_q arg_x arg_y - <*> pure reg_r - - divOp1 _ _ _ _ _ - = panic "genCCall: Wrong number of arguments for divOp1" - divOp2 platform width [res_q, res_r] - [arg_x_high, arg_x_low, arg_y] - = do let reg_q = getRegisterReg platform (CmmLocal res_q) - reg_r = getRegisterReg platform (CmmLocal res_r) - fmt = intFormat width - half = 4 * (formatInBytes fmt) - (xh_reg, xh_code) <- getSomeReg arg_x_high - (xl_reg, xl_code) <- getSomeReg arg_x_low - (y_reg, y_code) <- getSomeReg arg_y - s <- getNewRegNat fmt - b <- getNewRegNat fmt - v <- getNewRegNat fmt - vn1 <- getNewRegNat fmt - vn0 <- getNewRegNat fmt - un32 <- getNewRegNat fmt - tmp <- getNewRegNat fmt - un10 <- getNewRegNat fmt - un1 <- getNewRegNat fmt - un0 <- getNewRegNat fmt - q1 <- getNewRegNat fmt - rhat <- getNewRegNat fmt - tmp1 <- getNewRegNat fmt - q0 <- getNewRegNat fmt - un21 <- getNewRegNat fmt - again1 <- getBlockIdNat - no1 <- getBlockIdNat - then1 <- getBlockIdNat - endif1 <- getBlockIdNat - again2 <- getBlockIdNat - no2 <- getBlockIdNat - then2 <- getBlockIdNat - endif2 <- getBlockIdNat - return $ y_code `appOL` xl_code `appOL` xh_code `appOL` - -- see Hacker's Delight p 196 Figure 9-3 - toOL [ -- b = 2 ^ (bits_in_word / 2) - LI b (ImmInt 1) - , SL fmt b b (RIImm (ImmInt half)) - -- s = clz(y) - , CNTLZ fmt s y_reg - -- v = y << s - , SL fmt v y_reg (RIReg s) - -- vn1 = upper half of v - , SR fmt vn1 v (RIImm (ImmInt half)) - -- vn0 = lower half of v - , CLRLI fmt vn0 v half - -- un32 = (u1 << s) - -- | (u0 >> (bits_in_word - s)) - , SL fmt un32 xh_reg (RIReg s) - , SUBFC tmp s - (RIImm (ImmInt (8 * formatInBytes fmt))) - , SR fmt tmp xl_reg (RIReg tmp) - , OR un32 un32 (RIReg tmp) - -- un10 = u0 << s - , SL fmt un10 xl_reg (RIReg s) - -- un1 = upper half of un10 - , SR fmt un1 un10 (RIImm (ImmInt half)) - -- un0 = lower half of un10 - , CLRLI fmt un0 un10 half - -- q1 = un32/vn1 - , DIV fmt False q1 un32 vn1 - -- rhat = un32 - q1*vn1 - , MULL fmt tmp q1 (RIReg vn1) - , SUBF rhat tmp un32 - , BCC ALWAYS again1 Nothing - - , NEWBLOCK again1 - -- if (q1 >= b || q1*vn0 > b*rhat + un1) - , CMPL fmt q1 (RIReg b) - , BCC GEU then1 Nothing - , BCC ALWAYS no1 Nothing - - , NEWBLOCK no1 - , MULL fmt tmp q1 (RIReg vn0) - , SL fmt tmp1 rhat (RIImm (ImmInt half)) - , ADD tmp1 tmp1 (RIReg un1) - , CMPL fmt tmp (RIReg tmp1) - , BCC LEU endif1 Nothing - , BCC ALWAYS then1 Nothing - - , NEWBLOCK then1 - -- q1 = q1 - 1 - , ADD q1 q1 (RIImm (ImmInt (-1))) - -- rhat = rhat + vn1 - , ADD rhat rhat (RIReg vn1) - -- if (rhat < b) goto again1 - , CMPL fmt rhat (RIReg b) - , BCC LTT again1 Nothing - , BCC ALWAYS endif1 Nothing - - , NEWBLOCK endif1 - -- un21 = un32*b + un1 - q1*v - , SL fmt un21 un32 (RIImm (ImmInt half)) - , ADD un21 un21 (RIReg un1) - , MULL fmt tmp q1 (RIReg v) - , SUBF un21 tmp un21 - -- compute second quotient digit - -- q0 = un21/vn1 - , DIV fmt False q0 un21 vn1 - -- rhat = un21- q0*vn1 - , MULL fmt tmp q0 (RIReg vn1) - , SUBF rhat tmp un21 - , BCC ALWAYS again2 Nothing - - , NEWBLOCK again2 - -- if (q0>b || q0*vn0 > b*rhat + un0) - , CMPL fmt q0 (RIReg b) - , BCC GEU then2 Nothing - , BCC ALWAYS no2 Nothing - - , NEWBLOCK no2 - , MULL fmt tmp q0 (RIReg vn0) - , SL fmt tmp1 rhat (RIImm (ImmInt half)) - , ADD tmp1 tmp1 (RIReg un0) - , CMPL fmt tmp (RIReg tmp1) - , BCC LEU endif2 Nothing - , BCC ALWAYS then2 Nothing - - , NEWBLOCK then2 - -- q0 = q0 - 1 - , ADD q0 q0 (RIImm (ImmInt (-1))) - -- rhat = rhat + vn1 - , ADD rhat rhat (RIReg vn1) - -- if (rhat<b) goto again2 - , CMPL fmt rhat (RIReg b) - , BCC LTT again2 Nothing - , BCC ALWAYS endif2 Nothing - - , NEWBLOCK endif2 - -- compute remainder - -- r = (un21*b + un0 - q0*v) >> s - , SL fmt reg_r un21 (RIImm (ImmInt half)) - , ADD reg_r reg_r (RIReg un0) - , MULL fmt tmp q0 (RIReg v) - , SUBF reg_r tmp reg_r - , SR fmt reg_r reg_r (RIReg s) - -- compute quotient - -- q = q1*b + q0 - , SL fmt reg_q q1 (RIImm (ImmInt half)) - , ADD reg_q reg_q (RIReg q0) - ] - divOp2 _ _ _ _ - = panic "genCCall: Wrong number of arguments for divOp2" - multOp2 platform width [res_h, res_l] [arg_x, arg_y] - = do let reg_h = getRegisterReg platform (CmmLocal res_h) - reg_l = getRegisterReg platform (CmmLocal res_l) - fmt = intFormat width - (x_reg, x_code) <- getSomeReg arg_x - (y_reg, y_code) <- getSomeReg arg_y - return $ y_code `appOL` x_code - `appOL` toOL [ MULL fmt reg_l x_reg (RIReg y_reg) - , MULHU fmt reg_h x_reg y_reg - ] - multOp2 _ _ _ _ - = panic "genCall: Wrong number of arguments for multOp2" - add2Op platform [res_h, res_l] [arg_x, arg_y] - = do let reg_h = getRegisterReg platform (CmmLocal res_h) - reg_l = getRegisterReg platform (CmmLocal res_l) - (x_reg, x_code) <- getSomeReg arg_x - (y_reg, y_code) <- getSomeReg arg_y - return $ y_code `appOL` x_code - `appOL` toOL [ LI reg_h (ImmInt 0) - , ADDC reg_l x_reg y_reg - , ADDZE reg_h reg_h - ] - add2Op _ _ _ - = panic "genCCall: Wrong number of arguments/results for add2" - - addcOp platform [res_r, res_c] [arg_x, arg_y] - = add2Op platform [res_c {-hi-}, res_r {-lo-}] [arg_x, arg_y] - addcOp _ _ _ - = panic "genCCall: Wrong number of arguments/results for addc" - - -- PowerPC subfc sets the carry for rT = ~(rA) + rB + 1, - -- which is 0 for borrow and 1 otherwise. We need 1 and 0 - -- so xor with 1. - subcOp platform [res_r, res_c] [arg_x, arg_y] - = do let reg_r = getRegisterReg platform (CmmLocal res_r) - reg_c = getRegisterReg platform (CmmLocal res_c) - (x_reg, x_code) <- getSomeReg arg_x - (y_reg, y_code) <- getSomeReg arg_y - return $ y_code `appOL` x_code - `appOL` toOL [ LI reg_c (ImmInt 0) - , SUBFC reg_r y_reg (RIReg x_reg) - , ADDZE reg_c reg_c - , XOR reg_c reg_c (RIImm (ImmInt 1)) - ] - subcOp _ _ _ - = panic "genCCall: Wrong number of arguments/results for subc" - addSubCOp instr platform width [res_r, res_c] [arg_x, arg_y] - = do let reg_r = getRegisterReg platform (CmmLocal res_r) - reg_c = getRegisterReg platform (CmmLocal res_c) - (x_reg, x_code) <- getSomeReg arg_x - (y_reg, y_code) <- getSomeReg arg_y - return $ y_code `appOL` x_code - `appOL` toOL [ instr reg_r y_reg x_reg, - -- SUBFO argument order reversed! - MFOV (intFormat width) reg_c - ] - addSubCOp _ _ _ _ _ - = panic "genCall: Wrong number of arguments/results for addC" - fabs platform [res] [arg] - = do let res_r = getRegisterReg platform (CmmLocal res) - (arg_reg, arg_code) <- getSomeReg arg - return $ arg_code `snocOL` FABS res_r arg_reg - fabs _ _ _ - = panic "genCall: Wrong number of arguments/results for fabs" - --- TODO: replace 'Int' by an enum such as 'PPC_64ABI' -data GenCCallPlatform = GCP32ELF | GCP64ELF !Int | GCPAIX - -platformToGCP :: Platform -> GenCCallPlatform -platformToGCP platform - = case platformOS platform of - OSAIX -> GCPAIX - _ -> case platformArch platform of - ArchPPC -> GCP32ELF - ArchPPC_64 ELF_V1 -> GCP64ELF 1 - ArchPPC_64 ELF_V2 -> GCP64ELF 2 - _ -> panic "platformToGCP: Not PowerPC" - - -genCCall' - :: DynFlags - -> GenCCallPlatform - -> ForeignTarget -- function to call - -> [CmmFormal] -- where to put the result - -> [CmmActual] -- arguments (of mixed type) - -> NatM InstrBlock - -{- - PowerPC Linux uses the System V Release 4 Calling Convention - for PowerPC. It is described in the - "System V Application Binary Interface PowerPC Processor Supplement". - - PowerPC 64 Linux uses the System V Release 4 Calling Convention for - 64-bit PowerPC. It is specified in - "64-bit PowerPC ELF Application Binary Interface Supplement 1.9" - (PPC64 ELF v1.9). - - PowerPC 64 Linux in little endian mode uses the "Power Architecture 64-Bit - ELF V2 ABI Specification -- OpenPOWER ABI for Linux Supplement" - (PPC64 ELF v2). - - AIX follows the "PowerOpen ABI: Application Binary Interface Big-Endian - 32-Bit Hardware Implementation" - - All four conventions are similar: - Parameters may be passed in general-purpose registers starting at r3, in - floating point registers starting at f1, or on the stack. - - But there are substantial differences: - * The number of registers used for parameter passing and the exact set of - nonvolatile registers differs (see MachRegs.hs). - * On AIX and 64-bit ELF, stack space is always reserved for parameters, - even if they are passed in registers. The called routine may choose to - save parameters from registers to the corresponding space on the stack. - * On AIX and 64-bit ELF, a corresponding amount of GPRs is skipped when - a floating point parameter is passed in an FPR. - * SysV insists on either passing I64 arguments on the stack, or in two GPRs, - starting with an odd-numbered GPR. It may skip a GPR to achieve this. - AIX just treats an I64 likt two separate I32s (high word first). - * I64 and FF64 arguments are 8-byte aligned on the stack for SysV, but only - 4-byte aligned like everything else on AIX. - * The SysV spec claims that FF32 is represented as FF64 on the stack. GCC on - PowerPC Linux does not agree, so neither do we. - - According to all conventions, the parameter area should be part of the - caller's stack frame, allocated in the caller's prologue code (large enough - to hold the parameter lists for all called routines). The NCG already - uses the stack for register spilling, leaving 64 bytes free at the top. - If we need a larger parameter area than that, we increase the size - of the stack frame just before ccalling. --} - - -genCCall' dflags gcp target dest_regs args - = do - (finalStack,passArgumentsCode,usedRegs) <- passArguments - (zip3 args argReps argHints) - allArgRegs - (allFPArgRegs platform) - initialStackOffset - nilOL [] - - (labelOrExpr, reduceToFF32) <- case target of - ForeignTarget (CmmLit (CmmLabel lbl)) _ -> do - uses_pic_base_implicitly - return (Left lbl, False) - ForeignTarget expr _ -> do - uses_pic_base_implicitly - return (Right expr, False) - PrimTarget mop -> outOfLineMachOp mop - - let codeBefore = move_sp_down finalStack `appOL` passArgumentsCode - codeAfter = move_sp_up finalStack `appOL` moveResult reduceToFF32 - - case labelOrExpr of - Left lbl -> do -- the linker does all the work for us - return ( codeBefore - `snocOL` BL lbl usedRegs - `appOL` maybeNOP -- some ABI require a NOP after BL - `appOL` codeAfter) - Right dyn -> do -- implement call through function pointer - (dynReg, dynCode) <- getSomeReg dyn - case gcp of - GCP64ELF 1 -> return ( dynCode - `appOL` codeBefore - `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 40)) - `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 0)) - `snocOL` LD II64 toc (AddrRegImm dynReg (ImmInt 8)) - `snocOL` MTCTR r11 - `snocOL` LD II64 r11 (AddrRegImm dynReg (ImmInt 16)) - `snocOL` BCTRL usedRegs - `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 40)) - `appOL` codeAfter) - GCP64ELF 2 -> return ( dynCode - `appOL` codeBefore - `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 24)) - `snocOL` MR r12 dynReg - `snocOL` MTCTR r12 - `snocOL` BCTRL usedRegs - `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 24)) - `appOL` codeAfter) - GCPAIX -> return ( dynCode - -- AIX/XCOFF follows the PowerOPEN ABI - -- which is quite similar to LinuxPPC64/ELFv1 - `appOL` codeBefore - `snocOL` ST spFormat toc (AddrRegImm sp (ImmInt 20)) - `snocOL` LD II32 r11 (AddrRegImm dynReg (ImmInt 0)) - `snocOL` LD II32 toc (AddrRegImm dynReg (ImmInt 4)) - `snocOL` MTCTR r11 - `snocOL` LD II32 r11 (AddrRegImm dynReg (ImmInt 8)) - `snocOL` BCTRL usedRegs - `snocOL` LD spFormat toc (AddrRegImm sp (ImmInt 20)) - `appOL` codeAfter) - _ -> return ( dynCode - `snocOL` MTCTR dynReg - `appOL` codeBefore - `snocOL` BCTRL usedRegs - `appOL` codeAfter) - where - platform = targetPlatform dflags - - uses_pic_base_implicitly = do - -- See Note [implicit register in PPC PIC code] - -- on why we claim to use PIC register here - when (positionIndependent dflags && target32Bit platform) $ do - _ <- getPicBaseNat $ archWordFormat True - return () - - initialStackOffset = case gcp of - GCPAIX -> 24 - GCP32ELF -> 8 - GCP64ELF 1 -> 48 - GCP64ELF 2 -> 32 - _ -> panic "genCall': unknown calling convention" - -- size of linkage area + size of arguments, in bytes - stackDelta finalStack = case gcp of - GCPAIX -> - roundTo 16 $ (24 +) $ max 32 $ sum $ - map (widthInBytes . typeWidth) argReps - GCP32ELF -> roundTo 16 finalStack - GCP64ELF 1 -> - roundTo 16 $ (48 +) $ max 64 $ sum $ - map (roundTo 8 . widthInBytes . typeWidth) - argReps - GCP64ELF 2 -> - roundTo 16 $ (32 +) $ max 64 $ sum $ - map (roundTo 8 . widthInBytes . typeWidth) - argReps - _ -> panic "genCall': unknown calling conv." - - argReps = map (cmmExprType dflags) args - (argHints, _) = foreignTargetHints target - - roundTo a x | x `mod` a == 0 = x - | otherwise = x + a - (x `mod` a) - - spFormat = if target32Bit platform then II32 else II64 - - -- TODO: Do not create a new stack frame if delta is too large. - move_sp_down finalStack - | delta > stackFrameHeaderSize dflags = - toOL [STU spFormat sp (AddrRegImm sp (ImmInt (-delta))), - DELTA (-delta)] - | otherwise = nilOL - where delta = stackDelta finalStack - move_sp_up finalStack - | delta > stackFrameHeaderSize dflags = - toOL [ADD sp sp (RIImm (ImmInt delta)), - DELTA 0] - | otherwise = nilOL - where delta = stackDelta finalStack - - -- A NOP instruction is required after a call (bl instruction) - -- on AIX and 64-Bit Linux. - -- If the call is to a function with a different TOC (r2) the - -- link editor replaces the NOP instruction with a load of the TOC - -- from the stack to restore the TOC. - maybeNOP = case gcp of - GCP32ELF -> nilOL - -- See Section 3.9.4 of OpenPower ABI - GCPAIX -> unitOL NOP - -- See Section 3.5.11 of PPC64 ELF v1.9 - GCP64ELF 1 -> unitOL NOP - -- See Section 2.3.6 of PPC64 ELF v2 - GCP64ELF 2 -> unitOL NOP - _ -> panic "maybeNOP: Unknown PowerPC 64-bit ABI" - - passArguments [] _ _ stackOffset accumCode accumUsed = return (stackOffset, accumCode, accumUsed) - passArguments ((arg,arg_ty,_):args) gprs fprs stackOffset - accumCode accumUsed | isWord64 arg_ty - && target32Bit (targetPlatform dflags) = - do - ChildCode64 code vr_lo <- iselExpr64 arg - let vr_hi = getHiVRegFromLo vr_lo - - case gcp of - GCPAIX -> - do let storeWord vr (gpr:_) _ = MR gpr vr - storeWord vr [] offset - = ST II32 vr (AddrRegImm sp (ImmInt offset)) - passArguments args - (drop 2 gprs) - fprs - (stackOffset+8) - (accumCode `appOL` code - `snocOL` storeWord vr_hi gprs stackOffset - `snocOL` storeWord vr_lo (drop 1 gprs) (stackOffset+4)) - ((take 2 gprs) ++ accumUsed) - GCP32ELF -> - do let stackOffset' = roundTo 8 stackOffset - stackCode = accumCode `appOL` code - `snocOL` ST II32 vr_hi (AddrRegImm sp (ImmInt stackOffset')) - `snocOL` ST II32 vr_lo (AddrRegImm sp (ImmInt (stackOffset'+4))) - regCode hireg loreg = - accumCode `appOL` code - `snocOL` MR hireg vr_hi - `snocOL` MR loreg vr_lo - - case gprs of - hireg : loreg : regs | even (length gprs) -> - passArguments args regs fprs stackOffset - (regCode hireg loreg) (hireg : loreg : accumUsed) - _skipped : hireg : loreg : regs -> - passArguments args regs fprs stackOffset - (regCode hireg loreg) (hireg : loreg : accumUsed) - _ -> -- only one or no regs left - passArguments args [] fprs (stackOffset'+8) - stackCode accumUsed - GCP64ELF _ -> panic "passArguments: 32 bit code" - - passArguments ((arg,rep,hint):args) gprs fprs stackOffset accumCode accumUsed - | reg : _ <- regs = do - register <- getRegister arg_pro - let code = case register of - Fixed _ freg fcode -> fcode `snocOL` MR reg freg - Any _ acode -> acode reg - stackOffsetRes = case gcp of - -- The PowerOpen ABI requires that we - -- reserve stack slots for register - -- parameters - GCPAIX -> stackOffset + stackBytes - -- ... the SysV ABI 32-bit doesn't. - GCP32ELF -> stackOffset - -- ... but SysV ABI 64-bit does. - GCP64ELF _ -> stackOffset + stackBytes - passArguments args - (drop nGprs gprs) - (drop nFprs fprs) - stackOffsetRes - (accumCode `appOL` code) - (reg : accumUsed) - | otherwise = do - (vr, code) <- getSomeReg arg_pro - passArguments args - (drop nGprs gprs) - (drop nFprs fprs) - (stackOffset' + stackBytes) - (accumCode `appOL` code - `snocOL` ST format_pro vr stackSlot) - accumUsed - where - arg_pro - | isBitsType rep = CmmMachOp (conv_op (typeWidth rep) (wordWidth dflags)) [arg] - | otherwise = arg - format_pro - | isBitsType rep = intFormat (wordWidth dflags) - | otherwise = cmmTypeFormat rep - conv_op = case hint of - SignedHint -> MO_SS_Conv - _ -> MO_UU_Conv - - stackOffset' = case gcp of - GCPAIX -> - -- The 32bit PowerOPEN ABI is happy with - -- 32bit-alignment ... - stackOffset - GCP32ELF - -- ... the SysV ABI requires 8-byte - -- alignment for doubles. - | isFloatType rep && typeWidth rep == W64 -> - roundTo 8 stackOffset - | otherwise -> - stackOffset - GCP64ELF _ -> - -- Everything on the stack is mapped to - -- 8-byte aligned doublewords - stackOffset - stackOffset'' - | isFloatType rep && typeWidth rep == W32 = - case gcp of - -- The ELF v1 ABI Section 3.2.3 requires: - -- "Single precision floating point values - -- are mapped to the second word in a single - -- doubleword" - GCP64ELF 1 -> stackOffset' + 4 - _ -> stackOffset' - | otherwise = stackOffset' - - stackSlot = AddrRegImm sp (ImmInt stackOffset'') - (nGprs, nFprs, stackBytes, regs) - = case gcp of - GCPAIX -> - case cmmTypeFormat rep of - II8 -> (1, 0, 4, gprs) - II16 -> (1, 0, 4, gprs) - II32 -> (1, 0, 4, gprs) - -- The PowerOpen ABI requires that we skip a - -- corresponding number of GPRs when we use - -- the FPRs. - -- - -- E.g. for a `double` two GPRs are skipped, - -- whereas for a `float` one GPR is skipped - -- when parameters are assigned to - -- registers. - -- - -- The PowerOpen ABI specification can be found at - -- ftp://www.sourceware.org/pub/binutils/ppc-docs/ppc-poweropen/ - FF32 -> (1, 1, 4, fprs) - FF64 -> (2, 1, 8, fprs) - II64 -> panic "genCCall' passArguments II64" - - GCP32ELF -> - case cmmTypeFormat rep of - II8 -> (1, 0, 4, gprs) - II16 -> (1, 0, 4, gprs) - II32 -> (1, 0, 4, gprs) - -- ... the SysV ABI doesn't. - FF32 -> (0, 1, 4, fprs) - FF64 -> (0, 1, 8, fprs) - II64 -> panic "genCCall' passArguments II64" - GCP64ELF _ -> - case cmmTypeFormat rep of - II8 -> (1, 0, 8, gprs) - II16 -> (1, 0, 8, gprs) - II32 -> (1, 0, 8, gprs) - II64 -> (1, 0, 8, gprs) - -- The ELFv1 ABI requires that we skip a - -- corresponding number of GPRs when we use - -- the FPRs. - FF32 -> (1, 1, 8, fprs) - FF64 -> (1, 1, 8, fprs) - - moveResult reduceToFF32 = - case dest_regs of - [] -> nilOL - [dest] - | reduceToFF32 && isFloat32 rep -> unitOL (FRSP r_dest f1) - | isFloat32 rep || isFloat64 rep -> unitOL (MR r_dest f1) - | isWord64 rep && target32Bit (targetPlatform dflags) - -> toOL [MR (getHiVRegFromLo r_dest) r3, - MR r_dest r4] - | otherwise -> unitOL (MR r_dest r3) - where rep = cmmRegType dflags (CmmLocal dest) - r_dest = getRegisterReg platform (CmmLocal dest) - _ -> panic "genCCall' moveResult: Bad dest_regs" - - outOfLineMachOp mop = - do - dflags <- getDynFlags - mopExpr <- cmmMakeDynamicReference dflags CallReference $ - mkForeignLabel functionName Nothing ForeignLabelInThisPackage IsFunction - let mopLabelOrExpr = case mopExpr of - CmmLit (CmmLabel lbl) -> Left lbl - _ -> Right mopExpr - return (mopLabelOrExpr, reduce) - where - (functionName, reduce) = case mop of - MO_F32_Exp -> (fsLit "exp", True) - MO_F32_ExpM1 -> (fsLit "expm1", True) - MO_F32_Log -> (fsLit "log", True) - MO_F32_Log1P -> (fsLit "log1p", True) - MO_F32_Sqrt -> (fsLit "sqrt", True) - MO_F32_Fabs -> unsupported - - MO_F32_Sin -> (fsLit "sin", True) - MO_F32_Cos -> (fsLit "cos", True) - MO_F32_Tan -> (fsLit "tan", True) - - MO_F32_Asin -> (fsLit "asin", True) - MO_F32_Acos -> (fsLit "acos", True) - MO_F32_Atan -> (fsLit "atan", True) - - MO_F32_Sinh -> (fsLit "sinh", True) - MO_F32_Cosh -> (fsLit "cosh", True) - MO_F32_Tanh -> (fsLit "tanh", True) - MO_F32_Pwr -> (fsLit "pow", True) - - MO_F32_Asinh -> (fsLit "asinh", True) - MO_F32_Acosh -> (fsLit "acosh", True) - MO_F32_Atanh -> (fsLit "atanh", True) - - MO_F64_Exp -> (fsLit "exp", False) - MO_F64_ExpM1 -> (fsLit "expm1", False) - MO_F64_Log -> (fsLit "log", False) - MO_F64_Log1P -> (fsLit "log1p", False) - MO_F64_Sqrt -> (fsLit "sqrt", False) - MO_F64_Fabs -> unsupported - - MO_F64_Sin -> (fsLit "sin", False) - MO_F64_Cos -> (fsLit "cos", False) - MO_F64_Tan -> (fsLit "tan", False) - - MO_F64_Asin -> (fsLit "asin", False) - MO_F64_Acos -> (fsLit "acos", False) - MO_F64_Atan -> (fsLit "atan", False) - - MO_F64_Sinh -> (fsLit "sinh", False) - MO_F64_Cosh -> (fsLit "cosh", False) - MO_F64_Tanh -> (fsLit "tanh", False) - MO_F64_Pwr -> (fsLit "pow", False) - - MO_F64_Asinh -> (fsLit "asinh", False) - MO_F64_Acosh -> (fsLit "acosh", False) - MO_F64_Atanh -> (fsLit "atanh", False) - - MO_UF_Conv w -> (fsLit $ word2FloatLabel w, False) - - MO_Memcpy _ -> (fsLit "memcpy", False) - MO_Memset _ -> (fsLit "memset", False) - MO_Memmove _ -> (fsLit "memmove", False) - MO_Memcmp _ -> (fsLit "memcmp", False) - - MO_BSwap w -> (fsLit $ bSwapLabel w, False) - MO_BRev w -> (fsLit $ bRevLabel w, False) - MO_PopCnt w -> (fsLit $ popCntLabel w, False) - MO_Pdep w -> (fsLit $ pdepLabel w, False) - MO_Pext w -> (fsLit $ pextLabel w, False) - MO_Clz _ -> unsupported - MO_Ctz _ -> unsupported - MO_AtomicRMW {} -> unsupported - MO_Cmpxchg w -> (fsLit $ cmpxchgLabel w, False) - MO_AtomicRead _ -> unsupported - MO_AtomicWrite _ -> unsupported - - MO_S_Mul2 {} -> unsupported - MO_S_QuotRem {} -> unsupported - MO_U_QuotRem {} -> unsupported - MO_U_QuotRem2 {} -> unsupported - MO_Add2 {} -> unsupported - MO_AddWordC {} -> unsupported - MO_SubWordC {} -> unsupported - MO_AddIntC {} -> unsupported - MO_SubIntC {} -> unsupported - MO_U_Mul2 {} -> unsupported - MO_ReadBarrier -> unsupported - MO_WriteBarrier -> unsupported - MO_Touch -> unsupported - MO_Prefetch_Data _ -> unsupported - unsupported = panic ("outOfLineCmmOp: " ++ show mop - ++ " not supported") - --- ----------------------------------------------------------------------------- --- Generating a table-branch - -genSwitch :: DynFlags -> CmmExpr -> SwitchTargets -> NatM InstrBlock -genSwitch dflags expr targets - | OSAIX <- platformOS (targetPlatform dflags) - = do - (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset) - let fmt = archWordFormat $ target32Bit $ targetPlatform dflags - sha = if target32Bit $ targetPlatform dflags then 2 else 3 - tmp <- getNewRegNat fmt - lbl <- getNewLabelNat - dynRef <- cmmMakeDynamicReference dflags DataReference lbl - (tableReg,t_code) <- getSomeReg $ dynRef - let code = e_code `appOL` t_code `appOL` toOL [ - SL fmt tmp reg (RIImm (ImmInt sha)), - LD fmt tmp (AddrRegReg tableReg tmp), - MTCTR tmp, - BCTR ids (Just lbl) [] - ] - return code - - | (positionIndependent dflags) || (not $ target32Bit $ targetPlatform dflags) - = do - (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset) - let fmt = archWordFormat $ target32Bit $ targetPlatform dflags - sha = if target32Bit $ targetPlatform dflags then 2 else 3 - tmp <- getNewRegNat fmt - lbl <- getNewLabelNat - dynRef <- cmmMakeDynamicReference dflags DataReference lbl - (tableReg,t_code) <- getSomeReg $ dynRef - let code = e_code `appOL` t_code `appOL` toOL [ - SL fmt tmp reg (RIImm (ImmInt sha)), - LD fmt tmp (AddrRegReg tableReg tmp), - ADD tmp tmp (RIReg tableReg), - MTCTR tmp, - BCTR ids (Just lbl) [] - ] - return code - | otherwise - = do - (reg,e_code) <- getSomeReg (cmmOffset dflags expr offset) - let fmt = archWordFormat $ target32Bit $ targetPlatform dflags - sha = if target32Bit $ targetPlatform dflags then 2 else 3 - tmp <- getNewRegNat fmt - lbl <- getNewLabelNat - let code = e_code `appOL` toOL [ - SL fmt tmp reg (RIImm (ImmInt sha)), - ADDIS tmp tmp (HA (ImmCLbl lbl)), - LD fmt tmp (AddrRegImm tmp (LO (ImmCLbl lbl))), - MTCTR tmp, - BCTR ids (Just lbl) [] - ] - return code - where (offset, ids) = switchTargetsToTable targets - -generateJumpTableForInstr :: DynFlags -> Instr - -> Maybe (NatCmmDecl RawCmmStatics Instr) -generateJumpTableForInstr dflags (BCTR ids (Just lbl) _) = - let jumpTable - | (positionIndependent dflags) - || (not $ target32Bit $ targetPlatform dflags) - = map jumpTableEntryRel ids - | otherwise = map (jumpTableEntry dflags) ids - where jumpTableEntryRel Nothing - = CmmStaticLit (CmmInt 0 (wordWidth dflags)) - jumpTableEntryRel (Just blockid) - = CmmStaticLit (CmmLabelDiffOff blockLabel lbl 0 - (wordWidth dflags)) - where blockLabel = blockLbl blockid - in Just (CmmData (Section ReadOnlyData lbl) (RawCmmStatics lbl jumpTable)) -generateJumpTableForInstr _ _ = Nothing - --- ----------------------------------------------------------------------------- --- 'condIntReg' and 'condFltReg': condition codes into registers - --- Turn those condition codes into integers now (when they appear on --- the right hand side of an assignment). - - - -condReg :: NatM CondCode -> NatM Register -condReg getCond = do - CondCode _ cond cond_code <- getCond - dflags <- getDynFlags - let - code dst = cond_code - `appOL` negate_code - `appOL` toOL [ - MFCR dst, - RLWINM dst dst (bit + 1) 31 31 - ] - - negate_code | do_negate = unitOL (CRNOR bit bit bit) - | otherwise = nilOL - - (bit, do_negate) = case cond of - LTT -> (0, False) - LE -> (1, True) - EQQ -> (2, False) - GE -> (0, True) - GTT -> (1, False) - - NE -> (2, True) - - LU -> (0, False) - LEU -> (1, True) - GEU -> (0, True) - GU -> (1, False) - _ -> panic "PPC.CodeGen.codeReg: no match" - - format = archWordFormat $ target32Bit $ targetPlatform dflags - return (Any format code) - -condIntReg :: Cond -> Width -> CmmExpr -> CmmExpr -> NatM Register -condIntReg cond width x y = condReg (condIntCode cond width x y) -condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register -condFltReg cond x y = condReg (condFltCode cond x y) - - - --- ----------------------------------------------------------------------------- --- 'trivial*Code': deal with trivial instructions - --- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode', --- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions. --- Only look for constants on the right hand side, because that's --- where the generic optimizer will have put them. - --- Similarly, for unary instructions, we don't have to worry about --- matching an StInt as the argument, because genericOpt will already --- have handled the constant-folding. - - - -{- -Wolfgang's PowerPC version of The Rules: - -A slightly modified version of The Rules to take advantage of the fact -that PowerPC instructions work on all registers and don't implicitly -clobber any fixed registers. - -* The only expression for which getRegister returns Fixed is (CmmReg reg). - -* If getRegister returns Any, then the code it generates may modify only: - (a) fresh temporaries - (b) the destination register - It may *not* modify global registers, unless the global - register happens to be the destination register. - It may not clobber any other registers. In fact, only ccalls clobber any - fixed registers. - Also, it may not modify the counter register (used by genCCall). - - Corollary: If a getRegister for a subexpression returns Fixed, you need - not move it to a fresh temporary before evaluating the next subexpression. - The Fixed register won't be modified. - Therefore, we don't need a counterpart for the x86's getStableReg on PPC. - -* SDM's First Rule is valid for PowerPC, too: subexpressions can depend on - the value of the destination register. --} - -trivialCode - :: Width - -> Bool - -> (Reg -> Reg -> RI -> Instr) - -> CmmExpr - -> CmmExpr - -> NatM Register - -trivialCode rep signed instr x (CmmLit (CmmInt y _)) - | Just imm <- makeImmediate rep signed y - = do - (src1, code1) <- getSomeReg x - let code dst = code1 `snocOL` instr dst src1 (RIImm imm) - return (Any (intFormat rep) code) - -trivialCode rep _ instr x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - let code dst = code1 `appOL` code2 `snocOL` instr dst src1 (RIReg src2) - return (Any (intFormat rep) code) - -shiftMulCode - :: Width - -> Bool - -> (Format-> Reg -> Reg -> RI -> Instr) - -> CmmExpr - -> CmmExpr - -> NatM Register -shiftMulCode width sign instr x (CmmLit (CmmInt y _)) - | Just imm <- makeImmediate width sign y - = do - (src1, code1) <- getSomeReg x - let format = intFormat width - let ins_fmt = intFormat (max W32 width) - let code dst = code1 `snocOL` instr ins_fmt dst src1 (RIImm imm) - return (Any format code) - -shiftMulCode width _ instr x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - let format = intFormat width - let ins_fmt = intFormat (max W32 width) - let code dst = code1 `appOL` code2 - `snocOL` instr ins_fmt dst src1 (RIReg src2) - return (Any format code) - -trivialCodeNoImm' :: Format -> (Reg -> Reg -> Reg -> Instr) - -> CmmExpr -> CmmExpr -> NatM Register -trivialCodeNoImm' format instr x y = do - (src1, code1) <- getSomeReg x - (src2, code2) <- getSomeReg y - let code dst = code1 `appOL` code2 `snocOL` instr dst src1 src2 - return (Any format code) - -trivialCodeNoImm :: Format -> (Format -> Reg -> Reg -> Reg -> Instr) - -> CmmExpr -> CmmExpr -> NatM Register -trivialCodeNoImm format instr x y - = trivialCodeNoImm' format (instr format) x y - -srCode :: Width -> Bool -> (Format-> Reg -> Reg -> RI -> Instr) - -> CmmExpr -> CmmExpr -> NatM Register -srCode width sgn instr x (CmmLit (CmmInt y _)) - | Just imm <- makeImmediate width sgn y - = do - let op_len = max W32 width - extend = if sgn then extendSExpr else extendUExpr - (src1, code1) <- getSomeReg (extend width op_len x) - let code dst = code1 `snocOL` - instr (intFormat op_len) dst src1 (RIImm imm) - return (Any (intFormat width) code) - -srCode width sgn instr x y = do - let op_len = max W32 width - extend = if sgn then extendSExpr else extendUExpr - (src1, code1) <- getSomeReg (extend width op_len x) - (src2, code2) <- getSomeReg (extendUExpr width op_len y) - -- Note: Shift amount `y` is unsigned - let code dst = code1 `appOL` code2 `snocOL` - instr (intFormat op_len) dst src1 (RIReg src2) - return (Any (intFormat width) code) - -divCode :: Width -> Bool -> CmmExpr -> CmmExpr -> NatM Register -divCode width sgn x y = do - let op_len = max W32 width - extend = if sgn then extendSExpr else extendUExpr - (src1, code1) <- getSomeReg (extend width op_len x) - (src2, code2) <- getSomeReg (extend width op_len y) - let code dst = code1 `appOL` code2 `snocOL` - DIV (intFormat op_len) sgn dst src1 src2 - return (Any (intFormat width) code) - - -trivialUCode :: Format - -> (Reg -> Reg -> Instr) - -> CmmExpr - -> NatM Register -trivialUCode rep instr x = do - (src, code) <- getSomeReg x - let code' dst = code `snocOL` instr dst src - return (Any rep code') - --- There is no "remainder" instruction on the PPC, so we have to do --- it the hard way. --- The "sgn" parameter is the signedness for the division instruction - -remainderCode :: Width -> Bool -> Reg -> CmmExpr -> CmmExpr - -> NatM (Reg -> InstrBlock) -remainderCode rep sgn reg_q arg_x arg_y = do - let op_len = max W32 rep - fmt = intFormat op_len - extend = if sgn then extendSExpr else extendUExpr - (x_reg, x_code) <- getSomeReg (extend rep op_len arg_x) - (y_reg, y_code) <- getSomeReg (extend rep op_len arg_y) - return $ \reg_r -> y_code `appOL` x_code - `appOL` toOL [ DIV fmt sgn reg_q x_reg y_reg - , MULL fmt reg_r reg_q (RIReg y_reg) - , SUBF reg_r reg_r x_reg - ] - - -coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register -coerceInt2FP fromRep toRep x = do - dflags <- getDynFlags - let arch = platformArch $ targetPlatform dflags - coerceInt2FP' arch fromRep toRep x - -coerceInt2FP' :: Arch -> Width -> Width -> CmmExpr -> NatM Register -coerceInt2FP' ArchPPC fromRep toRep x = do - (src, code) <- getSomeReg x - lbl <- getNewLabelNat - itmp <- getNewRegNat II32 - ftmp <- getNewRegNat FF64 - dflags <- getDynFlags - dynRef <- cmmMakeDynamicReference dflags DataReference lbl - Amode addr addr_code <- getAmode D dynRef - let - code' dst = code `appOL` maybe_exts `appOL` toOL [ - LDATA (Section ReadOnlyData lbl) $ RawCmmStatics lbl - [CmmStaticLit (CmmInt 0x43300000 W32), - CmmStaticLit (CmmInt 0x80000000 W32)], - XORIS itmp src (ImmInt 0x8000), - ST II32 itmp (spRel dflags 3), - LIS itmp (ImmInt 0x4330), - ST II32 itmp (spRel dflags 2), - LD FF64 ftmp (spRel dflags 2) - ] `appOL` addr_code `appOL` toOL [ - LD FF64 dst addr, - FSUB FF64 dst ftmp dst - ] `appOL` maybe_frsp dst - - maybe_exts = case fromRep of - W8 -> unitOL $ EXTS II8 src src - W16 -> unitOL $ EXTS II16 src src - W32 -> nilOL - _ -> panic "PPC.CodeGen.coerceInt2FP: no match" - - maybe_frsp dst - = case toRep of - W32 -> unitOL $ FRSP dst dst - W64 -> nilOL - _ -> panic "PPC.CodeGen.coerceInt2FP: no match" - - return (Any (floatFormat toRep) code') - --- On an ELF v1 Linux we use the compiler doubleword in the stack frame --- this is the TOC pointer doubleword on ELF v2 Linux. The latter is only --- set right before a call and restored right after return from the call. --- So it is fine. -coerceInt2FP' (ArchPPC_64 _) fromRep toRep x = do - (src, code) <- getSomeReg x - dflags <- getDynFlags - let - code' dst = code `appOL` maybe_exts `appOL` toOL [ - ST II64 src (spRel dflags 3), - LD FF64 dst (spRel dflags 3), - FCFID dst dst - ] `appOL` maybe_frsp dst - - maybe_exts = case fromRep of - W8 -> unitOL $ EXTS II8 src src - W16 -> unitOL $ EXTS II16 src src - W32 -> unitOL $ EXTS II32 src src - W64 -> nilOL - _ -> panic "PPC.CodeGen.coerceInt2FP: no match" - - maybe_frsp dst - = case toRep of - W32 -> unitOL $ FRSP dst dst - W64 -> nilOL - _ -> panic "PPC.CodeGen.coerceInt2FP: no match" - - return (Any (floatFormat toRep) code') - -coerceInt2FP' _ _ _ _ = panic "PPC.CodeGen.coerceInt2FP: unknown arch" - - -coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register -coerceFP2Int fromRep toRep x = do - dflags <- getDynFlags - let arch = platformArch $ targetPlatform dflags - coerceFP2Int' arch fromRep toRep x - -coerceFP2Int' :: Arch -> Width -> Width -> CmmExpr -> NatM Register -coerceFP2Int' ArchPPC _ toRep x = do - dflags <- getDynFlags - -- the reps don't really matter: F*->FF64 and II32->I* are no-ops - (src, code) <- getSomeReg x - tmp <- getNewRegNat FF64 - let - code' dst = code `appOL` toOL [ - -- convert to int in FP reg - FCTIWZ tmp src, - -- store value (64bit) from FP to stack - ST FF64 tmp (spRel dflags 2), - -- read low word of value (high word is undefined) - LD II32 dst (spRel dflags 3)] - return (Any (intFormat toRep) code') - -coerceFP2Int' (ArchPPC_64 _) _ toRep x = do - dflags <- getDynFlags - -- the reps don't really matter: F*->FF64 and II64->I* are no-ops - (src, code) <- getSomeReg x - tmp <- getNewRegNat FF64 - let - code' dst = code `appOL` toOL [ - -- convert to int in FP reg - FCTIDZ tmp src, - -- store value (64bit) from FP to compiler word on stack - ST FF64 tmp (spRel dflags 3), - LD II64 dst (spRel dflags 3)] - return (Any (intFormat toRep) code') - -coerceFP2Int' _ _ _ _ = panic "PPC.CodeGen.coerceFP2Int: unknown arch" - --- Note [.LCTOC1 in PPC PIC code] --- The .LCTOC1 label is defined to point 32768 bytes into the GOT table --- to make the most of the PPC's 16-bit displacements. --- As 16-bit signed offset is used (usually via addi/lwz instructions) --- first element will have '-32768' offset against .LCTOC1. - --- Note [implicit register in PPC PIC code] --- PPC generates calls by labels in assembly --- in form of: --- bl puts+32768@plt --- in this form it's not seen directly (by GHC NCG) --- that r30 (PicBaseReg) is used, --- but r30 is a required part of PLT code setup: --- puts+32768@plt: --- lwz r11,-30484(r30) ; offset in .LCTOC1 --- mtctr r11 --- bctr |