author     Ian Lynagh <igloo@earth.li>  2012-02-14 21:26:18 +0000
committer  Ian Lynagh <igloo@earth.li>  2012-02-14 23:08:26 +0000
commit     7bfb7bfc6da981ef827b1a166c8cbfb5b29a25a4 (patch)
tree       64f62969824858ce141d89bc52ffc7e71ed236f9 /compiler/nativeGen/SPARC/CodeGen.hs
parent     8c0196b48d043fe16eb5b2d343f5544b7fdd5004 (diff)
Define a quotRem CallishMachOp; fixes #5598
This means we no longer do a division twice when we are using quotRem (on platforms on which the op is supported; currently only amd64).
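On targets without a fused instruction the new op is expanded back into separate quot and rem operations; this file wires SPARC up to that path via expandCallishMachOp, as the hunks below show. A minimal standalone sketch of the expansion idea, using a hypothetical mini-IR rather than GHC's Cmm types:

    -- Illustrative stand-ins, not GHC's types.
    data Expr = Var String | Quot Expr Expr | Rem Expr Expr  deriving Show
    data Stmt = Assign String Expr
              | QuotRem String String Expr Expr              deriving Show

    -- One fused quot/rem becomes two ordinary divisions when the
    -- platform has no single instruction computing both at once.
    expandQuotRem :: Stmt -> [Stmt]
    expandQuotRem (QuotRem q r x y) = [Assign q (Quot x y), Assign r (Rem x y)]
    expandQuotRem s                 = [s]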
Diffstat (limited to 'compiler/nativeGen/SPARC/CodeGen.hs')
-rw-r--r--  compiler/nativeGen/SPARC/CodeGen.hs  499
1 file changed, 410 insertions(+), 89 deletions(-)
diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs
index 4c295f11d5..f8e71f4aef 100644
--- a/compiler/nativeGen/SPARC/CodeGen.hs
+++ b/compiler/nativeGen/SPARC/CodeGen.hs
@@ -6,18 +6,11 @@
--
-----------------------------------------------------------------------------
-{-# OPTIONS -fno-warn-tabs #-}
--- The above warning supression flag is a temporary kludge.
--- While working on this module you are encouraged to remove it and
--- detab the module (please do the detabbing in a separate patch). See
--- http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces
--- for details
-
-module SPARC.CodeGen (
- cmmTopCodeGen,
- generateJumpTableForInstr,
- InstrBlock
-)
+module SPARC.CodeGen (
+ cmmTopCodeGen,
+ generateJumpTableForInstr,
+ InstrBlock
+)
where
@@ -26,18 +19,19 @@ where
#include "../includes/MachDeps.h"
-- NCG stuff:
+import SPARC.Base
import SPARC.CodeGen.Sanity
import SPARC.CodeGen.Amode
import SPARC.CodeGen.CondCode
import SPARC.CodeGen.Gen64
import SPARC.CodeGen.Gen32
-import SPARC.CodeGen.CCall
import SPARC.CodeGen.Base
-import SPARC.Ppr ()
+import SPARC.Ppr ()
import SPARC.Instr
import SPARC.Imm
import SPARC.AddrMode
import SPARC.Regs
+import SPARC.Stack
import Instruction
import Size
import NCGMonad
@@ -45,17 +39,23 @@ import NCGMonad
-- Our intermediate code:
import BlockId
import OldCmm
+import OldCmmUtils
+import PIC
+import Reg
import CLabel
+import CPrim
-- The rest:
+import BasicTypes
import DynFlags
-import StaticFlags ( opt_PIC )
+import FastString
+import StaticFlags ( opt_PIC )
import OrdList
import Outputable
import Platform
import Unique
-import Control.Monad ( mapAndUnzipM )
+import Control.Monad ( mapAndUnzipM )
-- | Top level code generation
cmmTopCodeGen :: RawCmmDecl
@@ -77,10 +77,10 @@ cmmTopCodeGen (CmmData sec dat) = do
-- | Do code generation on a single block of CMM code.
--- code generation may introduce new basic block boundaries, which
--- are indicated by the NEWBLOCK instruction. We must split up the
--- instruction stream into basic blocks again. Also, we extract
--- LDATAs here too.
+-- code generation may introduce new basic block boundaries, which
+-- are indicated by the NEWBLOCK instruction. We must split up the
+-- instruction stream into basic blocks again. Also, we extract
+-- LDATAs here too.
basicBlockCodeGen :: Platform
-> CmmBasicBlock
-> NatM ( [NatBasicBlock Instr]
@@ -89,22 +89,22 @@ basicBlockCodeGen :: Platform
basicBlockCodeGen platform cmm@(BasicBlock id stmts) = do
instrs <- stmtsToInstrs stmts
let
- (top,other_blocks,statics)
- = foldrOL mkBlocks ([],[],[]) instrs
-
- mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
- = ([], BasicBlock id instrs : blocks, statics)
+ (top,other_blocks,statics)
+ = foldrOL mkBlocks ([],[],[]) instrs
- mkBlocks (LDATA sec dat) (instrs,blocks,statics)
- = (instrs, blocks, CmmData sec dat:statics)
+ mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
+ = ([], BasicBlock id instrs : blocks, statics)
- mkBlocks instr (instrs,blocks,statics)
- = (instr:instrs, blocks, statics)
+ mkBlocks (LDATA sec dat) (instrs,blocks,statics)
+ = (instrs, blocks, CmmData sec dat:statics)
- -- do intra-block sanity checking
- blocksChecked
- = map (checkBlock platform cmm)
- $ BasicBlock id top : other_blocks
+ mkBlocks instr (instrs,blocks,statics)
+ = (instr:instrs, blocks, statics)
+
+ -- do intra-block sanity checking
+ blocksChecked
+ = map (checkBlock platform cmm)
+ $ BasicBlock id top : other_blocks
return (blocksChecked, statics)
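The fold in this hunk is easier to see with ordinary lists in place of OrdList. A minimal sketch with simplified stand-in types (Instr and Block here are not GHC's):

    -- Right fold that cuts a flat instruction stream at NEWBLOCK markers
    -- and hoists LDATA pseudo-instructions out into a separate statics list.
    data Instr = NewBlock Int | LData String | Plain String  deriving Show
    data Block = Block Int [Instr]                           deriving Show

    splitBlocks :: Int -> [Instr] -> ([Block], [String])
    splitBlocks entry instrs = (Block entry top : blocks, statics)
      where
        (top, blocks, statics) = foldr mk ([], [], []) instrs
        mk (NewBlock i) (is, bs, ds) = ([], Block i is : bs, ds)  -- new boundary
        mk (LData d)    (is, bs, ds) = (is, bs, d : ds)           -- extract data
        mk i            (is, bs, ds) = (i : is, bs, ds)           -- keep instr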
@@ -118,32 +118,32 @@ stmtsToInstrs stmts
stmtToInstrs :: CmmStmt -> NatM InstrBlock
stmtToInstrs stmt = case stmt of
- CmmNop -> return nilOL
+ CmmNop -> return nilOL
CmmComment s -> return (unitOL (COMMENT s))
CmmAssign reg src
- | isFloatType ty -> assignReg_FltCode size reg src
- | isWord64 ty -> assignReg_I64Code reg src
- | otherwise -> assignReg_IntCode size reg src
- where ty = cmmRegType reg
- size = cmmTypeSize ty
+ | isFloatType ty -> assignReg_FltCode size reg src
+ | isWord64 ty -> assignReg_I64Code reg src
+ | otherwise -> assignReg_IntCode size reg src
+ where ty = cmmRegType reg
+ size = cmmTypeSize ty
CmmStore addr src
- | isFloatType ty -> assignMem_FltCode size addr src
- | isWord64 ty -> assignMem_I64Code addr src
- | otherwise -> assignMem_IntCode size addr src
- where ty = cmmExprType src
- size = cmmTypeSize ty
+ | isFloatType ty -> assignMem_FltCode size addr src
+ | isWord64 ty -> assignMem_I64Code addr src
+ | otherwise -> assignMem_IntCode size addr src
+ where ty = cmmExprType src
+ size = cmmTypeSize ty
CmmCall target result_regs args _
-> genCCall target result_regs args
- CmmBranch id -> genBranch id
- CmmCondBranch arg id -> genCondJump id arg
- CmmSwitch arg ids -> genSwitch arg ids
- CmmJump arg _ -> genJump arg
+ CmmBranch id -> genBranch id
+ CmmCondBranch arg id -> genCondJump id arg
+ CmmSwitch arg ids -> genSwitch arg ids
+ CmmJump arg _ -> genJump arg
- CmmReturn
+ CmmReturn
-> panic "stmtToInstrs: return statement should have been cps'd away"
@@ -198,8 +198,8 @@ assignReg_IntCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
assignReg_IntCode _ reg src = do
r <- getRegister src
return $ case r of
- Any _ code -> code dst
- Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
+ Any _ code -> code dst
+ Fixed _ freg fcode -> fcode `snocOL` OR False g0 (RIReg freg) dst
where
dst = getRegisterReg reg
@@ -212,23 +212,23 @@ assignMem_FltCode pk addr src = do
(src__2, code2) <- getSomeReg src
tmp1 <- getNewRegNat pk
let
- pk__2 = cmmExprType src
- code__2 = code1 `appOL` code2 `appOL`
- if sizeToWidth pk == typeWidth pk__2
+ pk__2 = cmmExprType src
+ code__2 = code1 `appOL` code2 `appOL`
+ if sizeToWidth pk == typeWidth pk__2
then unitOL (ST pk src__2 dst__2)
- else toOL [ FxTOy (cmmTypeSize pk__2) pk src__2 tmp1
- , ST pk tmp1 dst__2]
+ else toOL [ FxTOy (cmmTypeSize pk__2) pk src__2 tmp1
+ , ST pk tmp1 dst__2]
return code__2
-- Floating point assignment to a register/temporary
assignReg_FltCode :: Size -> CmmReg -> CmmExpr -> NatM InstrBlock
assignReg_FltCode pk dstCmmReg srcCmmExpr = do
srcRegister <- getRegister srcCmmExpr
- let dstReg = getRegisterReg dstCmmReg
+ let dstReg = getRegisterReg dstCmmReg
return $ case srcRegister of
- Any _ code -> code dstReg
- Fixed _ srcFixedReg srcCode -> srcCode `snocOL` FMOV pk srcFixedReg dstReg
+ Any _ code -> code dstReg
+ Fixed _ srcFixedReg srcCode -> srcCode `snocOL` FMOV pk srcFixedReg dstReg
@@ -243,7 +243,7 @@ genJump (CmmLit (CmmLabel lbl))
genJump tree
= do
(target, code) <- getSomeReg tree
- return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
+ return (code `snocOL` JMP (AddrRegReg target g0) `snocOL` NOP)
-- -----------------------------------------------------------------------------
-- Unconditional branches
@@ -272,7 +272,7 @@ allocator.
genCondJump
- :: BlockId -- the branch target
+ :: BlockId -- the branch target
-> CmmExpr -- the condition on which to branch
-> NatM InstrBlock
@@ -281,7 +281,7 @@ genCondJump
genCondJump bid bool = do
CondCode is_float cond code <- getCondCode bool
return (
- code `appOL`
+ code `appOL`
toOL (
if is_float
then [NOP, BF cond False bid, NOP]
@@ -296,34 +296,355 @@ genCondJump bid bool = do
genSwitch :: CmmExpr -> [Maybe BlockId] -> NatM InstrBlock
genSwitch expr ids
- | opt_PIC
- = error "MachCodeGen: sparc genSwitch PIC not finished\n"
-
- | otherwise
- = do (e_reg, e_code) <- getSomeReg expr
-
- base_reg <- getNewRegNat II32
- offset_reg <- getNewRegNat II32
- dst <- getNewRegNat II32
-
- label <- getNewLabelNat
-
- return $ e_code `appOL`
- toOL
- [ -- load base of jump table
- SETHI (HI (ImmCLbl label)) base_reg
- , OR False base_reg (RIImm $ LO $ ImmCLbl label) base_reg
-
- -- the addrs in the table are 32 bits wide..
- , SLL e_reg (RIImm $ ImmInt 2) offset_reg
-
- -- load and jump to the destination
- , LD II32 (AddrRegReg base_reg offset_reg) dst
- , JMP_TBL (AddrRegImm dst (ImmInt 0)) ids label
- , NOP ]
+ | opt_PIC
+ = error "MachCodeGen: sparc genSwitch PIC not finished\n"
+
+ | otherwise
+ = do (e_reg, e_code) <- getSomeReg expr
+
+ base_reg <- getNewRegNat II32
+ offset_reg <- getNewRegNat II32
+ dst <- getNewRegNat II32
+
+ label <- getNewLabelNat
+
+ return $ e_code `appOL`
+ toOL
+ [ -- load base of jump table
+ SETHI (HI (ImmCLbl label)) base_reg
+ , OR False base_reg (RIImm $ LO $ ImmCLbl label) base_reg
+
+ -- the addrs in the table are 32 bits wide..
+ , SLL e_reg (RIImm $ ImmInt 2) offset_reg
+
+ -- load and jump to the destination
+ , LD II32 (AddrRegReg base_reg offset_reg) dst
+ , JMP_TBL (AddrRegImm dst (ImmInt 0)) ids label
+ , NOP ]
generateJumpTableForInstr :: Instr -> Maybe (NatCmmDecl CmmStatics Instr)
generateJumpTableForInstr (JMP_TBL _ ids label) =
- let jumpTable = map jumpTableEntry ids
- in Just (CmmData ReadOnlyData (Statics label jumpTable))
+ let jumpTable = map jumpTableEntry ids
+ in Just (CmmData ReadOnlyData (Statics label jumpTable))
generateJumpTableForInstr _ = Nothing
+
+
+
+-- -----------------------------------------------------------------------------
+-- Generating C calls
+
+{-
+ Now the biggest nightmare---calls. Most of the nastiness is buried in
+ @get_arg@, which moves the arguments to the correct registers/stack
+ locations. Apart from that, the code is easy.
+
+ The SPARC calling convention is an absolute
+ nightmare. The first 6x32 bits of arguments are mapped into
+ %o0 through %o5, and the remaining arguments are dumped to the
+ stack, beginning at [%sp+92]. (Note that %o6 == %sp.)
+
+ If we have to put args on the stack, move %o6==%sp down by
+ the number of words to go on the stack, to ensure there's enough space.
+
+ According to Fraser and Hanson's lcc book, page 478, fig 17.2,
+ 16 words above the stack pointer is a word for the address of
+ a structure return value. I use this as a temporary location
+ for moving values from float to int regs. Certainly it isn't
+ safe to put anything in the 16 words starting at %sp, since
+ this area can get trashed at any time due to window overflows
+ caused by signal handlers.
+
+ A final complication (if the above isn't enough) is that
+ we can't blithely calculate the arguments one by one into
+ %o0 .. %o5. Consider the following nested calls:
+
+ fff a (fff b c)
+
+ Naive code moves a into %o0, and (fff b c) into %o1. Unfortunately
+ the inner call will itself use %o0, which trashes the value put there
+ in preparation for the outer call. Upshot: we need to calculate the
+ args into temporary regs, and move those to arg regs or onto the
+ stack only immediately prior to the call proper. Sigh.
+-}
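A standalone sketch of the two-phase marshalling the comment describes, with strings standing in for vregs and instructions (none of these names are GHC's). Phase 1 gives every argument its own temporary, so the inner call in fff a (fff b c) cannot trash %o0 while a waits in it; phase 2 moves the temporaries into %o0..%o5, or stack words from [%sp+92], only just before the CALL:

    -- Phase 1: each argument value lands in its own virtual register.
    evalArgs :: [String] -> [(String, String)]
    evalArgs args = [ ("v" ++ show i, "eval " ++ a ++ " into v" ++ show i)
                    | (i, a) <- zip [0 :: Int ..] args ]

    -- Phase 2: the first six temporaries go to %o0..%o5, the rest to
    -- consecutive 32-bit stack words starting at [%sp+92].
    placeArgs :: [String] -> [String]
    placeArgs vs =
         [ "mov " ++ v ++ ", %o" ++ show i | (i, v) <- zip [0 :: Int .. 5] vs ]
      ++ [ "st "  ++ v ++ ", [%sp+" ++ show (92 + 4 * n) ++ "]"
         | (n, v) <- zip [0 :: Int ..] (drop 6 vs) ]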
+
+genCCall
+ :: CmmCallTarget -- function to call
+ -> [HintedCmmFormal] -- where to put the result
+ -> [HintedCmmActual] -- arguments (of mixed type)
+ -> NatM InstrBlock
+
+
+
+-- On SPARC under TSO (Total Store Ordering), writes earlier in the instruction stream
+-- are guaranteed to take place before writes afterwards (unlike on PowerPC).
+-- Ref: Section 8.4 of the SPARC V9 Architecture manual.
+--
+-- In the SPARC case we don't need a barrier.
+--
+genCCall (CmmPrim (MO_WriteBarrier)) _ _
+ = do return nilOL
+
+genCCall (CmmPrim op) results args
+ | Just stmts <- expandCallishMachOp op results args
+ = stmtsToInstrs stmts
+
+genCCall target dest_regs argsAndHints
+ = do
+ -- need to remove alignment information
+ let argsAndHints' | (CmmPrim mop) <- target,
+ (mop == MO_Memcpy ||
+ mop == MO_Memset ||
+ mop == MO_Memmove)
+ = init argsAndHints
+
+ | otherwise
+ = argsAndHints
+
+ -- strip hints from the arg regs
+ let args :: [CmmExpr]
+ args = map hintlessCmm argsAndHints'
+
+
+ -- work out the arguments, and assign them to integer regs
+ argcode_and_vregs <- mapM arg_to_int_vregs args
+ let (argcodes, vregss) = unzip argcode_and_vregs
+ let vregs = concat vregss
+
+ let n_argRegs = length allArgRegs
+ let n_argRegs_used = min (length vregs) n_argRegs
+
+
+ -- deal with static vs dynamic call targets
+ callinsns <- case target of
+ CmmCallee (CmmLit (CmmLabel lbl)) _ ->
+ return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
+
+ CmmCallee expr _
+ -> do (dyn_c, [dyn_r]) <- arg_to_int_vregs expr
+ return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
+
+ CmmPrim mop
+ -> do res <- outOfLineMachOp mop
+ lblOrMopExpr <- case res of
+ Left lbl -> do
+ return (unitOL (CALL (Left (litToImm (CmmLabel lbl))) n_argRegs_used False))
+
+ Right mopExpr -> do
+ (dyn_c, [dyn_r]) <- arg_to_int_vregs mopExpr
+ return (dyn_c `snocOL` CALL (Right dyn_r) n_argRegs_used False)
+
+ return lblOrMopExpr
+
+ let argcode = concatOL argcodes
+
+ let (move_sp_down, move_sp_up)
+ = let diff = length vregs - n_argRegs
+ nn = if odd diff then diff + 1 else diff -- keep 8-byte alignment
+ in if nn <= 0
+ then (nilOL, nilOL)
+ else (unitOL (moveSp (-1*nn)), unitOL (moveSp (1*nn)))
+
+ let transfer_code
+ = toOL (move_final vregs allArgRegs extraStackArgsHere)
+
+ dflags <- getDynFlags
+ return
+ $ argcode `appOL`
+ move_sp_down `appOL`
+ transfer_code `appOL`
+ callinsns `appOL`
+ unitOL NOP `appOL`
+ move_sp_up `appOL`
+ assign_code (targetPlatform dflags) dest_regs
+
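Worked example for the move_sp_down/move_sp_up pair above: nine word-sized arguments against six arg regs leave diff = 3 stack words, which rounds up to nn = 4 so %sp stays 8-byte aligned across the call. The rounding in isolation, assuming (as the code does) that moveSp counts 32-bit words:

    -- Round a word count up to even: alignWords 3 == 4, alignWords 4 == 4,
    -- alignWords 0 == 0 (no stack adjustment needed at all).
    alignWords :: Int -> Int
    alignWords diff = if odd diff then diff + 1 else diff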
+
+-- | Generate code to calculate an argument, and move it into one
+-- or two integer vregs.
+arg_to_int_vregs :: CmmExpr -> NatM (OrdList Instr, [Reg])
+arg_to_int_vregs arg
+
+ -- If the expr produces a 64 bit int, then we can just use iselExpr64
+ | isWord64 (cmmExprType arg)
+ = do (ChildCode64 code r_lo) <- iselExpr64 arg
+ let r_hi = getHiVRegFromLo r_lo
+ return (code, [r_hi, r_lo])
+
+ | otherwise
+ = do (src, code) <- getSomeReg arg
+ let pk = cmmExprType arg
+
+ case cmmTypeSize pk of
+
+ -- Load a 64 bit float return value into two integer regs.
+ FF64 -> do
+ v1 <- getNewRegNat II32
+ v2 <- getNewRegNat II32
+
+ let code2 =
+ code `snocOL`
+ FMOV FF64 src f0 `snocOL`
+ ST FF32 f0 (spRel 16) `snocOL`
+ LD II32 (spRel 16) v1 `snocOL`
+ ST FF32 f1 (spRel 16) `snocOL`
+ LD II32 (spRel 16) v2
+
+ return (code2, [v1,v2])
+
+ -- Load a 32 bit float return value into an integer reg
+ FF32 -> do
+ v1 <- getNewRegNat II32
+
+ let code2 =
+ code `snocOL`
+ ST FF32 src (spRel 16) `snocOL`
+ LD II32 (spRel 16) v1
+
+ return (code2, [v1])
+
+ -- Move an integer return value into its destination reg.
+ _ -> do
+ v1 <- getNewRegNat II32
+
+ let code2 =
+ code `snocOL`
+ OR False g0 (RIReg src) v1
+
+ return (code2, [v1])
+
+
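The FF64 branch above bounces the float through the scratch word at spRel 16 (the struct-return slot described in the big comment earlier) to split it into two integer register words. The same split expressed at the bit level, a sketch assuming SPARC's big-endian layout in which f0 holds the high word of the f0/f1 pair (castDoubleToWord64 needs base >= 4.10):

    import Data.Bits (shiftR)
    import Data.Word (Word32, Word64)
    import GHC.Float (castDoubleToWord64)

    -- A 64-bit float argument reaches the callee as two 32-bit integer
    -- words, high word first, matching the [v1, v2] pair returned above.
    splitDouble :: Double -> (Word32, Word32)
    splitDouble d = (fromIntegral (w `shiftR` 32), fromIntegral w)
      where
        w :: Word64
        w = castDoubleToWord64 d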
+-- | Move args from the integer vregs into which they have been
+-- marshalled, into %o0 .. %o5, and the rest onto the stack.
+--
+move_final :: [Reg] -> [Reg] -> Int -> [Instr]
+
+-- all args done
+move_final [] _ _
+ = []
+
+-- out of aregs; move to stack
+move_final (v:vs) [] offset
+ = ST II32 v (spRel offset)
+ : move_final vs [] (offset+1)
+
+-- move into an arg (%o[0..5]) reg
+move_final (v:vs) (a:az) offset
+ = OR False g0 (RIReg v) a
+ : move_final vs az offset
+
+
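A toy restatement of move_final with strings in place of Regs and Instrs, making the regs-then-stack split visible (the offset counts stack words, as with spRel):

    moveFinal :: [String] -> [String] -> Int -> [String]
    moveFinal []     _      _   = []                          -- all args done
    moveFinal (v:vs) []     off =                             -- spill to stack
        ("st " ++ v ++ ", [sp word " ++ show off ++ "]") : moveFinal vs [] (off + 1)
    moveFinal (v:vs) (a:as) off =                             -- into an arg reg
        ("mov " ++ v ++ ", " ++ a) : moveFinal vs as off

    -- e.g. moveFinal ["v0","v1","v2","v3","v4","v5","v6","v7"]
    --                ["%o0","%o1","%o2","%o3","%o4","%o5"] k
    -- moves v0..v5 into the six arg regs and stores v6 and v7 at
    -- stack words k and k+1.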
+-- | Assign results returned from the call into their
+-- destination regs.
+--
+assign_code :: Platform -> [CmmHinted LocalReg] -> OrdList Instr
+
+assign_code _ [] = nilOL
+
+assign_code platform [CmmHinted dest _hint]
+ = let rep = localRegType dest
+ width = typeWidth rep
+ r_dest = getRegisterReg (CmmLocal dest)
+
+ result
+ | isFloatType rep
+ , W32 <- width
+ = unitOL $ FMOV FF32 (regSingle $ fReg 0) r_dest
+
+ | isFloatType rep
+ , W64 <- width
+ = unitOL $ FMOV FF64 (regSingle $ fReg 0) r_dest
+
+ | not $ isFloatType rep
+ , W32 <- width
+ = unitOL $ mkRegRegMoveInstr platform (regSingle $ oReg 0) r_dest
+
+ | not $ isFloatType rep
+ , W64 <- width
+ , r_dest_hi <- getHiVRegFromLo r_dest
+ = toOL [ mkRegRegMoveInstr platform (regSingle $ oReg 0) r_dest_hi
+ , mkRegRegMoveInstr platform (regSingle $ oReg 1) r_dest]
+
+ | otherwise
+ = panic "SPARC.CodeGen.GenCCall: no match"
+
+ in result
+
+assign_code _ _
+ = panic "SPARC.CodeGen.GenCCall: no match"
+
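In the W64 branch above the callee hands back the high word in %o0 and the low word in %o1, and the two register moves reassemble the 64-bit destination. The same glue at the value level, as a short sketch:

    import Data.Bits (shiftL, (.|.))
    import Data.Word (Word32, Word64)

    -- High word from %o0, low word from %o1, recombined into 64 bits.
    combineWords :: Word32 -> Word32 -> Word64
    combineWords hi lo = (fromIntegral hi `shiftL` 32) .|. fromIntegral lo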
+
+
+-- | Generate a call to implement an out-of-line floating point operation
+outOfLineMachOp
+ :: CallishMachOp
+ -> NatM (Either CLabel CmmExpr)
+
+outOfLineMachOp mop
+ = do let functionName
+ = outOfLineMachOp_table mop
+
+ dflags <- getDynFlags
+ mopExpr <- cmmMakeDynamicReference dflags addImportNat CallReference
+ $ mkForeignLabel functionName Nothing ForeignLabelInExternalPackage IsFunction
+
+ let mopLabelOrExpr
+ = case mopExpr of
+ CmmLit (CmmLabel lbl) -> Left lbl
+ _ -> Right mopExpr
+
+ return mopLabelOrExpr
+
+
+-- | Decide what C function to use to implement a CallishMachOp
+--
+outOfLineMachOp_table
+ :: CallishMachOp
+ -> FastString
+
+outOfLineMachOp_table mop
+ = case mop of
+ MO_F32_Exp -> fsLit "expf"
+ MO_F32_Log -> fsLit "logf"
+ MO_F32_Sqrt -> fsLit "sqrtf"
+ MO_F32_Pwr -> fsLit "powf"
+
+ MO_F32_Sin -> fsLit "sinf"
+ MO_F32_Cos -> fsLit "cosf"
+ MO_F32_Tan -> fsLit "tanf"
+
+ MO_F32_Asin -> fsLit "asinf"
+ MO_F32_Acos -> fsLit "acosf"
+ MO_F32_Atan -> fsLit "atanf"
+
+ MO_F32_Sinh -> fsLit "sinhf"
+ MO_F32_Cosh -> fsLit "coshf"
+ MO_F32_Tanh -> fsLit "tanhf"
+
+ MO_F64_Exp -> fsLit "exp"
+ MO_F64_Log -> fsLit "log"
+ MO_F64_Sqrt -> fsLit "sqrt"
+ MO_F64_Pwr -> fsLit "pow"
+
+ MO_F64_Sin -> fsLit "sin"
+ MO_F64_Cos -> fsLit "cos"
+ MO_F64_Tan -> fsLit "tan"
+
+ MO_F64_Asin -> fsLit "asin"
+ MO_F64_Acos -> fsLit "acos"
+ MO_F64_Atan -> fsLit "atan"
+
+ MO_F64_Sinh -> fsLit "sinh"
+ MO_F64_Cosh -> fsLit "cosh"
+ MO_F64_Tanh -> fsLit "tanh"
+
+ MO_Memcpy -> fsLit "memcpy"
+ MO_Memset -> fsLit "memset"
+ MO_Memmove -> fsLit "memmove"
+
+ MO_PopCnt w -> fsLit $ popCntLabel w
+
+ MO_S_QuotRem {} -> unsupported
+ MO_WriteBarrier -> unsupported
+ MO_Touch -> unsupported
+ where unsupported = panic ("outOfLineCmmOp: " ++ show mop
+ ++ " not supported here")
+