1 files changed, 625 insertions, 0 deletions
diff --git a/compiler/nativeGen/SPARC/CodeGen/Gen32.hs b/compiler/nativeGen/SPARC/CodeGen/Gen32.hs
new file mode 100644
index 0000000000..9a623d9218
--- /dev/null
+++ b/compiler/nativeGen/SPARC/CodeGen/Gen32.hs
@@ -0,0 +1,625 @@
+
+-- | Evaluation of 32 bit values.
+module SPARC.CodeGen.Gen32 (
+	getSomeReg,
+	getRegister
+)
+
+where
+
+import SPARC.CodeGen.CondCode
+import SPARC.CodeGen.Amode
+import SPARC.CodeGen.Gen64
+import SPARC.CodeGen.Base
+import SPARC.Stack
+import SPARC.Instr
+import SPARC.Cond
+import SPARC.AddrMode
+import SPARC.Imm
+import SPARC.Regs
+import SPARC.Base
+import NCGMonad
+import Size
+import Reg
+
+import Cmm
+import BlockId
+
+import OrdList
+import Outputable
+
+-- | The dual to getAnyReg: compute an expression into a register, but
+-- 	we don't mind which one it is.
+getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
+getSomeReg expr = do
+  r <- getRegister expr
+  case r of
+    Any rep code -> do
+	tmp <- getNewRegNat rep
+	return (tmp, code tmp)
+    Fixed _ reg code -> 
+	return (reg, code)
+
+
+
+-- | Make code to evaluate a 32 bit expression.
+--
+getRegister :: CmmExpr -> NatM Register
+
+getRegister (CmmReg reg) 
+  = return (Fixed (cmmTypeSize (cmmRegType reg)) 
+		  (getRegisterReg reg) nilOL)
+
+getRegister tree@(CmmRegOff _ _) 
+  = getRegister (mangleIndexTree tree)
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32)
+             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32)
+             [CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 (getHiVRegFromLo rlo) code
+
+getRegister (CmmMachOp (MO_UU_Conv W64 W32) [x]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 rlo code
+
+getRegister (CmmMachOp (MO_SS_Conv W64 W32) [x]) = do
+  ChildCode64 code rlo <- iselExpr64 x
+  return $ Fixed II32 rlo code       
+
+
+-- Load a literal float into a float register.
+--	The actual literal is stored in a new data area, and we load it 
+--	at runtime.
+getRegister (CmmLit (CmmFloat f W32)) = do
+
+    -- a label for the new data area
+    lbl <- getNewLabelNat
+    tmp <- getNewRegNat II32
+
+    let code dst = toOL [
+            -- the data area         
+	    LDATA ReadOnlyData
+	                [CmmDataLabel lbl,
+			 CmmStaticLit (CmmFloat f W32)],
+
+            -- load the literal
+	    SETHI (HI (ImmCLbl lbl)) tmp,
+	    LD II32 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst] 
+
+    return (Any FF32 code)
+
+getRegister (CmmLit (CmmFloat d W64)) = do
+    lbl <- getNewLabelNat
+    tmp <- getNewRegNat II32
+    let code dst = toOL [
+	    LDATA ReadOnlyData
+	                [CmmDataLabel lbl,
+			 CmmStaticLit (CmmFloat d W64)],
+	    SETHI (HI (ImmCLbl lbl)) tmp,
+	    LD II64 (AddrRegImm tmp (LO (ImmCLbl lbl))) dst] 
+    return (Any FF64 code)
+
+getRegister (CmmMachOp mop [x]) -- unary MachOps
+  = case mop of
+      MO_F_Neg W32     -> trivialUFCode FF32 (FNEG FF32) x
+      MO_F_Neg W64     -> trivialUFCode FF64 (FNEG FF64) x
+
+      MO_S_Neg rep     -> trivialUCode (intSize rep) (SUB False False g0) x
+      MO_Not rep       -> trivialUCode (intSize rep) (XNOR False g0) x
+
+      MO_FF_Conv W64 W32-> coerceDbl2Flt x
+      MO_FF_Conv W32 W64-> coerceFlt2Dbl x
+
+      MO_FS_Conv from to -> coerceFP2Int from to x
+      MO_SF_Conv from to -> coerceInt2FP from to x
+
+      -- Conversions which are a nop on sparc
+      MO_UU_Conv from to
+	| from == to    -> conversionNop (intSize to)  x
+      MO_UU_Conv W32 W8 -> trivialCode W8 (AND False) x (CmmLit (CmmInt 255 W8))
+      MO_UU_Conv W32 to -> conversionNop (intSize to) x
+      MO_SS_Conv W32 to -> conversionNop (intSize to) x
+
+      MO_UU_Conv W8  to@W32  -> conversionNop (intSize to)  x
+      MO_UU_Conv W16 to@W32  -> conversionNop (intSize to)  x
+      MO_UU_Conv W8  to@W16  -> conversionNop (intSize to)  x
+
+      -- sign extension
+      MO_SS_Conv W8  W32  -> integerExtend W8  W32 x
+      MO_SS_Conv W16 W32  -> integerExtend W16 W32 x
+      MO_SS_Conv W8  W16  -> integerExtend W8  W16 x
+
+      _ 		  -> panic ("Unknown unary mach op: " ++ show mop)
+
+
+getRegister (CmmMachOp mop [x, y]) -- dyadic PrimOps
+  = case mop of
+      MO_Eq _ 		-> condIntReg EQQ x y
+      MO_Ne _ 		-> condIntReg NE x y
+
+      MO_S_Gt _		-> condIntReg GTT x y
+      MO_S_Ge _		-> condIntReg GE x y
+      MO_S_Lt _		-> condIntReg LTT x y
+      MO_S_Le _		-> condIntReg LE x y
+	      
+      MO_U_Gt W32  	-> condIntReg GTT x y
+      MO_U_Ge W32  	-> condIntReg GE x y
+      MO_U_Lt W32  	-> condIntReg LTT x y
+      MO_U_Le W32  	-> condIntReg LE x y
+
+      MO_U_Gt W16 	-> condIntReg GU  x y
+      MO_U_Ge W16	-> condIntReg GEU x y
+      MO_U_Lt W16 	-> condIntReg LU  x y
+      MO_U_Le W16 	-> condIntReg LEU x y
+
+      MO_Add W32 	-> trivialCode W32 (ADD False False) x y
+      MO_Sub W32 	-> trivialCode W32 (SUB False False) x y
+
+      MO_S_MulMayOflo rep -> imulMayOflo rep x y
+
+      MO_S_Quot W32 	-> idiv True  False x y
+      MO_U_Quot W32 	-> idiv False False x y
+       
+      MO_S_Rem  W32	-> irem True  x y
+      MO_U_Rem	W32	-> irem False x y
+       
+      MO_F_Eq _ 	-> condFltReg EQQ x y
+      MO_F_Ne _ 	-> condFltReg NE x y
+
+      MO_F_Gt _ 	-> condFltReg GTT x y
+      MO_F_Ge _ 	-> condFltReg GE x y 
+      MO_F_Lt _ 	-> condFltReg LTT x y
+      MO_F_Le _ 	-> condFltReg LE x y
+
+      MO_F_Add  w	-> trivialFCode w FADD x y
+      MO_F_Sub  w  	-> trivialFCode w FSUB x y
+      MO_F_Mul  w  	-> trivialFCode w FMUL x y
+      MO_F_Quot w  	-> trivialFCode w FDIV x y
+
+      MO_And rep   	-> trivialCode rep (AND False) x y
+      MO_Or  rep    	-> trivialCode rep (OR  False) x y
+      MO_Xor rep   	-> trivialCode rep (XOR False) x y
+
+      MO_Mul rep 	-> trivialCode rep (SMUL False) x y
+
+      MO_Shl rep   	-> trivialCode rep SLL  x y
+      MO_U_Shr rep   	-> trivialCode rep SRL x y
+      MO_S_Shr rep   	-> trivialCode rep SRA x y
+
+      _			-> pprPanic "getRegister(sparc) - binary CmmMachOp (1)" (pprMachOp mop)
+  where
+
+
+getRegister (CmmLoad mem pk) = do
+    Amode src code <- getAmode mem
+    let
+	code__2 dst 	= code `snocOL` LD (cmmTypeSize pk) src dst
+    return (Any (cmmTypeSize pk) code__2)
+
+getRegister (CmmLit (CmmInt i _))
+  | fits13Bits i
+  = let
+    	src = ImmInt (fromInteger i)
+    	code dst = unitOL (OR False g0 (RIImm src) dst)
+    in
+    	return (Any II32 code)
+
+getRegister (CmmLit lit)
+  = let imm = litToImm lit
+    	code dst = toOL [
+    	    SETHI (HI imm) dst,
+    	    OR False dst (RIImm (LO imm)) dst]
+    in return (Any II32 code)
+
+
+getRegister _
+	= panic "SPARC.CodeGen.Gen32.getRegister: no match"
+
+
+-- | sign extend and widen
+integerExtend 
+	:: Width 		-- ^ width of source expression
+	-> Width 		-- ^ width of result
+	-> CmmExpr 		-- ^ source expression
+	-> NatM Register	
+
+integerExtend from to expr
+ = do	-- load the expr into some register
+ 	(reg, e_code) 	<- getSomeReg expr
+	tmp		<- getNewRegNat II32
+	let bitCount
+		= case (from, to) of
+			(W8,  W32)	-> 24
+			(W16, W32)	-> 16
+			(W8,  W16)	-> 24
+			_		-> panic "SPARC.CodeGen.Gen32: no match"
+ 	let code dst
+		= e_code 	
+
+		-- local shift word left to load the sign bit
+		`snocOL`  SLL reg (RIImm (ImmInt bitCount)) tmp
+			
+		-- arithmetic shift right to sign extend
+		`snocOL`  SRA tmp (RIImm (ImmInt bitCount)) dst
+			
+	return (Any (intSize to) code)
+				
+
+conversionNop
+	:: Size -> CmmExpr -> NatM Register
+conversionNop new_rep expr
+ = do	e_code <- getRegister expr
+	return (setSizeOfRegister e_code new_rep)
+
+
+
+-- | Generate an integer division instruction.
+idiv :: Bool -> Bool -> CmmExpr -> CmmExpr -> NatM Register
+	
+-- For unsigned division with a 32 bit numerator, 
+--		we can just clear the Y register.
+idiv False cc x y 
+ = do
+	(a_reg, a_code)		<- getSomeReg x
+       	(b_reg, b_code)		<- getSomeReg y
+	
+	let code dst
+		= 	a_code 
+		`appOL`	b_code  
+		`appOL`	toOL
+			[ WRY  g0 g0
+			, UDIV cc a_reg (RIReg b_reg) dst]
+			
+	return (Any II32 code)
+    	
+
+-- For _signed_ division with a 32 bit numerator,
+--		we have to sign extend the numerator into the Y register.
+idiv True cc x y 
+ = do
+	(a_reg, a_code)		<- getSomeReg x
+       	(b_reg, b_code)		<- getSomeReg y
+	
+	tmp			<- getNewRegNat II32
+	
+	let code dst
+		= 	a_code 
+		`appOL`	b_code  
+		`appOL`	toOL
+			[ SRA  a_reg (RIImm (ImmInt 16)) tmp		-- sign extend
+			, SRA  tmp   (RIImm (ImmInt 16)) tmp
+
+			, WRY  tmp g0				
+			, SDIV cc a_reg (RIReg b_reg) dst]
+			
+	return (Any II32 code)
+
+
+-- | Do an integer remainder.
+--
+--	 NOTE:	The SPARC v8 architecture manual says that integer division
+--		instructions _may_ generate a remainder, depending on the implementation.
+--		If so it is _recommended_ that the remainder is placed in the Y register.
+--
+--          The UltraSparc 2007 manual says Y is _undefined_ after division.
+--
+--		The SPARC T2 doesn't store the remainder, not sure about the others. 
+--		It's probably best not to worry about it, and just generate our own
+--		remainders. 
+--
+irem :: Bool -> CmmExpr -> CmmExpr -> NatM Register
+
+-- For unsigned operands: 
+--		Division is between a 64 bit numerator and a 32 bit denominator, 
+--		so we still have to clear the Y register.
+irem False x y 
+ = do
+    	(a_reg, a_code)	<- getSomeReg x
+	(b_reg, b_code)	<- getSomeReg y
+
+	tmp_reg		<- getNewRegNat II32
+
+	let code dst
+		= 	a_code
+		`appOL`	b_code
+		`appOL`	toOL
+			[ WRY	g0 g0
+			, UDIV  False         a_reg (RIReg b_reg) tmp_reg
+			, UMUL  False       tmp_reg (RIReg b_reg) tmp_reg
+			, SUB   False False   a_reg (RIReg tmp_reg) dst]
+    
+    	return	(Any II32 code)
+
+    
+
+-- For signed operands:
+--		Make sure to sign extend into the Y register, or the remainder
+--		will have the wrong sign when the numerator is negative.
+--
+--	TODO:	When sign extending, GCC only shifts the a_reg right by 17 bits,
+--		not the full 32. Not sure why this is, something to do with overflow?
+--		If anyone cares enough about the speed of signed remainder they
+--		can work it out themselves (then tell me). -- BL 2009/01/20
+irem True x y 
+ = do
+    	(a_reg, a_code)	<- getSomeReg x
+	(b_reg, b_code)	<- getSomeReg y
+	
+	tmp1_reg	<- getNewRegNat II32
+	tmp2_reg	<- getNewRegNat II32
+		
+	let code dst
+		=	a_code
+		`appOL`	b_code
+		`appOL`	toOL
+			[ SRA	a_reg      (RIImm (ImmInt 16)) tmp1_reg	-- sign extend
+			, SRA	tmp1_reg   (RIImm (ImmInt 16)) tmp1_reg	-- sign extend
+			, WRY	tmp1_reg g0
+
+			, SDIV  False          a_reg (RIReg b_reg)    tmp2_reg	
+			, SMUL  False       tmp2_reg (RIReg b_reg)    tmp2_reg
+			, SUB   False False    a_reg (RIReg tmp2_reg) dst]
+			
+	return (Any II32 code)
+   
+
+imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
+imulMayOflo rep a b 
+ = do
+	(a_reg, a_code) <- getSomeReg a
+	(b_reg, b_code) <- getSomeReg b
+	res_lo <- getNewRegNat II32
+	res_hi <- getNewRegNat II32
+
+	let shift_amt  = case rep of
+			  W32 -> 31
+			  W64 -> 63
+			  _ -> panic "shift_amt"
+	
+	let code dst = a_code `appOL` b_code `appOL`
+                       toOL [
+                           SMUL False a_reg (RIReg b_reg) res_lo,
+                           RDY res_hi,
+                           SRA res_lo (RIImm (ImmInt shift_amt)) res_lo,
+                           SUB False False res_lo (RIReg res_hi) dst
+                        ]
+	return (Any II32 code)
+
+
+-- -----------------------------------------------------------------------------
+-- 'trivial*Code': deal with trivial instructions
+
+-- Trivial (dyadic: 'trivialCode', floating-point: 'trivialFCode',
+-- unary: 'trivialUCode', unary fl-pt:'trivialUFCode') instructions.
+-- Only look for constants on the right hand side, because that's
+-- where the generic optimizer will have put them.
+
+-- Similarly, for unary instructions, we don't have to worry about
+-- matching an StInt as the argument, because genericOpt will already
+-- have handled the constant-folding.
+
+trivialCode
+	:: Width
+	-> (Reg -> RI -> Reg -> Instr)
+	-> CmmExpr
+	-> CmmExpr
+	-> NatM Register
+	
+trivialCode _ instr x (CmmLit (CmmInt y _))
+  | fits13Bits y
+  = do
+      (src1, code) <- getSomeReg x
+      let
+    	src2 = ImmInt (fromInteger y)
+    	code__2 dst = code `snocOL` instr src1 (RIImm src2) dst
+      return (Any II32 code__2)
+
+
+trivialCode _ instr x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    let
+    	code__2 dst = code1 `appOL` code2 `snocOL`
+    	    	      instr src1 (RIReg src2) dst
+    return (Any II32 code__2)
+
+
+trivialFCode 
+	:: Width
+	-> (Size -> Reg -> Reg -> Reg -> Instr)
+	-> CmmExpr
+	-> CmmExpr
+	-> NatM Register
+
+trivialFCode pk instr x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    tmp <- getNewRegNat FF64
+    let
+    	promote x = FxTOy FF32 FF64 x tmp
+
+    	pk1   = cmmExprType x
+    	pk2   = cmmExprType y
+
+    	code__2 dst =
+    	    	if pk1 `cmmEqType` pk2 then
+    	            code1 `appOL` code2 `snocOL`
+    	    	    instr (floatSize pk) src1 src2 dst
+    	    	else if typeWidth pk1 == W32 then
+    	    	    code1 `snocOL` promote src1 `appOL` code2 `snocOL`
+    	    	    instr FF64 tmp src2 dst
+    	    	else
+    	    	    code1 `appOL` code2 `snocOL` promote src2 `snocOL`
+    	    	    instr FF64 src1 tmp dst
+    return (Any (cmmTypeSize $ if pk1 `cmmEqType` pk2 then pk1 else cmmFloat W64) 
+    		code__2)
+
+
+
+trivialUCode
+	:: Size
+	-> (RI -> Reg -> Instr)
+	-> CmmExpr
+	-> NatM Register
+	
+trivialUCode size instr x = do
+    (src, code) <- getSomeReg x
+    let
+    	code__2 dst = code `snocOL` instr (RIReg src) dst
+    return (Any size code__2)
+
+
+trivialUFCode 
+	:: Size
+	-> (Reg -> Reg -> Instr)
+	-> CmmExpr
+	-> NatM Register 
+	
+trivialUFCode pk instr x = do
+    (src, code) <- getSomeReg x
+    let
+    	code__2 dst = code `snocOL` instr src dst
+    return (Any pk code__2)
+
+
+
+
+-- Coercions -------------------------------------------------------------------
+
+-- | Coerce a integer value to floating point
+coerceInt2FP :: Width -> Width -> CmmExpr -> NatM Register
+coerceInt2FP width1 width2 x = do
+    (src, code) <- getSomeReg x
+    let
+    	code__2 dst = code `appOL` toOL [
+    	    ST (intSize width1) src (spRel (-2)),
+    	    LD (intSize width1) (spRel (-2)) dst,
+    	    FxTOy (intSize width1) (floatSize width2) dst dst]
+    return (Any (floatSize $ width2) code__2)
+
+
+
+-- | Coerce a floating point value to integer
+--
+--   NOTE: On sparc v9 there are no instructions to move a value from an
+--	   FP register directly to an int register, so we have to use a load/store.
+--
+coerceFP2Int :: Width -> Width -> CmmExpr -> NatM Register
+coerceFP2Int width1 width2 x 
+ = do	let fsize1	= floatSize width1
+	    fsize2	= floatSize width2
+	
+            isize2	= intSize   width2
+
+	(fsrc, code)	<- getSomeReg x
+	fdst		<- getNewRegNat fsize2
+    
+	let code2 dst	
+		= 	code
+		`appOL` toOL
+			-- convert float to int format, leaving it in a float reg.
+			[ FxTOy fsize1 isize2 fsrc fdst
+
+			-- store the int into mem, then load it back to move
+			--	it into an actual int reg.
+			, ST    fsize2 fdst (spRel (-2))
+			, LD	isize2 (spRel (-2)) dst]
+
+	return (Any isize2 code2)
+
+
+-- | Coerce a double precision floating point value to single precision.
+coerceDbl2Flt :: CmmExpr -> NatM Register
+coerceDbl2Flt x = do
+    (src, code) <- getSomeReg x
+    return (Any FF32 (\dst -> code `snocOL` FxTOy FF64 FF32 src dst)) 
+
+
+-- | Coerce a single precision floating point value to double precision
+coerceFlt2Dbl :: CmmExpr -> NatM Register
+coerceFlt2Dbl x = do
+    (src, code) <- getSomeReg x
+    return (Any FF64 (\dst -> code `snocOL` FxTOy FF32 FF64 src dst))
+
+
+
+
+-- Condition Codes -------------------------------------------------------------
+--
+-- Evaluate a comparision, and get the result into a register.
+-- 
+-- Do not fill the delay slots here. you will confuse the register allocator.
+--
+condIntReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
+condIntReg EQQ x (CmmLit (CmmInt 0 _)) = do
+    (src, code) <- getSomeReg x
+    let
+	code__2 dst = code `appOL` toOL [
+    	    SUB False True g0 (RIReg src) g0,
+    	    SUB True False g0 (RIImm (ImmInt (-1))) dst]
+    return (Any II32 code__2)
+
+condIntReg EQQ x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    let
+    	code__2 dst = code1 `appOL` code2 `appOL` toOL [
+    	    XOR False src1 (RIReg src2) dst,
+    	    SUB False True g0 (RIReg dst) g0,
+    	    SUB True False g0 (RIImm (ImmInt (-1))) dst]
+    return (Any II32 code__2)
+
+condIntReg NE x (CmmLit (CmmInt 0 _)) = do
+    (src, code) <- getSomeReg x
+    let
+    	code__2 dst = code `appOL` toOL [
+    	    SUB False True g0 (RIReg src) g0,
+    	    ADD True False g0 (RIImm (ImmInt 0)) dst]
+    return (Any II32 code__2)
+
+condIntReg NE x y = do
+    (src1, code1) <- getSomeReg x
+    (src2, code2) <- getSomeReg y
+    let
+	code__2 dst = code1 `appOL` code2 `appOL` toOL [
+    	    XOR False src1 (RIReg src2) dst,
+    	    SUB False True g0 (RIReg dst) g0,
+    	    ADD True False g0 (RIImm (ImmInt 0)) dst]
+    return (Any II32 code__2)
+
+condIntReg cond x y = do
+    bid1@(BlockId _) <- getBlockIdNat
+    bid2@(BlockId _) <- getBlockIdNat
+    CondCode _ cond cond_code <- condIntCode cond x y
+    let
+	code__2 dst = cond_code `appOL` toOL [
+	    BI cond False bid1, NOP,
+	    OR False g0 (RIImm (ImmInt 0)) dst,
+	    BI ALWAYS False bid2, NOP,
+	    NEWBLOCK bid1,
+	    OR False g0 (RIImm (ImmInt 1)) dst,
+	    NEWBLOCK bid2]
+    return (Any II32 code__2)
+
+
+condFltReg :: Cond -> CmmExpr -> CmmExpr -> NatM Register
+condFltReg cond x y = do
+    bid1@(BlockId _) <- getBlockIdNat
+    bid2@(BlockId _) <- getBlockIdNat
+
+    CondCode _ cond cond_code <- condFltCode cond x y
+    let
+    	code__2 dst = cond_code `appOL` toOL [ 
+    	    NOP,
+	    BF cond False bid1, NOP,
+	    OR False g0 (RIImm (ImmInt 0)) dst,
+	    BI ALWAYS False bid2, NOP,
+	    NEWBLOCK bid1,
+	    OR False g0 (RIImm (ImmInt 1)) dst,
+	    NEWBLOCK bid2]
+    return (Any II32 code__2)