diff options
| author | sewardj <unknown> | 2000-01-28 18:07:56 +0000 | 
|---|---|---|
| committer | sewardj <unknown> | 2000-01-28 18:07:56 +0000 | 
| commit | c39373f1371fd1e46ea91be262f00c277b31f8e5 (patch) | |
| tree | 74dcc8e9b0060821c2dfa121580657d7d74432d9 /ghc/compiler/nativeGen | |
| parent | f1553c47e89e858cd4576732582e6230730adf53 (diff) | |
| download | haskell-c39373f1371fd1e46ea91be262f00c277b31f8e5.tar.gz | |
[project @ 2000-01-28 18:07:55 by sewardj]
Modifications to make x86 register spilling to work reasonably.  It
should work ok most of the time, although there is still a remote
possibility that the allocator simply will be unable to complete
spilling, and will just give up.
-- Incrementally try with 0, 1, 2 and 3 spill regs, so as not to
   unduly restrict the supply of regs in code which doesn't need spilling.
-- Remove the use of %ecx for shift values, so it is always available
   as the first-choice spill temporary.  For code which doesn't do
   int division, make %edx and %eax available for spilling too.
   Shifts by a non-constant amount (very rare) are now done by
   a short test-and-jump sequence, so that %ecx is not tied up.
-- x86 FP: do sin, cos, tan in-line so we get the same answers as gcc.
-- Moved a little code around to remove recursive dependencies.
-- Fix a subtle bug in x86 regUsage, which could cause underestimation
   of live ranges.
Diffstat (limited to 'ghc/compiler/nativeGen')
| -rw-r--r-- | ghc/compiler/nativeGen/AsmCodeGen.lhs | 4 | ||||
| -rw-r--r-- | ghc/compiler/nativeGen/AsmRegAlloc.lhs | 115 | ||||
| -rw-r--r-- | ghc/compiler/nativeGen/MachCode.lhs | 86 | ||||
| -rw-r--r-- | ghc/compiler/nativeGen/MachMisc.hi-boot | 3 | ||||
| -rw-r--r-- | ghc/compiler/nativeGen/MachMisc.hi-boot-5 | 3 | ||||
| -rw-r--r-- | ghc/compiler/nativeGen/MachMisc.lhs | 5 | ||||
| -rw-r--r-- | ghc/compiler/nativeGen/MachRegs.lhs | 39 | ||||
| -rw-r--r-- | ghc/compiler/nativeGen/PprMach.lhs | 34 | ||||
| -rw-r--r-- | ghc/compiler/nativeGen/RegAllocInfo.lhs | 162 | 
9 files changed, 329 insertions, 122 deletions
| diff --git a/ghc/compiler/nativeGen/AsmCodeGen.lhs b/ghc/compiler/nativeGen/AsmCodeGen.lhs index aa5d4e485c..31c3825b81 100644 --- a/ghc/compiler/nativeGen/AsmCodeGen.lhs +++ b/ghc/compiler/nativeGen/AsmCodeGen.lhs @@ -21,7 +21,7 @@ import AbsCSyn		( AbstractC, MagicId )  import AsmRegAlloc	( runRegAllocate )  import OrdList		( OrdList )  import PrimOp		( commutableOp, PrimOp(..) ) -import RegAllocInfo	( mkMRegsState, MRegsState ) +import RegAllocInfo	( mkMRegsState, MRegsState, findReservedRegs )  import Stix		( StixTree(..), StixReg(..),                             pprStixTrees, CodeSegment(..) )  import PrimRep		( isFloatingRep, PrimRep(..) ) @@ -130,7 +130,7 @@ might be needed.  scheduleMachCode :: [InstrList] -> [[Instr]]  scheduleMachCode -  = map (runRegAllocate freeRegsState reservedRegs) +  = map (runRegAllocate freeRegsState findReservedRegs)    where      freeRegsState = mkMRegsState (extractMappedRegNos freeRegs)  \end{code} diff --git a/ghc/compiler/nativeGen/AsmRegAlloc.lhs b/ghc/compiler/nativeGen/AsmRegAlloc.lhs index 9a6fca0ca4..2ddb991243 100644 --- a/ghc/compiler/nativeGen/AsmRegAlloc.lhs +++ b/ghc/compiler/nativeGen/AsmRegAlloc.lhs @@ -31,24 +31,38 @@ things the hard way.  \begin{code}  runRegAllocate      :: MRegsState -    -> [RegNo] +    -> ([Instr] -> [[RegNo]])      -> InstrList      -> [Instr] -runRegAllocate regs reserve_regs instrs +runRegAllocate regs find_reserve_regs instrs    = case simpleAlloc of -	Just x  -> x -	Nothing -> hairyAlloc +	Just simple -> simple +	Nothing     -> tryHairy reserves    where -    flatInstrs	= flattenOrdList instrs -    simpleAlloc = simpleRegAlloc regs [] emptyFM   flatInstrs -    hairyAlloc	= hairyRegAlloc  regs reserve_regs flatInstrs +    tryHairy []  +       = error "nativeGen: register allocator: too difficult!  Try -fvia-C.\n" +    tryHairy (resv:resvs) +       = case hairyAlloc resv of +            Just success -> success +            Nothing      -> fooble resvs (tryHairy resvs) + +    fooble [] x = x +    fooble (resvs:_) x = trace ("nativeGen: spilling with "  +                                ++ show (length resvs - 2) ++  +                                " int temporaries") x + +    reserves         = find_reserve_regs flatInstrs +    flatInstrs       = flattenOrdList instrs +    simpleAlloc      = simpleRegAlloc regs [] emptyFM   flatInstrs +    hairyAlloc resvd = hairyRegAlloc  regs resvd flatInstrs -runHairyRegAllocate		-- use only hairy for i386! + +runHairyRegAllocate      :: MRegsState      -> [RegNo]      -> InstrList -    -> [Instr] +    -> Maybe [Instr]  runHairyRegAllocate regs reserve_regs instrs    = hairyRegAlloc regs reserve_regs flatInstrs @@ -83,7 +97,8 @@ simpleRegAlloc free live env (instr:instrs)    where      instr3 = patchRegs instr (lookup env2) -    (srcs, dsts) = case regUsage instr of (RU s d) -> (regSetToList s, regSetToList d) +    (srcs, dsts) = case regUsage instr of  +                      (RU s d) -> (regSetToList s, regSetToList d)      lookup env x = case lookupFM env x of Just y -> y; Nothing -> x @@ -121,40 +136,49 @@ Here is the ``clever'' bit. First go backward (i.e. left), looking for  the last use of dynamic registers. Then go forward (i.e. right), filling  registers with static placements. +hairyRegAlloc takes reserve_regs as the regs to use as spill +temporaries.  First it tries to allocate using all regs except +reserve_regs.  If that fails, it inserts spill code and tries again to +allocate regs, but this time with the spill temporaries available. +Even this might not work if there are insufficient spill temporaries: +in the worst case on x86, we'd need 3 of them, for insns like +addl (reg1,reg2,4) reg3, since this insn uses all 3 regs as input. +  \begin{code}  hairyRegAlloc      :: MRegsState      -> [RegNo]      -> [Instr] -    -> [Instr] +    -> Maybe [Instr]  hairyRegAlloc regs reserve_regs instrs = -  case mapAccumB (doRegAlloc reserve_regs) (RH regs' 1 emptyFM) noFuture instrs of  -   (RH _ mloc1 _, _, instrs') -     | mloc1 == 1 -> instrs' -     | otherwise  -> -      let -       instrs_patched' = patchMem instrs' -       instrs_patched  = flattenOrdList instrs_patched' -      in -      case mapAccumB do_RegAlloc_Nil (RH regs'' mloc1 emptyFM) noFuture instrs_patched of -	 ((RH _ mloc2 _),_,instrs'')  -	    | mloc2 == mloc1 -> instrs''  -            | otherwise      -> instrs'' -	       --pprPanic "runRegAllocate" (ppr mloc2 <+> ppr mloc1) +  case mapAccumB (doRegAlloc reserve_regs)  +                 (RH regs' 1 emptyFM) noFuture instrs of  +     (RH _ mloc1 _, _, instrs') +        -- succeeded w/out using reserves +        | mloc1 == 1 -> Just instrs' +        -- failed, and no reserves avail, so pointless to attempt spilling  +        | null reserve_regs -> Nothing +        -- failed, but we have reserves, so attempt to do spilling +        | otherwise   +        -> let instrs_patched' = patchMem instrs' +               instrs_patched  = flattenOrdList instrs_patched' +           in +               case mapAccumB (doRegAlloc []) (RH regs'' mloc1 emptyFM)  +                    noFuture instrs_patched of +                  ((RH _ mloc2 _),_,instrs'')  +                     -- successfully allocated the patched code +        	     | mloc2 == mloc1 -> Just instrs'' +                     -- no; we have to give up +                     | otherwise      -> Nothing  +                       -- instrs'' +	               -- pprPanic "runRegAllocate" (ppr mloc2 <+> ppr mloc1)    where      regs'  = regs `useMRegs` reserve_regs      regs'' = mkMRegsState reserve_regs -do_RegAlloc_Nil = doRegAlloc [] -- out here to avoid CAF (sigh) -do_RegAlloc_Nil -    :: RegHistory MRegsState -    -> RegFuture -    -> Instr -    -> (RegHistory MRegsState, RegFuture, Instr) - -noFuture :: RegFuture -noFuture = RF emptyRegSet (FL emptyRegSet emptyFM) emptyFM +    noFuture :: RegFuture +    noFuture = RF emptyRegSet (FL emptyRegSet emptyFM) emptyFM  \end{code}  Here we patch instructions that reference ``registers'' which are really in @@ -225,7 +249,8 @@ getUsage (RF next_in_use future reg_conflicts) instr  	       (RL in_use future') = regLiveness instr (RL next_in_use future)  	       live_through = in_use `minusRegSet` dsts  	       last_used = [ r | r <- regSetToList srcs, -			     not (r `elementOfRegSet` (fstFL future) || r `elementOfRegSet` in_use)] +			     not (r `elementOfRegSet` (fstFL future)  +                                  || r `elementOfRegSet` in_use)]  	       in_use' = srcs `unionRegSets` live_through @@ -245,7 +270,9 @@ getUsage (RF next_in_use future reg_conflicts) instr  		  Nothing        -> live_dynamics  		  Just conflicts -> conflicts `unionRegSets` live_dynamics -	       live_dynamics = mkRegSet [ r | r@(UnmappedReg _ _) <- regSetToList live_through ] +	       live_dynamics  +                  = mkRegSet [ r | r@(UnmappedReg _ _)  +                                      <- regSetToList live_through ]  doRegAlloc'      :: [RegNo] @@ -254,7 +281,8 @@ doRegAlloc'      -> Instr      -> (RegHistory MRegsState, Instr) -doRegAlloc' reserved (RH frs loc env) (RI in_use srcs dsts lastu conflicts) instr = +doRegAlloc' reserved (RH frs loc env)  +                     (RI in_use srcs dsts lastu conflicts) instr =      (RH frs'' loc' env'', patchRegs instr dynToStatic) @@ -264,14 +292,17 @@ doRegAlloc' reserved (RH frs loc env) (RI in_use srcs dsts lastu conflicts) inst        free :: [RegNo]        free = extractMappedRegNos (map dynToStatic lastu) -      -- (1) free registers that are used last as source operands in this instruction -      frs_not_in_use = frs `useMRegs` (extractMappedRegNos (regSetToList in_use)) +      -- (1) free registers that are used last as  +      --     source operands in this instruction +      frs_not_in_use = frs `useMRegs`  +                       (extractMappedRegNos (regSetToList in_use))        frs' = (frs_not_in_use `freeMRegs` free) `useMRegs` reserved        -- (2) allocate new registers for the destination operands        -- allocate registers for new dynamics -      new_dynamix = [ r | r@(UnmappedReg _ _) <- regSetToList dsts, r `not_elem` keysFM env ] +      new_dynamix = [ r | r@(UnmappedReg _ _) <- regSetToList dsts,  +                          r `not_elem` keysFM env ]        (frs'', loc', new) = foldr allocateNewRegs (frs', loc, []) new_dynamix @@ -283,14 +314,16 @@ doRegAlloc' reserved (RH frs loc env) (RI in_use srcs dsts lastu conflicts) inst        dynToStatic dyn@(UnmappedReg _ _) =  	case lookupFM env' dyn of  	    Just r -> r -	    Nothing -> trace "Lost register; possibly a floating point type error in a _ccall_?" dyn +	    Nothing -> trace ("Lost register; possibly a floating point" +                              ++" type error in a _ccall_?") dyn        dynToStatic other = other        allocateNewRegs :: Reg                         -> (MRegsState, Int, [(Reg, Reg)])   		      -> (MRegsState, Int, [(Reg, Reg)]) -      allocateNewRegs d@(UnmappedReg _ pk) (fs, mem, lst) = (fs', mem', (d, f) : lst) +      allocateNewRegs d@(UnmappedReg _ pk) (fs, mem, lst)  +         = (fs', mem', (d, f) : lst)  	where   	 (fs', f, mem') =   	   case acceptable fs of diff --git a/ghc/compiler/nativeGen/MachCode.lhs b/ghc/compiler/nativeGen/MachCode.lhs index a4bd7772e1..b38b24ba9c 100644 --- a/ghc/compiler/nativeGen/MachCode.lhs +++ b/ghc/compiler/nativeGen/MachCode.lhs @@ -34,7 +34,6 @@ import UniqSupply	( returnUs, thenUs, mapUs, mapAndUnzipUs,  			  mapAccumLUs, UniqSM  			)  import Outputable -import PprMach 		( pprSize )  \end{code}  Code extractor for an entire stix tree---stix statement level. @@ -499,6 +498,15 @@ getRegister (StPrim primop [x]) -- unary PrimOps        FloatSqrtOp  -> trivialUFCode FloatRep  (GSQRT F) x        DoubleSqrtOp -> trivialUFCode DoubleRep (GSQRT DF) x +      FloatSinOp  -> trivialUFCode FloatRep  (GSIN F) x +      DoubleSinOp -> trivialUFCode DoubleRep (GSIN DF) x + +      FloatCosOp  -> trivialUFCode FloatRep  (GCOS F) x +      DoubleCosOp -> trivialUFCode DoubleRep (GCOS DF) x + +      FloatTanOp  -> trivialUFCode FloatRep  (GTAN F) x +      DoubleTanOp -> trivialUFCode DoubleRep (GTAN DF) x +        Double2FloatOp -> trivialUFCode FloatRep  GDTOF x        Float2DoubleOp -> trivialUFCode DoubleRep GFTOD x @@ -523,9 +531,9 @@ getRegister (StPrim primop [x]) -- unary PrimOps  	      FloatExpOp    -> (True,  SLIT("exp"))  	      FloatLogOp    -> (True,  SLIT("log")) -	      FloatSinOp    -> (True,  SLIT("sin")) -	      FloatCosOp    -> (True,  SLIT("cos")) -	      FloatTanOp    -> (True,  SLIT("tan")) +	      --FloatSinOp    -> (True,  SLIT("sin")) +	      --FloatCosOp    -> (True,  SLIT("cos")) +	      --FloatTanOp    -> (True,  SLIT("tan"))  	      FloatAsinOp   -> (True,  SLIT("asin"))  	      FloatAcosOp   -> (True,  SLIT("acos")) @@ -538,9 +546,9 @@ getRegister (StPrim primop [x]) -- unary PrimOps  	      DoubleExpOp   -> (False, SLIT("exp"))  	      DoubleLogOp   -> (False, SLIT("log")) -	      DoubleSinOp   -> (False, SLIT("sin")) -	      DoubleCosOp   -> (False, SLIT("cos")) -	      DoubleTanOp   -> (False, SLIT("tan")) +	      --DoubleSinOp   -> (False, SLIT("sin")) +	      --DoubleCosOp   -> (False, SLIT("cos")) +	      --DoubleTanOp   -> (False, SLIT("tan"))  	      DoubleAsinOp  -> (False, SLIT("asin"))  	      DoubleAcosOp  -> (False, SLIT("acos")) @@ -674,6 +682,65 @@ getRegister (StPrim primop [x, y]) -- dyadic PrimOps        {- Case2: shift length is complex (non-immediate) -}      shift_code instr x y{-amount-} +     = getRegister x   `thenUs` \ register1 -> +       getRegister y   `thenUs` \ register2 -> +       getUniqLabelNCG `thenUs` \ lbl_test3 -> +       getUniqLabelNCG `thenUs` \ lbl_test2 -> +       getUniqLabelNCG `thenUs` \ lbl_test1 -> +       getUniqLabelNCG `thenUs` \ lbl_test0 -> +       getUniqLabelNCG `thenUs` \ lbl_after -> +       getNewRegNCG IntRep   `thenUs` \ tmp -> +       let code__2 dst +              = let src_val  = registerName register1 dst +                    code_val = registerCode register1 dst +                    src_amt  = registerName register2 tmp +                    code_amt = registerCode register2 tmp +                    r_dst    = OpReg dst +                    r_tmp    = OpReg tmp +                in +                    code_val . +                    code_amt . +                    mkSeqInstrs [ +                       COMMENT (_PK_ "begin shift sequence"), +                       MOV L (OpReg src_val) r_dst, +                       MOV L (OpReg src_amt) r_tmp, + +                       BT L (ImmInt 4) r_tmp, +                       JXX GEU lbl_test3, +                       instr (OpImm (ImmInt 16)) r_dst, + +                       LABEL lbl_test3, +                       BT L (ImmInt 3) r_tmp, +                       JXX GEU lbl_test2, +                       instr (OpImm (ImmInt 8)) r_dst, + +                       LABEL lbl_test2, +                       BT L (ImmInt 2) r_tmp, +                       JXX GEU lbl_test1, +                       instr (OpImm (ImmInt 4)) r_dst, + +                       LABEL lbl_test1, +                       BT L (ImmInt 1) r_tmp, +                       JXX GEU lbl_test0, +                       instr (OpImm (ImmInt 2)) r_dst, + +                       LABEL lbl_test0, +                       BT L (ImmInt 0) r_tmp, +                       JXX GEU lbl_after, +                       instr (OpImm (ImmInt 1)) r_dst, +                       LABEL lbl_after, +                                            +                       COMMENT (_PK_ "end shift sequence") +                    ] +       in +       returnUs (Any IntRep code__2) + +{- +     -- since ECX is always used as a spill temporary, we can't +     -- use it here to do non-immediate shifts.  No big deal -- +     -- they are only very rare, and we can give an equivalent +     -- insn sequence which doesn't use ECX. +     -- DO NOT USE THIS CODE, SINCE IT IS INCOMPATIBLE WITH THE SPILLER       = getRegister y		`thenUs` \ register1 ->           getRegister x		`thenUs` \ register2 ->         let @@ -699,6 +766,7 @@ getRegister (StPrim primop [x, y]) -- dyadic PrimOps  		      mkSeqInstr (instr (OpReg ecx) (OpReg eax))         in         returnUs (Fixed IntRep eax code__2) +-}      --------------------      add_code :: Size -> StixTree -> StixTree -> UniqSM Register @@ -2441,10 +2509,10 @@ condIntReg cond x y  	code = condCode condition  	cond = condName condition  	-- ToDo: if dst is eax, ebx, ecx, or edx we would not need the move. -	code__2 dst = code . mkSeqInstrs [ +	code__2 dst = code . mkSeqInstrs [COMMENT (_PK_ "aaaaa"),  	    SETCC cond (OpReg tmp),  	    AND L (OpImm (ImmInt 1)) (OpReg tmp), -	    MOV L (OpReg tmp) (OpReg dst)] +	    MOV L (OpReg tmp) (OpReg dst) ,COMMENT (_PK_ "bbbbb")]      in      returnUs (Any IntRep code__2) diff --git a/ghc/compiler/nativeGen/MachMisc.hi-boot b/ghc/compiler/nativeGen/MachMisc.hi-boot index 91f6330b06..242c93afe3 100644 --- a/ghc/compiler/nativeGen/MachMisc.hi-boot +++ b/ghc/compiler/nativeGen/MachMisc.hi-boot @@ -1,7 +1,8 @@  _interface_ MachMisc 1  _exports_ -MachMisc fixedHdrSize fmtAsmLbl underscorePrefix; +MachMisc Instr fixedHdrSize fmtAsmLbl underscorePrefix;  _declarations_  1 fixedHdrSize _:_ PrelBase.Int ;;  2 fmtAsmLbl _:_ PrelBase.String -> PrelBase.String ;;  1 underscorePrefix _:_ PrelBase.Bool ;; +1 data Instr;
\ No newline at end of file diff --git a/ghc/compiler/nativeGen/MachMisc.hi-boot-5 b/ghc/compiler/nativeGen/MachMisc.hi-boot-5 index 6fb5f9e1a6..8c2a6f29c0 100644 --- a/ghc/compiler/nativeGen/MachMisc.hi-boot-5 +++ b/ghc/compiler/nativeGen/MachMisc.hi-boot-5 @@ -1,5 +1,6 @@  __interface MachMisc 1 0 where -__export MachMisc fixedHdrSize fmtAsmLbl underscorePrefix; +__export MachMisc Instr fixedHdrSize fmtAsmLbl underscorePrefix;  1 fixedHdrSize :: PrelBase.Int ;  2 fmtAsmLbl :: PrelBase.String -> PrelBase.String ;  1 underscorePrefix :: PrelBase.Bool ; +1 data Instr ; diff --git a/ghc/compiler/nativeGen/MachMisc.lhs b/ghc/compiler/nativeGen/MachMisc.lhs index d31af20307..893bf873e4 100644 --- a/ghc/compiler/nativeGen/MachMisc.lhs +++ b/ghc/compiler/nativeGen/MachMisc.lhs @@ -507,6 +507,7 @@ current translation.  	      | SAR	      Size Operand Operand -- 1st operand must be an Imm or CL  	      | SHR	      Size Operand Operand -- 1st operand must be an Imm or CL  	      | NOP +              | BT            Size Imm Operand  -- Float Arithmetic. -- ToDo for 386 @@ -539,6 +540,9 @@ current translation.       	      | GABS	      Size Reg Reg -- src, dst      	      | GNEG	      Size Reg Reg -- src, dst      	      | GSQRT	      Size Reg Reg -- src, dst +    	      | GSIN	      Size Reg Reg -- src, dst +    	      | GCOS	      Size Reg Reg -- src, dst +    	      | GTAN	      Size Reg Reg -- src, dst                | GFREE         -- do ffree on all x86 regs; an ugly hack  -- Comparison @@ -598,6 +602,7 @@ is_G_instr instr  	GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True      	GCMP _ _ _ -> True; GABS _ _ _ -> True      	GNEG _ _ _ -> True; GSQRT _ _ _ -> True +        GSIN _ _ _ -> True; GCOS _ _ _ -> True; GTAN _ _ _ -> True;          GFREE -> panic "is_G_instr: GFREE (!)"          other -> False diff --git a/ghc/compiler/nativeGen/MachRegs.lhs b/ghc/compiler/nativeGen/MachRegs.lhs index f32024ffd4..446e7dd794 100644 --- a/ghc/compiler/nativeGen/MachRegs.lhs +++ b/ghc/compiler/nativeGen/MachRegs.lhs @@ -26,13 +26,13 @@ module MachRegs (  	callClobberedRegs,  	callerSaves,  	extractMappedRegNos, +        mappedRegNo,  	freeMappedRegs,  	freeReg, freeRegs,  	getNewRegNCG,  	magicIdRegMaybe,  	mkReg,  	realReg, -	reservedRegs,  	saveLoc,  	spRel,  	stgReg, @@ -336,6 +336,10 @@ extractMappedRegNos regs    where      ex (MappedReg i) acc = IBOX(i) : acc  -- we'll take it      ex _	     acc = acc		  -- leave it out + +mappedRegNo :: Reg -> RegNo +mappedRegNo (MappedReg i) = IBOX(i) +mappedRegNo _             = pprPanic "mappedRegNo" empty  \end{code}  ** Machine-specific Reg stuff: ** @@ -733,40 +737,7 @@ magicIdRegMaybe HpLim		   	= Just (FixedReg ILIT(REG_HpLim))  magicIdRegMaybe _		   	= Nothing  \end{code} -%************************************************************************ -%*									* -\subsection{Free, reserved, call-clobbered, and argument registers} -%*									* -%************************************************************************ - -@freeRegs@ is the list of registers we can use in register allocation. -@freeReg@ (below) says if a particular register is free. - -With a per-instruction clobber list, we might be able to get some of -these back, but it's probably not worth the hassle. - -@callClobberedRegs@ ... the obvious. - -@argRegs@: assuming a call with N arguments, what registers will be -used to hold arguments?  (NB: it doesn't know whether the arguments -are integer or floating-point...) -  \begin{code} -reservedRegs :: [RegNo] -reservedRegs -#if alpha_TARGET_ARCH -  = [NCG_Reserved_I1, NCG_Reserved_I2, -     NCG_Reserved_F1, NCG_Reserved_F2] -#endif -#if i386_TARGET_ARCH -  = [{-certainly cannot afford any!-}] -#endif -#if sparc_TARGET_ARCH -  = [NCG_Reserved_I1, NCG_Reserved_I2, -     NCG_Reserved_F1, NCG_Reserved_F2, -     NCG_Reserved_D1, NCG_Reserved_D2] -#endif -  -------------------------------  freeRegs :: [Reg]  freeRegs diff --git a/ghc/compiler/nativeGen/PprMach.lhs b/ghc/compiler/nativeGen/PprMach.lhs index e35e22cc9a..6232f3751b 100644 --- a/ghc/compiler/nativeGen/PprMach.lhs +++ b/ghc/compiler/nativeGen/PprMach.lhs @@ -941,7 +941,7 @@ pprInstr v@(MOV size s@(OpReg src) d@(OpReg dst)) -- hack  #ifdef DEBUG      (<>) (ptext SLIT("# warning: ")) (pprSizeOpOp SLIT("mov") size s d)  #else -    (ptext SLIT("")) +    empty  #endif  pprInstr (MOV size src dst)    = pprSizeOpOp SLIT("mov") size src dst @@ -977,9 +977,9 @@ pprInstr (XOR size src dst) = pprSizeOpOp SLIT("xor")  size src dst  pprInstr (NOT size op) = pprSizeOp SLIT("not") size op  pprInstr (NEGI size op) = pprSizeOp SLIT("neg") size op -pprInstr (SHL size imm dst) = pprSizeByteOpOp SLIT("shl")  size imm dst -pprInstr (SAR size imm dst) = pprSizeByteOpOp SLIT("sar")  size imm dst -pprInstr (SHR size imm dst) = pprSizeByteOpOp SLIT("shr")  size imm dst +pprInstr (SHL size imm dst) = {-pprSizeByteOpOp-} pprSizeOpOp SLIT("shl")  size imm dst +pprInstr (SAR size imm dst) = {-pprSizeByteOpOp-} pprSizeOpOp SLIT("sar")  size imm dst +pprInstr (SHR size imm dst) = {-pprSizeByteOpOp-} pprSizeOpOp SLIT("shr")  size imm dst  pprInstr (CMP size src dst) = pprSizeOpOp SLIT("cmp")  size src dst  pprInstr (TEST size src dst) = pprSizeOpOp SLIT("test")  size src dst @@ -989,6 +989,7 @@ pprInstr PUSHA = ptext SLIT("\tpushal")  pprInstr POPA = ptext SLIT("\tpopal")  pprInstr (NOP) = ptext SLIT("\tnop") +pprInstr (BT size imm src) = pprSizeImmOp SLIT("bt") size imm src  pprInstr (CLTD) = ptext SLIT("\tcltd")  pprInstr (SETCC cond op) = pprCondInstr SLIT("set") cond (pprOperand B op) @@ -1047,6 +1048,15 @@ pprInstr g@(GNEG sz src dst)     = pprG g (hcat [gtab, gpush src 0, text " ; fchs ; ", gpop dst 1])  pprInstr g@(GSQRT sz src dst)     = pprG g (hcat [gtab, gpush src 0, text " ; fsqrt ; ", gpop dst 1]) +pprInstr g@(GSIN sz src dst) +   = pprG g (hcat [gtab, gpush src 0, text " ; fsin ; ", gpop dst 1]) +pprInstr g@(GCOS sz src dst) +   = pprG g (hcat [gtab, gpush src 0, text " ; fcos ; ", gpop dst 1]) + +pprInstr g@(GTAN sz src dst) +   = pprG g (hcat [gtab, text "ffree %st(6) ; ", +                   gpush src 0, text " ; fptan ; ",  +                   text " fstp %st(0) ; ", gpop dst 1])  pprInstr g@(GADD sz src1 src2 dst)     = pprG g (hcat [gtab, gpush src1 0,  @@ -1106,6 +1116,9 @@ pprGInstr (GCMP sz src dst) = pprSizeRegReg SLIT("gcmp") sz src dst  pprGInstr (GABS sz src dst) = pprSizeRegReg SLIT("gabs") sz src dst  pprGInstr (GNEG sz src dst) = pprSizeRegReg SLIT("gneg") sz src dst  pprGInstr (GSQRT sz src dst) = pprSizeRegReg SLIT("gsqrt") sz src dst +pprGInstr (GSIN sz src dst) = pprSizeRegReg SLIT("gsin") sz src dst +pprGInstr (GCOS sz src dst) = pprSizeRegReg SLIT("gcos") sz src dst +pprGInstr (GTAN sz src dst) = pprSizeRegReg SLIT("gtan") sz src dst  pprGInstr (GADD sz src1 src2 dst) = pprSizeRegRegReg SLIT("gadd") sz src1 src2 dst  pprGInstr (GSUB sz src1 src2 dst) = pprSizeRegRegReg SLIT("gsub") sz src1 src2 dst @@ -1124,6 +1137,19 @@ pprOperand s (OpReg r) = pprReg s r  pprOperand s (OpImm i) = pprDollImm i  pprOperand s (OpAddr ea) = pprAddr ea +pprSizeImmOp :: FAST_STRING -> Size -> Imm -> Operand -> SDoc +pprSizeImmOp name size imm op1 +  = hcat [ +        char '\t', +	ptext name, +	pprSize size, +	space, +	char '$', +	pprImm imm, +	comma, +	pprOperand size op1 +    ] +	  pprSizeOp :: FAST_STRING -> Size -> Operand -> SDoc  pprSizeOp name size op1    = hcat [ diff --git a/ghc/compiler/nativeGen/RegAllocInfo.lhs b/ghc/compiler/nativeGen/RegAllocInfo.lhs index eab566ca3e..c1bd50c7eb 100644 --- a/ghc/compiler/nativeGen/RegAllocInfo.lhs +++ b/ghc/compiler/nativeGen/RegAllocInfo.lhs @@ -35,6 +35,7 @@ module RegAllocInfo (  	patchRegs,  	regLiveness,  	spillReg, +	IF_ARCH_i386(findReservedRegs COMMA,)  	RegSet,  	elementOfRegSet, @@ -64,7 +65,6 @@ import OrdList		( mkUnitList )  import PrimRep		( PrimRep(..) )  import UniqSet		-- quite a bit of it  import Outputable -import PprMach		( pprInstr )  \end{code}  %************************************************************************ @@ -354,22 +354,24 @@ regUsage instr = case instr of  #if i386_TARGET_ARCH  regUsage instr = case instr of -    MOV  sz src dst	-> usage2 src dst -    MOVZxL sz src dst	-> usage2 src dst -    MOVSxL sz src dst	-> usage2 src dst -    LEA  sz src dst	-> usage2 src dst -    ADD  sz src dst	-> usage2 src dst -    SUB  sz src dst	-> usage2 src dst -    IMUL sz src dst	-> usage2 src dst +    MOV  sz src dst	-> usage2  src dst +    MOVZxL sz src dst	-> usage2  src dst +    MOVSxL sz src dst	-> usage2  src dst +    LEA  sz src dst	-> usage2  src dst +    ADD  sz src dst	-> usage2s src dst +    SUB  sz src dst	-> usage2s src dst +    IMUL sz src dst	-> usage2s src dst      IDIV sz src		-> usage (eax:edx:opToReg src) [eax,edx] -    AND  sz src dst	-> usage2 src dst -    OR   sz src dst	-> usage2 src dst -    XOR  sz src dst	-> usage2 src dst +    AND  sz src dst	-> usage2s src dst +    OR   sz src dst	-> usage2s src dst +    XOR  sz src dst	-> usage2s src dst      NOT  sz op		-> usage1 op      NEGI sz op		-> usage1 op -    SHL  sz dst len	-> usage2 dst len -- len is either an Imm or ecx. -    SAR  sz dst len	-> usage2 dst len -- len is either an Imm or ecx. -    SHR  sz len dst	-> usage2 dst len -- len is either an Imm or ecx. +    SHL  sz len dst	-> usage2s len dst -- len is either an Imm or ecx. +    SAR  sz len dst	-> usage2s len dst -- len is either an Imm or ecx. +    SHR  sz len dst	-> usage2s len dst -- len is either an Imm or ecx. +    BT   sz imm src	-> usage (opToReg src) [] +      PUSH sz op		-> usage (opToReg op) []      POP  sz op		-> usage [] (opToReg op)      TEST sz src dst	-> usage (opToReg src ++ opToReg dst) [] @@ -403,21 +405,35 @@ regUsage instr = case instr of      GABS sz src dst	-> usage [src] [dst]      GNEG sz src dst	-> usage [src] [dst]      GSQRT sz src dst	-> usage [src] [dst] +    GSIN sz src dst	-> usage [src] [dst] +    GCOS sz src dst	-> usage [src] [dst] +    GTAN sz src dst	-> usage [src] [dst]      COMMENT _		-> noUsage      SEGMENT _ 		-> noUsage      LABEL _		-> noUsage      ASCII _ _		-> noUsage      DATA _ _		-> noUsage -    _			-> error ("regUsage(x86): " ++ showSDoc (pprInstr instr)) +    _			-> pprPanic "regUsage(x86) " empty +   where +    -- 2 operand form in which the second operand is purely a destination      usage2 :: Operand -> Operand -> RegUsage      usage2 op (OpReg reg) = usage (opToReg op) [reg]      usage2 op (OpAddr ea) = usage (opToReg op ++ addrToRegs ea) []      usage2 op (OpImm imm) = usage (opToReg op) [] + +    -- 2 operand form in which the second operand is also an input +    usage2s :: Operand -> Operand -> RegUsage +    usage2s op (OpReg reg) = usage (opToReg op ++ [reg]) [reg] +    usage2s op (OpAddr ea) = usage (opToReg op ++ addrToRegs ea) [] +    usage2s op (OpImm imm) = usage (opToReg op) [] + +    -- 1 operand form in which the operand is both used and written      usage1 :: Operand -> RegUsage      usage1 (OpReg reg)    = usage [reg] [reg]      usage1 (OpAddr ea)    = usage (addrToRegs ea) [] +      allFPRegs = [fake0,fake1,fake2,fake3,fake4,fake5]      --callClobberedRegs = [ eax, ecx, edx ] -- according to gcc, anyway. @@ -442,6 +458,14 @@ regUsage instr = case instr of      interesting (FixedReg _) = False      interesting _ = True + +-- Allow the spiller to decide whether or not it can use  +-- %eax and %edx as spill temporaries. +hasFixedEAXorEDX instr = case instr of +    IDIV _ _ -> True +    CLTD     -> True +    other    -> False +  #endif {- i386_TARGET_ARCH -}  -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  #if sparc_TARGET_ARCH @@ -495,6 +519,71 @@ regUsage instr = case instr of  #endif {- sparc_TARGET_ARCH -}  \end{code} + +%************************************************************************ +%*									* +\subsection{Free, reserved, call-clobbered, and argument registers} +%*									* +%************************************************************************ + +@freeRegs@ is the list of registers we can use in register allocation. +@freeReg@ (below) says if a particular register is free. + +With a per-instruction clobber list, we might be able to get some of +these back, but it's probably not worth the hassle. + +@callClobberedRegs@ ... the obvious. + +@argRegs@: assuming a call with N arguments, what registers will be +used to hold arguments?  (NB: it doesn't know whether the arguments +are integer or floating-point...) + +findReservedRegs tells us which regs can be used as spill temporaries. +The list of instructions for which we are attempting allocation is +supplied.  This is so that we can (at least for x86) examine it to +discover which registers are being used in a fixed way -- for example, +%eax and %edx are used by integer division, so they can't be used as +spill temporaries.  However, most instruction lists don't do integer +division, so we don't want to rule them out altogether. + +findReservedRegs returns not a list of spill temporaries, but a list +of list of them.  This is so that the allocator can attempt allocating +with at first no spill temps, then if that fails, increasing numbers. +For x86 it is important that we minimise the number of regs reserved +as spill temporaries, since there are so few.  For Alpha and Sparc +this isn't a concern; we just ignore the supplied code list and return +a singleton list which we know will satisfy all spill demands. + +\begin{code} +findReservedRegs :: [Instr] -> [[RegNo]] +findReservedRegs instrs +#if alpha_TARGET_ARCH +  = [[NCG_Reserved_I1, NCG_Reserved_I2, +      NCG_Reserved_F1, NCG_Reserved_F2]] +#endif +#if sparc_TARGET_ARCH +  = [[NCG_Reserved_I1, NCG_Reserved_I2, +      NCG_Reserved_F1, NCG_Reserved_F2, +      NCG_Reserved_D1, NCG_Reserved_D2]] +#endif +#if i386_TARGET_ARCH +    -- Sigh.  This is where it gets complicated. +  = -- first of all, try without any at all. +    map (map mappedRegNo) ( +    [ [], +    -- if that doesn't work, try one integer reg (which might fail) +    -- and two float regs (which will always fix any float insns) +      [ecx, fake4,fake5] +    ] +    -- dire straits (but still correct): see if we can bag %eax and %edx +    ++ if   any hasFixedEAXorEDX instrs +       then []  -- bummer +       else [ [ecx,edx,fake4,fake5], +              [ecx,edx,eax,fake4,fake5] ] +    ) +#endif +\end{code} +  %************************************************************************  %*									*  \subsection{@RegLiveness@ type; @regLiveness@ function} @@ -655,6 +744,7 @@ patchRegs instr env = case instr of      SHL  sz imm dst 	-> patch2 (SHL  sz) imm dst      SAR  sz imm dst 	-> patch2 (SAR  sz) imm dst      SHR  sz imm dst 	-> patch2 (SHR  sz) imm dst +    BT   sz imm src     -> patch1 (BT sz imm) src      TEST sz src dst	-> patch2 (TEST sz) src dst      CMP  sz src dst	-> patch2 (CMP  sz) src dst      PUSH sz op		-> patch1 (PUSH sz) op @@ -684,6 +774,9 @@ patchRegs instr env = case instr of      GABS sz src dst	-> GABS sz (env src) (env dst)      GNEG sz src dst	-> GNEG sz (env src) (env dst)      GSQRT sz src dst	-> GSQRT sz (env src) (env dst) +    GSIN sz src dst	-> GSIN sz (env src) (env dst) +    GCOS sz src dst	-> GCOS sz (env src) (env dst) +    GTAN sz src dst	-> GTAN sz (env src) (env dst)      COMMENT _		-> instr      SEGMENT _ 		-> instr @@ -693,7 +786,8 @@ patchRegs instr env = case instr of      JXX _ _		-> instr      CALL _		-> instr      CLTD		-> instr -    _			-> error ("patchInstr: " ++ showSDoc (pprInstr instr)) --instr +    _			-> pprPanic "patchInstr(x86)" empty +    where      patch1 insn op      = insn (patchOp op)      patch2 insn src dst = insn (patchOp src) (patchOp dst) @@ -761,7 +855,7 @@ patchRegs instr env = case instr of  Spill to memory, and load it back... -JRS, 000122: on x86, don't spill directly below the stack pointer, since  +JRS, 000122: on x86, don't spill directly above the stack pointer, since   some insn sequences (int <-> conversions) use this as a temp location.  Leave 16 bytes of slop. @@ -769,36 +863,44 @@ Leave 16 bytes of slop.  spillReg, loadReg :: Reg -> Reg -> InstrList  spillReg dyn (MemoryReg i pk) -  | i >= 0  -- JRS paranoia -  = let -	sz = primRepToSize pk +  | i >= 0 -- JRS paranoia +  = let	sz = primRepToSize pk      in      mkUnitList (  	{-Alpha: spill below the stack pointer (?)-}  	 IF_ARCH_alpha( ST sz dyn (spRel i)  	{-I386: spill above stack pointer leaving 2 words/spill-} -	,IF_ARCH_i386 ( if pk == FloatRep || pk == DoubleRep -                        then GST DF dyn (spRel (16 + 2 * i)) -                        else MOV sz (OpReg dyn) (OpAddr (spRel (16 + 2 * i))) +	,IF_ARCH_i386 ( let loc | i < 60    = 4 + 2 * i +                                | otherwise = -2000 - 2 * i +                        in +                        if pk == FloatRep || pk == DoubleRep +                        then GST DF dyn (spRel loc) +                        else MOV sz (OpReg dyn) (OpAddr (spRel loc))  	{-SPARC: spill below frame pointer leaving 2 words/spill-}  	,IF_ARCH_sparc( ST sz dyn (fpRel (-2 * i))          ,)))      ) - +  | otherwise +  = pprPanic "spillReg:" (text "invalid spill location: " <> int i) +     ----------------------------  loadReg (MemoryReg i pk) dyn -  | i >= 0  -- JRS paranoia -  = let -	sz = primRepToSize pk +  | i >= 0 -- JRS paranoia +  = let	sz = primRepToSize pk      in      mkUnitList (  	 IF_ARCH_alpha( LD  sz dyn (spRel i) -	,IF_ARCH_i386 ( if   pk == FloatRep || pk == DoubleRep -                        then GLD DF (spRel (16 + 2 * i)) dyn -                        else MOV sz (OpAddr (spRel (16 + 2 * i))) (OpReg dyn) +	,IF_ARCH_i386 ( let loc | i < 60    = 4 + 2 * i +                                | otherwise = -2000 - 2 * i +                        in +                        if   pk == FloatRep || pk == DoubleRep +                        then GLD DF (spRel loc) dyn +                        else MOV sz (OpAddr (spRel loc)) (OpReg dyn)  	,IF_ARCH_sparc( LD  sz (fpRel (-2 * i)) dyn  	,)))      ) +  | otherwise +  = pprPanic "loadReg:" (text "invalid spill location: " <> int i)  \end{code} | 
