%
% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
%
% $Id: CgTailCall.lhs,v 1.43 2005/06/21 10:44:41 simonmar Exp $
%
%********************************************************
%*                                                      *
\section[CgTailCall]{Tail calls: converting @StgApps@}
%*                                                      *
%********************************************************

\begin{code}
module CgTailCall (
        cgTailCall, performTailCall,
        performReturn, performPrimReturn,
        emitKnownConReturnCode, emitAlgReturnCode,
        returnUnboxedTuple, ccallReturnUnboxedTuple,
        pushUnboxedTuple,
        tailCallPrimOp,

        pushReturnAddress
    ) where

#include "HsVersions.h"

import CgMonad
import CgBindery        ( getArgAmodes, getCgIdInfo, CgIdInfo, maybeLetNoEscape,
                          idInfoToAmode, cgIdInfoId, cgIdInfoLF,
                          cgIdInfoArgRep )
import CgInfoTbls       ( entryCode, emitDirectReturnInstr, dataConTagZ,
                          emitVectoredReturnInstr, closureInfoPtr )
import CgCallConv
import CgStackery       ( setRealSp, mkStkAmodes, adjustStackHW,
                          getSpRelOffset )
import CgHeapery        ( setRealHp, getHpRelOffset )
import CgUtils          ( emitSimultaneously )
import CgTicky
import ClosureInfo
import SMRep            ( CgRep, isVoidArg, separateByPtrFollowness )
import Cmm
import CmmUtils
import CLabel           ( CLabel, mkRtsPrimOpLabel, mkSeqInfoLabel )
import Type             ( isUnLiftedType )
import Id               ( Id, idName, idUnique, idType )
import DataCon          ( DataCon, dataConTyCon )
import StgSyn           ( StgArg )
import TyCon            ( TyCon )
import PrimOp           ( PrimOp )
import Outputable

import Monad            ( when )

-----------------------------------------------------------------------------
--      Tail Calls

cgTailCall :: Id -> [StgArg] -> Code

-- Here's the code we generate for a tail call.  (NB there may be no
-- arguments, in which case this boils down to just entering a variable.)
--
--    * Put args in the top locations of the stack.
--    * Adjust the stack ptr
--    * Make R1 point to the function closure if necessary.
--    * Perform the call.
--
-- Things to be careful about:
--
--    * Don't overwrite stack locations before you have finished with
--      them (remember you need the function and the as-yet-unmoved
--      arguments).
--    * Preferably, generate no code to replace x by x on the stack (a
--      common situation in tail-recursion).
--    * Adjust the stack high-water mark appropriately.
--
-- Treat unboxed locals exactly like literals, except use the addr
-- mode for the local instead of (CLit lit) in the assignment.

cgTailCall fun args
  = do  { fun_info <- getCgIdInfo fun

        ; if isUnLiftedType (idType fun)
          then  -- Primitive return
                ASSERT( null args )
            do  { fun_amode <- idInfoToAmode fun_info
                ; performPrimReturn (cgIdInfoArgRep fun_info) fun_amode }

          else -- Normal case, fun is boxed
            do  { arg_amodes <- getArgAmodes args
                ; performTailCall fun_info arg_amodes noStmts }
        }
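
-- For example (an illustrative sketch, not the literal output): a tail
-- call  f x y z,  where f is a known function of arity 3 that takes its
-- first two arguments in registers, would compile to something like
--
--      R2 = x                  -- register args (assignCallRegs)
--      R3 = y
--      Sp[n] = z               -- remaining args go on the stack (mkStkAmodes)
--      Sp = Sp + <final offset>
--      jump f_entry
--
-- The register/stack split, and which registers are used, depend on the
-- call method chosen by getCallMethod in performTailCall below and on
-- the platform's calling convention.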

-- -----------------------------------------------------------------------------
-- The guts of a tail-call

performTailCall
        :: CgIdInfo             -- The function
        -> [(CgRep,CmmExpr)]    -- Args
        -> CmmStmts             -- Pending simultaneous assignments
                                --  *** GUARANTEED to contain only stack assignments.
        -> Code

performTailCall fun_info arg_amodes pending_assts
  | Just join_sp <- maybeLetNoEscape fun_info
  =        -- A let-no-escape is slightly different, because we
           -- arrange the stack arguments into pointers and non-pointers
           -- to make the heap check easier.  The tail-call sequence
           -- is very similar to returning an unboxed tuple, so we
           -- share some code.
     do { (final_sp, arg_assts) <- pushUnboxedTuple join_sp arg_amodes
        ; emitSimultaneously (pending_assts `plusStmts` arg_assts)
        ; let lbl = enterReturnPtLabel (idUnique (cgIdInfoId fun_info))
        ; doFinalJump final_sp True {- Is LNE -} (jumpToLbl lbl) }

  | otherwise
  = do  { fun_amode <- idInfoToAmode fun_info
        ; let node_asst = oneStmt (CmmAssign nodeReg fun_amode)
              opt_node_asst | nodeMustPointToIt lf_info = node_asst
                            | otherwise                 = noStmts
        ; EndOfBlockInfo sp _ <- getEndOfBlockInfo
        ; this_pkg <- getThisPackage

        ; case getCallMethod this_pkg fun_name lf_info (length arg_amodes) of

            -- Node must always point to things we enter
            EnterIt -> do
                { emitSimultaneously (node_asst `plusStmts` pending_assts)
                ; let target = entryCode (closureInfoPtr (CmmReg nodeReg))
                ; doFinalJump sp False (stmtC (CmmJump target [])) }

            -- A function, but we have zero arguments.  It is already in WHNF,
            -- so we can just return it.
            -- As with any return, Node must point to it.
            ReturnIt -> do
                { emitSimultaneously (node_asst `plusStmts` pending_assts)
                ; doFinalJump sp False emitDirectReturnInstr }

            -- A real constructor.  Don't bother entering it,
            -- just do the right sort of return instead.
            -- As with any return, Node must point to it.
            ReturnCon con -> do
                { emitSimultaneously (node_asst `plusStmts` pending_assts)
                ; doFinalJump sp False (emitKnownConReturnCode con) }

            JumpToIt lbl -> do
                { emitSimultaneously (opt_node_asst `plusStmts` pending_assts)
                ; doFinalJump sp False (jumpToLbl lbl) }

            -- A slow function call via the RTS apply routines
            -- Node must definitely point to the thing
            SlowCall -> do
                { when (not (null arg_amodes)) $ do
                   { if isKnownFun lf_info
                        then tickyKnownCallTooFewArgs
                        else tickyUnknownCall
                   ; tickySlowCallPat (map fst arg_amodes)
                   }

                ; let (apply_lbl, args, extra_args) =
                        constructSlowCall arg_amodes

                ; directCall sp apply_lbl args extra_args
                        (node_asst `plusStmts` pending_assts)
                }

            -- A direct function call (possibly with some left-over arguments)
            DirectEntry lbl arity -> do
                { if arity == length arg_amodes
                        then tickyKnownCallExact
                        else do tickyKnownCallExtraArgs
                                tickySlowCallPat (map fst (drop arity arg_amodes))

                ; let
                     -- The args beyond the arity go straight on the stack
                     (arity_args, extra_args) = splitAt arity arg_amodes

                ; directCall sp lbl arity_args extra_args
                        (opt_node_asst `plusStmts` pending_assts)
                }
        }
  where
    fun_name = idName (cgIdInfoId fun_info)
    lf_info  = cgIdInfoLF fun_info


directCall sp lbl args extra_args assts = do
  let
        -- First chunk of args go in registers
        (reg_arg_amodes, stk_args) = assignCallRegs args

        -- Any "extra" arguments are placed in frames on the
        -- stack after the other arguments.
        slow_stk_args = slowArgs extra_args

        reg_assts = assignToRegs reg_arg_amodes
  --
  (final_sp, stk_assts) <- mkStkAmodes sp (stk_args ++ slow_stk_args)

  emitSimultaneously (reg_assts     `plusStmts`
                      stk_assts     `plusStmts`
                      assts)

  doFinalJump final_sp False (jumpToLbl lbl)
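
-- An illustrative sketch of the SlowCall case above: a call to an
-- unknown function  f x y  (both pointers) becomes, roughly,
--
--      R1 = f                  -- Node points to the function closure
--      <assign args>           -- x, y in registers and/or on the stack,
--                              -- as directCall decides
--      jump stg_ap_pp_fast     -- generic RTS apply routine for two
--                              -- pointer args, picked by constructSlowCall
--
-- Arguments beyond what a single RTS apply routine handles are wrapped
-- in extra application frames by slowArgs.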

-- -----------------------------------------------------------------------------
-- The final clean-up before we do a jump at the end of a basic block.
-- This code is shared by tail-calls and returns.

doFinalJump :: VirtualSpOffset -> Bool -> Code -> Code
doFinalJump final_sp is_let_no_escape jump_code
  = do  { -- Adjust the high-water mark if necessary
          adjustStackHW final_sp

        -- Push a return address if necessary (after the assignments
        -- above, in case we clobber a live stack location)
        --
        -- DON'T push the return address when we're about to jump to a
        -- let-no-escape: the final tail call in the let-no-escape
        -- will do this.
        ; eob <- getEndOfBlockInfo
        ; whenC (not is_let_no_escape) (pushReturnAddress eob)

            -- Final adjustment of Sp/Hp
        ; adjustSpAndHp final_sp

            -- and do the jump
        ; jump_code }

-- -----------------------------------------------------------------------------
-- A general return (just a special case of doFinalJump, above)

performReturn :: Code   -- The code to execute to actually do the return
              -> Code

performReturn finish_code
  = do  { EndOfBlockInfo args_sp sequel <- getEndOfBlockInfo
        ; doFinalJump args_sp False{-not a LNE-} finish_code }

-- -----------------------------------------------------------------------------
-- Primitive Returns
-- Just load the return value into the right register, and return.

performPrimReturn :: CgRep -> CmmExpr   -- The thing to return
                  -> Code
performPrimReturn rep amode
  = do  { whenC (not (isVoidArg rep))
                (stmtC (CmmAssign ret_reg amode))
        ; performReturn emitDirectReturnInstr }
  where
    ret_reg = dataReturnConvPrim rep

-- ---------------------------------------------------------------------------
-- Algebraic constructor returns

-- Constructor is built on the heap; Node is set.
-- All that remains is to do the right sort of jump.

emitKnownConReturnCode :: DataCon -> Code
emitKnownConReturnCode con
  = emitAlgReturnCode (dataConTyCon con)
                      (CmmLit (mkIntCLit (dataConTagZ con)))
                        -- emitAlgReturnCode requires zero-indexed tag

emitAlgReturnCode :: TyCon -> CmmExpr -> Code
-- emitAlgReturnCode is used both by emitKnownConReturnCode,
-- and by PrimOps that return enumerated types (i.e.
-- all the comparison operators).
emitAlgReturnCode tycon tag
  = do  { case ctrlReturnConvAlg tycon of
            VectoredReturn fam_sz -> do { tickyVectoredReturn fam_sz
                                        ; emitVectoredReturnInstr tag }
            UnvectoredReturn _ -> emitDirectReturnInstr
        }

-- ---------------------------------------------------------------------------
-- Unboxed tuple returns

-- These are a bit like a normal tail call, except that:
--
--   - The tail-call target is an info table on the stack
--
--   - We separate stack arguments into pointers and non-pointers,
--     to make it easier to leave things in a sane state for a heap check.
--     This is OK because we can never partially apply an unboxed tuple,
--     unlike a function.  The same technique is used when calling
--     let-no-escape functions, because they also can't be partially
--     applied.

returnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code
returnUnboxedTuple amodes
  = do  { eob@(EndOfBlockInfo args_sp sequel) <- getEndOfBlockInfo
        ; tickyUnboxedTupleReturn (length amodes)
        ; (final_sp, assts) <- pushUnboxedTuple args_sp amodes
        ; emitSimultaneously assts
        ; doFinalJump final_sp False{-not a LNE-} emitDirectReturnInstr }

pushUnboxedTuple :: VirtualSpOffset             -- Sp at which to start pushing
                 -> [(CgRep, CmmExpr)]          -- amodes of the components
                 -> FCode (VirtualSpOffset,     -- final Sp
                           CmmStmts)            -- assignments (regs+stack)

pushUnboxedTuple sp []
  = return (sp, noStmts)
pushUnboxedTuple sp amodes
  = do  { let   (reg_arg_amodes, stk_arg_amodes) = assignReturnRegs amodes

                -- separate the rest of the args into pointers and non-pointers
                (ptr_args, nptr_args) = separateByPtrFollowness stk_arg_amodes

                reg_arg_assts = assignToRegs reg_arg_amodes

            -- push ptrs, then nonptrs, on the stack
        ; (ptr_sp,   ptr_assts)  <- mkStkAmodes sp ptr_args
        ; (final_sp, nptr_assts) <- mkStkAmodes ptr_sp nptr_args

        ; returnFC (final_sp,
                    reg_arg_assts `plusStmts`
                    ptr_assts `plusStmts` nptr_assts) }
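
-- An illustrative example of pushUnboxedTuple: returning (# p, q, n #),
-- where p and q are pointers and n is a word, with only one pointer
-- register free for the return, might produce
--
--      R1 = p                  -- register component (assignReturnRegs)
--      Sp[k]   = q             -- stack pointers pushed first...
--      Sp[k+1] = n             -- ...then non-pointers
--
-- keeping the pointers together so the stack is GC-safe if a heap
-- check fails.  The actual register assignment depends on the return
-- convention in use.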

-- -----------------------------------------------------------------------------
-- Returning unboxed tuples from a C call.  This is mainly to support
-- _ccall_GC_, where we want to do things in a slightly different order
-- to normal:
--
--              - push return address
--              - adjust stack pointer
--              - r = call(args...)
--              - assign regs for unboxed tuple (usually just R1 = r)
--              - return to continuation
--
-- The return address (i.e. stack frame) must be on the stack before
-- doing the call in case the call ends up in the garbage collector.
--
-- Sadly, the information about the continuation is lost after we push it
-- (in order to avoid pushing it again), so we end up doing a needless
-- indirect jump (ToDo).

ccallReturnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code -> Code
ccallReturnUnboxedTuple amodes before_jump
  = do  { eob@(EndOfBlockInfo args_sp _) <- getEndOfBlockInfo

        -- Push a return address if necessary
        ; pushReturnAddress eob
        ; setEndOfBlockInfo (EndOfBlockInfo args_sp OnStack)
            (do { adjustSpAndHp args_sp
                ; before_jump
                ; returnUnboxedTuple amodes })
        }

-- -----------------------------------------------------------------------------
-- Calling an out-of-line primop

tailCallPrimOp :: PrimOp -> [StgArg] -> Code
tailCallPrimOp op args
  = do  { -- We're going to perform a normal-looking tail call,
          -- except that *all* the arguments will be in registers.
          -- Hence the ASSERT( null leftovers )
          arg_amodes <- getArgAmodes args
        ; let (arg_regs, leftovers) = assignPrimOpCallRegs arg_amodes
              jump_to_primop = jumpToLbl (mkRtsPrimOpLabel op)

        ; ASSERT(null leftovers) -- no stack-resident args
          emitSimultaneously (assignToRegs arg_regs)

        ; EndOfBlockInfo args_sp _ <- getEndOfBlockInfo
        ; doFinalJump args_sp False{-not a LNE-} jump_to_primop }

-- -----------------------------------------------------------------------------
-- Return Addresses

-- We always push the return address just before performing a tail call
-- or return.  The reason we leave it until then is because the stack
-- slot that the return address is to go into might contain something
-- useful.
--
-- If the end-of-block info is 'CaseAlts', then we're in the scrutinee of a
-- case expression and the return address is still to be pushed.
--
-- There are cases where it doesn't look necessary to push the return
-- address: for example, just before doing a return to a known
-- continuation.  However, the continuation will expect to find the
-- return address on the stack in case it needs to do a heap check.

pushReturnAddress :: EndOfBlockInfo -> Code

pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _ False))
  = do  { sp_rel <- getSpRelOffset args_sp
        ; stmtC (CmmStore sp_rel (mkLblExpr lbl)) }

-- For a polymorphic case, we have two return addresses to push: the case
-- return, and stg_seq_frame_info, which turns a possible vectored return
-- into a direct one.
pushReturnAddress (EndOfBlockInfo args_sp sequel@(CaseAlts lbl _ _ True))
  = do  { sp_rel <- getSpRelOffset (args_sp-1)
        ; stmtC (CmmStore sp_rel (mkLblExpr lbl))
        ; sp_rel <- getSpRelOffset args_sp
        ; stmtC (CmmStore sp_rel (CmmLit (CmmLabel mkSeqInfoLabel))) }

pushReturnAddress _ = nopC
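
-- So, for the polymorphic case just above, the top of the stack ends up
-- looking like this (virtual Sp offsets; an illustrative sketch):
--
--      Sp[args_sp]   = stg_seq_frame_info    -- entered first; turns a
--                                            -- vectored return into a
--                                            -- direct one
--      Sp[args_sp-1] = case return address (lbl)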

-- -----------------------------------------------------------------------------
-- Misc.

jumpToLbl :: CLabel -> Code
-- Passes no argument to the destination procedure
jumpToLbl lbl = stmtC (CmmJump (CmmLit (CmmLabel lbl)) [{- No args -}])

assignToRegs :: [(CmmExpr, GlobalReg)] -> CmmStmts
assignToRegs reg_args
  = mkStmts [ CmmAssign (CmmGlobal reg_id) expr
            | (expr, reg_id) <- reg_args ]
\end{code}

%************************************************************************
%*                                                                      *
\subsection[CgStackery-adjust]{Adjusting the stack pointers}
%*                                                                      *
%************************************************************************

This function adjusts the stack and heap pointers just before a tail
call or return.  The stack pointer is adjusted to its final position
(i.e. to point to the last argument for a tail call, or the activation
record for a return).  The heap pointer may be moved backwards, in
cases where we over-allocated at the beginning of the basic block (see
CgCase.lhs for discussion).

This function {\em does not} deal with high-water-mark adjustment.
That's done by functions which allocate stack space.

\begin{code}
adjustSpAndHp :: VirtualSpOffset        -- New offset for Arg stack ptr
              -> Code
adjustSpAndHp newRealSp
  = do  { -- Adjust stack, if necessary.
          -- NB: the conditional on the monad-carried realSp
          --     is out of line (via codeOnly), to avoid a black hole
        ; new_sp <- getSpRelOffset newRealSp
        ; checkedAbsC (CmmAssign spReg new_sp)  -- Will generate no code in the case
        ; setRealSp newRealSp                   -- where realSp == newRealSp

          -- Adjust heap.  The virtual heap pointer may be less than the real Hp
          -- because the latter was advanced to deal with the worst-case branch
          -- of the code, and we may be in a better-case branch.  In that case,
          -- move the real Hp *back* and retract some ticky allocation count.
        ; hp_usg <- getHpUsage
        ; let rHp = realHp hp_usg
              vHp = virtHp hp_usg
        ; new_hp <- getHpRelOffset vHp
        ; checkedAbsC (CmmAssign hpReg new_hp)  -- Generates nothing when vHp == rHp
        ; tickyAllocHeap (vHp - rHp)            -- ...ditto
        ; setRealHp vHp
        }
\end{code}
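
For example (an illustrative reading of the code above): if the real Hp
was advanced by five words to cover the worst-case branch of the block,
but the branch actually taken allocated only three, then at this point
\mbox{vHp - rHp} is $-2$, so Hp is moved back two words and two words of
ticky allocation count are retracted.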