author    Simon Marlow <marlowsd@gmail.com>  2012-02-03 15:03:06 +0000
committer Simon Marlow <marlowsd@gmail.com>  2012-02-08 15:50:43 +0000
commit    76999b605423f530ec17562d772eda1c1672db53 (patch)
tree      3ca1208d02ca753ffe07e13fe1bdbc1388d7e9cf /compiler/codeGen
parent    cd38928495f9186646273432151259f3d654b7e2 (diff)
download  haskell-76999b605423f530ec17562d772eda1c1672db53.tar.gz
New stack layout algorithm
Also:
 - improvements to code generation: push slow-call continuations
   on the stack instead of generating explicit continuations
 - remove unused CmmInfo wrapper type (replace with CmmInfoTable)
 - squash Area and AreaId together, remove now-unused RegSlot
 - comment out old unused stack-allocation code that no longer
   compiles after removal of RegSlot
Diffstat (limited to 'compiler/codeGen')
-rw-r--r--  compiler/codeGen/CgInfoTbls.hs    |  28
-rw-r--r--  compiler/codeGen/CgMonad.lhs      |   4
-rw-r--r--  compiler/codeGen/StgCmmBind.hs    |   2
-rw-r--r--  compiler/codeGen/StgCmmExpr.hs    |  73
-rw-r--r--  compiler/codeGen/StgCmmForeign.hs |   2
-rw-r--r--  compiler/codeGen/StgCmmHeap.hs    |   4
-rw-r--r--  compiler/codeGen/StgCmmLayout.hs  | 164
-rw-r--r--  compiler/codeGen/StgCmmMonad.hs   |  18
8 files changed, 222 insertions(+), 73 deletions(-)
diff --git a/compiler/codeGen/CgInfoTbls.hs b/compiler/codeGen/CgInfoTbls.hs
index 25ba154d12..ed5c5261d7 100644
--- a/compiler/codeGen/CgInfoTbls.hs
+++ b/compiler/codeGen/CgInfoTbls.hs
@@ -67,10 +67,9 @@ emitClosureCodeAndInfoTable cl_info args body
-- Convert from 'ClosureInfo' to 'CmmInfo'.
-- Not used for return points. (The 'smRepClosureTypeInt' call would panic.)
-mkCmmInfo :: ClosureInfo -> FCode CmmInfo
+mkCmmInfo :: ClosureInfo -> FCode CmmInfoTable
mkCmmInfo cl_info
- = return (CmmInfo gc_target Nothing $
- CmmInfoTable { cit_lbl = infoTableLabelFromCI cl_info,
+ = return (CmmInfoTable { cit_lbl = infoTableLabelFromCI cl_info,
cit_rep = closureSMRep cl_info,
cit_prof = prof,
cit_srt = closureSRT cl_info })
@@ -80,14 +79,6 @@ mkCmmInfo cl_info
ty_descr_w8 = stringToWord8s (closureTypeDescr cl_info)
val_descr_w8 = stringToWord8s (closureValDescr cl_info)
- -- The gc_target is to inform the CPS pass when it inserts a stack check.
- -- Since that pass isn't used yet we'll punt for now.
- -- When the CPS pass is fully integrated, this should
- -- be replaced by the label that any heap check jumped to,
- -- so that branch can be shared by both the heap (from codeGen)
- -- and stack checks (from the CPS pass).
- gc_target = panic "TODO: gc_target"
-
-------------------------------------------------------------------------
--
-- Generating the info table and code for a return point
@@ -106,8 +97,7 @@ emitReturnTarget name stmts
; blks <- cgStmtsToBlocks stmts
; frame <- mkStackLayout
; let smrep = mkStackRep (mkLiveness frame)
- info = CmmInfo gc_target Nothing info_tbl
- info_tbl = CmmInfoTable { cit_lbl = info_lbl
+ info = CmmInfoTable { cit_lbl = info_lbl
, cit_prof = NoProfilingInfo
, cit_rep = smrep
, cit_srt = srt_info }
@@ -119,14 +109,6 @@ emitReturnTarget name stmts
info_lbl = mkReturnInfoLabel uniq
entry_lbl = mkReturnPtLabel uniq
- -- The gc_target is to inform the CPS pass when it inserts a stack check.
- -- Since that pass isn't used yet we'll punt for now.
- -- When the CPS pass is fully integrated, this should
- -- be replaced by the label that any heap check jumped to,
- -- so that branch can be shared by both the heap (from codeGen)
- -- and stack checks (from the CPS pass).
- gc_target = panic "TODO: gc_target"
-
-- Build stack layout information from the state of the 'FCode' monad.
-- Should go away once 'codeGen' starts using the CPS conversion
-- pass to handle the stack. Until then, this is really just
@@ -378,8 +360,8 @@ funInfoTable info_ptr
emitInfoTableAndCode
:: CLabel -- Label of entry or ret
- -> CmmInfo -- ...the info table
- -> [CmmFormal] -- ...args
+ -> CmmInfoTable -- ...the info table
+ -> [CmmFormal] -- ...args
-> [CmmBasicBlock] -- ...and body
-> Code
diff --git a/compiler/codeGen/CgMonad.lhs b/compiler/codeGen/CgMonad.lhs
index 59f6accf9d..6e164ce9ee 100644
--- a/compiler/codeGen/CgMonad.lhs
+++ b/compiler/codeGen/CgMonad.lhs
@@ -728,7 +728,7 @@ emitDecl decl
= do { state <- getState
; setState $ state { cgs_tops = cgs_tops state `snocOL` decl } }
-emitProc :: CmmInfo -> CLabel -> [CmmFormal] -> [CmmBasicBlock] -> Code
+emitProc :: CmmInfoTable -> CLabel -> [CmmFormal] -> [CmmBasicBlock] -> Code
emitProc info lbl [] blocks
= do { let proc_block = CmmProc info lbl (ListGraph blocks)
; state <- getState
@@ -740,7 +740,7 @@ emitSimpleProc :: CLabel -> Code -> Code
emitSimpleProc lbl code
= do { stmts <- getCgStmts code
; blks <- cgStmtsToBlocks stmts
- ; emitProc (CmmInfo Nothing Nothing CmmNonInfoTable) lbl [] blks }
+ ; emitProc CmmNonInfoTable lbl [] blks }
-- Get all the CmmTops (there should be no stmts)
-- Return a single Cmm which may be split from other Cmms by
diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs
index 724f28d142..0222299ff2 100644
--- a/compiler/codeGen/StgCmmBind.hs
+++ b/compiler/codeGen/StgCmmBind.hs
@@ -596,7 +596,7 @@ pushUpdateFrame es body
offset <- foldM push updfr es
withUpdFrameOff offset body
where push off e =
- do emitStore (CmmStackSlot (CallArea Old) base) e
+ do emitStore (CmmStackSlot Old base) e
return base
where base = off + widthInBytes (cmmExprWidth e)
diff --git a/compiler/codeGen/StgCmmExpr.hs b/compiler/codeGen/StgCmmExpr.hs
index 3b12b2a4b7..fe41de83fa 100644
--- a/compiler/codeGen/StgCmmExpr.hs
+++ b/compiler/codeGen/StgCmmExpr.hs
@@ -532,16 +532,9 @@ cgTailCall fun_id fun_info args = do
ReturnIt -> emitReturn [fun] -- ToDo: does ReturnIt guarantee tagged?
EnterIt -> ASSERT( null args ) -- Discarding arguments
- do { let entry = entryCode (closureInfoPtr fun)
- ; [ret,call] <- forkAlts [
- getCode $
- emitReturn [fun], -- Is tagged; no need to untag
- getCode $ do -- Not tagged
- emitCall (NativeNodeCall, NativeReturn) entry [fun]
- ]
- ; emit =<< mkCmmIfThenElse (cmmIsTagged fun) ret call }
-
- SlowCall -> do -- A slow function call via the RTS apply routines
+ emitEnter fun
+
+ SlowCall -> do -- A slow function call via the RTS apply routines
{ tickySlowCall lf_info args
; emitComment $ mkFastString "slowCall"
; slowCall fun args }
@@ -565,6 +558,66 @@ cgTailCall fun_id fun_info args = do
node_points = nodeMustPointToIt lf_info
+emitEnter :: CmmExpr -> FCode ()
+emitEnter fun = do
+ { adjustHpBackwards
+ ; sequel <- getSequel
+ ; updfr_off <- getUpdFrameOff
+ ; case sequel of
+ -- For a return, we have the option of generating a tag-test or
+ -- not. If the value is tagged, we can return directly, which
+ -- is quicker than entering the value. This is a code
+ -- size/speed trade-off: when optimising for speed rather than
+ -- size we could generate the tag test.
+ --
+ -- Right now, we do what the old codegen did, and omit the tag
+ -- test, just generating an enter.
+ Return _ -> do
+ { let entry = entryCode $ closureInfoPtr $ CmmReg nodeReg
+ ; emit $ mkForeignJump NativeNodeCall entry
+ [cmmUntag fun] updfr_off
+ }
+
+ -- The result will be scrutinised in the sequel. This is where
+ -- we generate a tag-test to avoid entering the closure if
+ -- possible.
+ --
+ -- The generated code will be something like this:
+ --
+ -- R1 = fun -- copyout
+ -- if (fun & 7 != 0) goto Lret else goto Lcall
+ -- Lcall:
+ -- call [fun] returns to Lret
+ -- Lret:
+ -- fun' = R1 -- copyin
+ -- ...
+ --
+ -- Note in particular that the label Lret is used as a
+ -- destination by both the tag-test and the call. This is
+ -- because Lret will necessarily be a proc-point, and we want to
+ -- ensure that we generate only one proc-point for this
+ -- sequence.
+ --
+ AssignTo res_regs _ -> do
+ { lret <- newLabelC
+ ; lcall <- newLabelC
+ ; let area = Young lret
+ ; let (off, copyin) = copyInOflow NativeReturn area res_regs
+ (outArgs, copyout) = copyOutOflow NativeNodeCall Call area
+ [fun] updfr_off (0,[])
+ ; let entry = entryCode (closureInfoPtr fun)
+ the_call = toCall entry (Just lret) updfr_off off outArgs
+ ; emit $
+ copyout <*>
+ mkCbranch (cmmIsTagged fun) lret lcall <*>
+ outOfLine lcall the_call <*>
+ mkLabel lret <*>
+ copyin
+ }
+ }
+
+
+
{- Note [case on Bool]
~~~~~~~~~~~~~~~~~~~
A case on a Boolean value does two things:
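
The tag test emitEnter generates above ("fun & 7 != 0") relies on GHC's
pointer-tagging scheme: an evaluated closure carries a non-zero tag in the
low bits of its pointer (2 bits on 32-bit targets, 3 on 64-bit). A minimal
standalone sketch of that predicate, assuming a 64-bit word; isTaggedPtr is
an invented name, and the real cmmIsTagged builds a Cmm expression rather
than testing a Haskell value:

    import Data.Bits ((.&.))
    import Data.Word (Word64)

    -- Sketch only: a non-zero tag in the low bits means the closure is
    -- already evaluated, so it can be returned without being entered.
    -- 64-bit targets use 3 tag bits, hence the mask 7 in the test above.
    isTaggedPtr :: Word64 -> Bool
    isTaggedPtr ptr = ptr .&. 7 /= 0
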
diff --git a/compiler/codeGen/StgCmmForeign.hs b/compiler/codeGen/StgCmmForeign.hs
index f4be622092..3580481043 100644
--- a/compiler/codeGen/StgCmmForeign.hs
+++ b/compiler/codeGen/StgCmmForeign.hs
@@ -184,7 +184,7 @@ emitSaveThreadState :: BlockId -> FCode ()
emitSaveThreadState bid = do
-- CurrentTSO->stackobj->sp = Sp;
emitStore (cmmOffset (CmmLoad (cmmOffset stgCurrentTSO tso_stackobj) bWord) stack_SP)
- (CmmStackSlot (CallArea (Young bid)) (widthInBytes (typeWidth gcWord)))
+ (CmmStackSlot (Young bid) (widthInBytes (typeWidth gcWord)))
emit closeNursery
-- and save the current cost centre stack in the TSO when profiling:
when opt_SccProfilingOn $
diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs
index 2b0b6f895e..68d078fb28 100644
--- a/compiler/codeGen/StgCmmHeap.hs
+++ b/compiler/codeGen/StgCmmHeap.hs
@@ -418,8 +418,8 @@ altHeapCheck regs code
gc_call sp =
case rts_label regs of
- Just gc -> mkCall (CmmLit gc) (GC, GC) regs reg_exprs sp
- Nothing -> mkCall generic_gc (GC, GC) [] [] sp
+ Just gc -> mkCall (CmmLit gc) (GC, GC) regs reg_exprs sp (0,[])
+ Nothing -> mkCall generic_gc (GC, GC) [] [] sp (0,[])
rts_label [reg]
| isGcPtrType ty = Just (mkGcLabel "stg_gc_unpt_r1")
diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs
index 0299bc0f96..9ee9192794 100644
--- a/compiler/codeGen/StgCmmLayout.hs
+++ b/compiler/codeGen/StgCmmLayout.hs
@@ -15,7 +15,7 @@
module StgCmmLayout (
mkArgDescr,
- emitCall, emitReturn,
+ emitCall, emitReturn, adjustHpBackwards,
emitClosureProcAndInfoTable,
emitClosureAndInfoTable,
@@ -41,10 +41,12 @@ import StgCmmEnv
import StgCmmTicky
import StgCmmMonad
import StgCmmUtils
+import StgCmmProf
import MkGraph
import SMRep
import Cmm
+import CmmUtils
import CLabel
import StgSyn
import Id
@@ -53,6 +55,7 @@ import TyCon ( PrimRep(..) )
import BasicTypes ( Arity )
import DynFlags
import StaticFlags
+import Module
import Constants
import Util
@@ -93,10 +96,31 @@ emitCall convs@(callConv, _) fun args
; updfr_off <- getUpdFrameOff
; emitComment $ mkFastString ("emitCall: " ++ show sequel)
; case sequel of
- Return _ -> emit (mkForeignJump callConv fun args updfr_off)
- AssignTo res_regs _ -> emit =<< mkCall fun convs res_regs args updfr_off
+ Return _ ->
+ emit $ mkForeignJump callConv fun args updfr_off
+ AssignTo res_regs _ ->
+ emit =<< mkCall fun convs res_regs args updfr_off (0,[])
}
+emitCallWithExtraStack
+ :: (Convention, Convention) -> CmmExpr -> [CmmExpr]
+ -> (ByteOff, [(CmmExpr,ByteOff)]) -> FCode ()
+-- (cgCall fun args) makes a call to the entry-code of 'fun',
+-- passing 'args', and returning the results to the current sequel
+emitCallWithExtraStack convs@(callConv, _) fun args extra_stack
+ = do { adjustHpBackwards
+ ; sequel <- getSequel
+ ; updfr_off <- getUpdFrameOff
+ ; emitComment $ mkFastString ("emitCallWithExtraStack: " ++ show sequel)
+ ; case sequel of
+ Return _ ->
+ emit $ mkForeignJumpExtra callConv fun args updfr_off extra_stack
+ AssignTo res_regs _ -> do
+ emit =<< mkCall fun convs res_regs args updfr_off extra_stack
+ }
+
+
+
adjustHpBackwards :: FCode ()
-- This function adjusts the heap pointer just before a tail call or
-- return. At a call or return, the virtual heap pointer may be less
@@ -128,6 +152,19 @@ adjustHpBackwards
-- Making calls: directCall and slowCall
-------------------------------------------------------------------------
+-- General plan is:
+-- - we'll make *one* fast call, either to the function itself
+-- (directCall) or to stg_ap_<pat>_fast (slowCall)
+-- Any left-over arguments will be pushed on the stack,
+--
+-- e.g. Sp[old+8] = arg1
+-- Sp[old+16] = arg2
+-- Sp[old+32] = stg_ap_pp_info
+-- R2 = arg3
+-- R3 = arg4
+-- call f() return to Nothing updfr_off: 32
+
+
directCall :: CLabel -> Arity -> [StgArg] -> FCode ()
-- (directCall f n args)
-- calls f(arg1, ..., argn), and applies the result to the remaining args
@@ -140,8 +177,19 @@ directCall lbl arity stg_args
slowCall :: CmmExpr -> [StgArg] -> FCode ()
-- (slowCall fun args) applies fun to args, returning the results to Sequel
slowCall fun stg_args
- = do { cmm_args <- getNonVoidArgAmodes stg_args
- ; slow_call fun cmm_args (argsReps stg_args) }
+ = do { dflags <- getDynFlags
+ ; cmm_args <- getNonVoidArgAmodes stg_args
+ ; let platform = targetPlatform dflags
+ ; call <- getCode $ direct_call "slow_call"
+ (mkRtsApFastLabel rts_fun) arity cmm_args reps
+ ; emitComment $ mkFastString ("slow_call for " ++
+ showSDoc (pprPlatform platform fun) ++
+ " with pat " ++ showSDoc (ftext rts_fun))
+ ; emit (mkAssign nodeReg fun <*> call)
+ }
+ where
+ reps = argsReps stg_args
+ (rts_fun, arity) = slowCallPattern reps
--------------
direct_call :: String -> CLabel -> Arity -> [CmmExpr] -> [ArgRep] -> FCode ()
@@ -149,7 +197,7 @@ direct_call :: String -> CLabel -> Arity -> [CmmExpr] -> [ArgRep] -> FCode ()
-- the args exclude the void ones
-- NB2: 'arity' refers to the *reps*
direct_call caller lbl arity args reps
- | debugIsOn && arity > length reps -- Too few args
+ | debugIsOn && arity > length reps -- Too few args
= do -- Caller should ensure that there are enough args!
dflags <- getDynFlags
let platform = targetPlatform dflags
@@ -157,33 +205,77 @@ direct_call caller lbl arity args reps
<+> pprPlatform platform lbl <+> ppr (length reps)
<+> pprPlatform platform args <+> ppr reps )
- | null rest_reps -- Precisely the right number of arguments
+
+ | null rest_reps -- Precisely the right number of arguments
= emitCall (NativeDirectCall, NativeReturn) target args
- | otherwise -- Over-saturated call
+ | otherwise -- Note [over-saturated calls]
= ASSERT( arity == length initial_reps )
- do { pap_id <- newTemp gcWord
- ; withSequel (AssignTo [pap_id] True)
- (emitCall (NativeDirectCall, NativeReturn) target fast_args)
- ; slow_call (CmmReg (CmmLocal pap_id))
- rest_args rest_reps }
+ emitCallWithExtraStack (NativeDirectCall, NativeReturn)
+ target fast_args (mkStkOffsets stack_args)
where
target = CmmLit (CmmLabel lbl)
(initial_reps, rest_reps) = splitAt arity reps
arg_arity = count isNonV initial_reps
(fast_args, rest_args) = splitAt arg_arity args
-
---------------
-slow_call :: CmmExpr -> [CmmExpr] -> [ArgRep] -> FCode ()
-slow_call fun args reps
- = do dflags <- getDynFlags
- let platform = targetPlatform dflags
- call <- getCode $ direct_call "slow_call" (mkRtsApFastLabel rts_fun) arity args reps
- emitComment $ mkFastString ("slow_call for " ++ showSDoc (pprPlatform platform fun) ++
- " with pat " ++ showSDoc (ftext rts_fun))
- emit (mkAssign nodeReg fun <*> call)
+ stack_args = slowArgs (zip rest_reps rest_args)
+
+
+{-
+Note [over-saturated calls]
+
+The natural thing to do for an over-saturated call would be to call
+the function with the correct number of arguments, and then apply the
+remaining arguments to the value returned, e.g.
+
+ f a b c d (where f has arity 2)
+ -->
+ r = call f(a,b)
+ call r(c,d)
+
+but this entails
+ - saving c and d on the stack
+ - making a continuation info table
+ - at the continuation, loading c and d off the stack into regs
+ - finally, call r
+
+Note that since there are a fixed number of different r's
+(e.g. stg_ap_pp_fast), we can also pre-compile continuations
+that correspond to each of them, rather than generating a fresh
+one for each over-saturated call.
+
+Not only does this generate much less code, it is faster too. We will
+generate something like:
+
+Sp[old+16] = c
+Sp[old+24] = d
+Sp[old+32] = stg_ap_pp_info
+call f(a,b) -- usual calling convention
+
+For the purposes of the CmmCall node, we count this extra stack as
+just more arguments that we are passing on the stack (cml_args).
+-}
+
+-- | 'slowArgs' takes a list of function arguments and prepares them for
+-- pushing on the stack for "extra" arguments to a function which requires
+-- fewer arguments than we currently have.
+slowArgs :: [(ArgRep,CmmExpr)] -> [(ArgRep,CmmExpr)]
+slowArgs [] = []
+slowArgs amodes
+ | opt_SccProfilingOn = save_cccs ++ this_pat ++ slowArgs rest
+ | otherwise = this_pat ++ slowArgs rest
where
- (rts_fun, arity) = slowCallPattern reps
+ (arg_pat, args, rest) = matchSlowPattern amodes
+ stg_ap_pat = mkCmmRetInfoLabel rtsPackageId arg_pat
+ this_pat = (N, mkLblExpr stg_ap_pat) : args
+ save_cccs = [(N, mkLblExpr save_cccs_lbl), (N, curCCS)]
+ save_cccs_lbl = mkCmmRetInfoLabel rtsPackageId (fsLit "stg_restore_cccs")
+
+matchSlowPattern :: [(ArgRep,CmmExpr)]
+ -> (FastString, [(ArgRep,CmmExpr)], [(ArgRep,CmmExpr)])
+matchSlowPattern amodes = (arg_pat, these, rest)
+ where (arg_pat, n) = slowCallPattern (map fst amodes)
+ (these, rest) = splitAt n amodes
-- These cases were found to cover about 99% of all slow calls:
slowCallPattern :: [ArgRep] -> (FastString, Arity)
@@ -206,6 +298,30 @@ slowCallPattern [] = (fsLit "stg_ap_0", 0)
-------------------------------------------------------------------------
+-- Fix the byte-offsets of a bunch of things to push on the stack
+
+-- This is used for pushing slow-call continuations.
+-- See Note [over-saturated calls].
+
+mkStkOffsets
+ :: [(ArgRep,CmmExpr)] -- things to make offsets for
+ -> ( ByteOff -- OUTPUTS: Topmost allocated word
+ , [(CmmExpr, ByteOff)] ) -- things with offsets (voids filtered out)
+mkStkOffsets things
+ = loop 0 [] (reverse things)
+ where
+ loop offset offs [] = (offset,offs)
+ loop offset offs ((V,_):things) = loop offset offs things
+ -- ignore Void arguments
+ loop offset offs ((rep,thing):things)
+ = loop thing_off ((thing, thing_off):offs) things
+ where
+ thing_off = offset + argRepSizeW rep * wORD_SIZE
+ -- offset of thing is offset+size, because we're
+ -- growing the stack *downwards* as the offsets increase.
+
+
+-------------------------------------------------------------------------
-- Classifying arguments: ArgRep
-------------------------------------------------------------------------
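
To make the offset arithmetic in mkStkOffsets concrete: it walks the
arguments in reverse, skips voids, and places each thing at the running
offset *plus* its own size, because the stack grows downwards as offsets
increase. A standalone sketch under simplified, hypothetical assumptions
(one word per non-void rep and an 8-byte word; the real code uses
argRepSizeW and wORD_SIZE, and ArgRep has more constructors):

    -- Sketch only: mirrors the loop in mkStkOffsets above.
    data ArgRep = P | N | V        -- pointer, non-pointer, void (simplified)

    argSizeW :: ArgRep -> Int
    argSizeW V = 0                 -- voids occupy no stack space
    argSizeW _ = 1                 -- one word each, by assumption

    mkOffsets :: [(ArgRep, a)] -> (Int, [(a, Int)])
    mkOffsets = loop 0 [] . reverse
      where
        wordSize = 8               -- assumed 64-bit target
        loop off acc []                = (off, acc)
        loop off acc ((V, _) : rest)   = loop off acc rest
        loop off acc ((rep, x) : rest) = loop off' ((x, off') : acc) rest
          where off' = off + argSizeW rep * wordSize

For example, mkOffsets [(P,"x"), (V,"void"), (P,"y")] evaluates to
(16, [("x",16), ("y",8)]): the void argument is dropped and the first
argument in the list gets the largest offset.
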
diff --git a/compiler/codeGen/StgCmmMonad.hs b/compiler/codeGen/StgCmmMonad.hs
index 6c5ab4c692..c64df7ecc5 100644
--- a/compiler/codeGen/StgCmmMonad.hs
+++ b/compiler/codeGen/StgCmmMonad.hs
@@ -611,7 +611,7 @@ emitLabel :: BlockId -> FCode ()
emitLabel id = emitCgStmt (CgLabel id)
emitComment :: FastString -> FCode ()
-#ifdef DEBUG
+#if 0 /* def DEBUG */
emitComment s = emitCgStmt (CgStmt (CmmComment s))
#else
emitComment s = return ()
@@ -688,20 +688,18 @@ mkCmmIfThen e tbranch = do
mkCall :: CmmExpr -> (Convention, Convention) -> [CmmFormal] -> [CmmActual]
- -> UpdFrameOffset -> FCode CmmAGraph
-mkCall f (callConv, retConv) results actuals updfr_off = do
+ -> UpdFrameOffset -> (ByteOff,[(CmmExpr,ByteOff)]) -> FCode CmmAGraph
+mkCall f (callConv, retConv) results actuals updfr_off extra_stack = do
k <- newLabelC
- let area = CallArea $ Young k
+ let area = Young k
(off, copyin) = copyInOflow retConv area results
- copyout = lastWithArgs Call area callConv actuals updfr_off
- (toCall f (Just k) updfr_off off)
+ copyout = mkCallReturnsTo f callConv actuals k off updfr_off extra_stack
return (copyout <*> mkLabel k <*> copyin)
-
mkCmmCall :: CmmExpr -> [CmmFormal] -> [CmmActual] -> UpdFrameOffset
-> FCode CmmAGraph
-mkCmmCall f results actuals
- = mkCall f (NativeDirectCall, NativeReturn) results actuals
+mkCmmCall f results actuals updfr_off
+ = mkCall f (NativeDirectCall, NativeReturn) results actuals updfr_off (0,[])
mkSafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual]
@@ -710,7 +708,7 @@ mkSafeCall :: ForeignTarget -> [CmmFormal] -> [CmmActual]
mkSafeCall t fs as upd i = do
k <- newLabelC
return
- ( mkStore (CmmStackSlot (CallArea (Young k)) (widthInBytes wordWidth))
+ ( mkStore (CmmStackSlot (Young k) (widthInBytes wordWidth))
(CmmLit (CmmBlock k))
<*> mkLast (CmmForeignCall {tgt=t, res=fs, args=as, succ=k, updfr=upd, intrbl=i})
<*> mkLabel k)
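
The (ByteOff, [(CmmExpr, ByteOff)]) pair threaded through mkCall above is
exactly what mkStkOffsets produces: the total bytes of extra stack plus the
offset at which to store each expression. A hypothetical alias makes the
shape explicit (sketch only; the patch itself writes (0,[]) inline at call
sites, and here Int stands in for ByteOff and a type parameter for CmmExpr):

    -- Sketch only: invented names, not part of the patch.
    type ExtraStack e = (Int, [(e, Int)])  -- (total bytes, stores)

    noExtraStack :: ExtraStack e
    noExtraStack = (0, [])

With this reading, mkCmmCall is just mkCall applied with no extra stack,
and emitCallWithExtraStack generalises emitCall in the same way.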