diff options
| author | hwloidl <unknown> | 2000-01-13 14:34:09 +0000 | 
|---|---|---|
| committer | hwloidl <unknown> | 2000-01-13 14:34:09 +0000 | 
| commit | 1b28d4e1f43185ad8c8e7407c66413e1b358402b (patch) | |
| tree | 9a69069b4adaf380e7bad6827e2e248b29f96024 | |
| parent | d3d20ba70003e869af4d9f44d70d1d403d131812 (diff) | |
| download | haskell-1b28d4e1f43185ad8c8e7407c66413e1b358402b.tar.gz | |
[project @ 2000-01-13 14:33:57 by hwloidl]
Merged GUM-4-04 branch into the main trunk. In particular merged GUM and
SMP code. Most of the GranSim code in GUM-4-04 still has to be carried over.
77 files changed, 19182 insertions, 521 deletions
diff --git a/ghc/compiler/absCSyn/Costs.lhs b/ghc/compiler/absCSyn/Costs.lhs index 24563c7bb0..7bbadff4f1 100644 --- a/ghc/compiler/absCSyn/Costs.lhs +++ b/ghc/compiler/absCSyn/Costs.lhs @@ -1,7 +1,9 @@  % -% (c) The GRASP/AQUA Project, Glasgow University, 1994-1998 -%     Hans Wolfgang Loidl +% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998  % +% $Id: Costs.lhs,v 1.20 2000/01/13 14:33:57 hwloidl Exp $ +% +% Only needed in a GranSim setup -- HWL  % ---------------------------------------------------------------------------  \section[Costs]{Evaluating the costs of computing some abstract C code} @@ -28,9 +30,11 @@ The meaning of the result tuple is:     instructions.  \end{itemize} -This function is needed in GrAnSim for parallelism. +This function is needed in GranSim for costing pieces of abstract C. -These are first suggestions for scaling the costs. But, this scaling should be done in the RTS rather than the compiler (this really should be tunable!): +These are first suggestions for scaling the costs. But, this scaling should +be done in the RTS rather than the compiler (this really should be +tunable!):  \begin{pseudocode} @@ -82,6 +86,7 @@ instance Num CostRes where   negate	 = mapOp negate   abs	 = mapOp abs   signum	 = mapOp signum + fromInteger _ = error "fromInteger not defined"  mapOp :: (Int -> Int) -> CostRes -> CostRes  mapOp g ( Cost (i, b, l, s, f) )  = Cost (g i, g b, g l, g s, g f) @@ -202,7 +207,10 @@ costs absC =     CSimultaneous absC	     -> costs absC -   CCheck _ amodes code	     -> Cost (2, 1, 0, 0, 0) +   CCheck _ amodes code	     -> Cost (2, 1, 0, 0, 0) -- ToDo: refine this by  +                                                     -- looking at the first arg  + +   CRetDirect _ _ _ _	     -> nullCosts     CMacroStmt	macro modes  -> stmtMacroCosts macro modes @@ -215,19 +223,28 @@ costs absC =    -- *** the next three [or so...] 
are DATA (those above are CODE) ***    -- as they are data rather than code they all have nullCosts	       -- HWL +   CCallTypedef _ _ _ _      -> nullCosts +     CStaticClosure _ _ _ _    -> nullCosts -   CClosureInfoAndCode _ _ _ _ -> nullCosts +   CSRT _ _                  -> nullCosts -   CRetDirect _ _ _ _	     -> nullCosts +   CBitmap _ _               -> nullCosts + +   CClosureInfoAndCode _ _ _ _ -> nullCosts     CRetVector _ _ _ _        -> nullCosts +   CClosureTbl _             -> nullCosts +     CCostCentreDecl _ _	     -> nullCosts +     CCostCentreStackDecl _    -> nullCosts     CSplitMarker		     -> nullCosts +   _ -> trace ("Costs.costs") nullCosts +  -- ---------------------------------------------------------------------------  addrModeCosts :: CAddrMode -> Side -> CostRes @@ -242,7 +259,11 @@ addrModeCosts addr_mode side =      CVal _ _ -> if lhs then Cost (0, 0, 0, 1, 0)  		       else Cost (0, 0, 1, 0, 0) -    CReg _   -> nullCosts	 {- loading from, storing to reg is free ! -} +    CAddr (CIndex _ n _ ) -> Cost (1, 0, 1, 0, 0) -- does pointer arithmetic + +    CAddr _ -> nullCosts + +    CReg _  -> nullCosts	 {- loading from, storing to reg is free ! 
-}  				 {- for costing CReg->Creg ops see special -}  				 {- case in costs fct -} @@ -277,6 +298,8 @@ addrModeCosts addr_mode side =      CMacroExpr _ macro mode_list -> exprMacroCosts side macro mode_list +    _ -> trace ("Costs.addrModeCosts") nullCosts +  -- ---------------------------------------------------------------------------  exprMacroCosts :: Side -> CExprMacro -> [CAddrMode] -> CostRes @@ -288,10 +311,11 @@ exprMacroCosts side macro mode_list =    in    arg_costs +    case macro of -    ENTRY_CODE -> nullCosts -    ARG_TAG -> nullCosts -- XXX -    GET_TAG -> nullCosts -- XXX -     +    ENTRY_CODE -> nullCosts -- nothing  +    ARG_TAG -> nullCosts -- nothing +    GET_TAG -> Cost (0, 0, 1, 0, 0)  -- indirect load +    UPD_FRAME_UPDATEE -> Cost (0, 0, 1, 0, 0)  -- indirect load +    _ -> trace ("Costs.exprMacroCosts") nullCosts  -- --------------------------------------------------------------------------- @@ -309,7 +333,9 @@ stmtMacroCosts macro modes =      UPD_CAF		  ->  Cost (7, 0, 1, 3, 0)	 {- SMupdate.lh	 -}      UPD_BH_UPDATABLE	  ->  Cost (3, 0, 0, 1, 0)	 {- SMupdate.lh	 -}      UPD_BH_SINGLE_ENTRY	  ->  Cost (3, 0, 0, 1, 0)	 {- SMupdate.lh	 -} -    PUSH_UPD_FRAME	  ->  Cost (3, 0, 0, 4, 0)	 {- SMupdate.lh	 -} +    PUSH_UPD_FRAME	  ->  Cost (3, 0, 0, 4, 0)	 {- Updates.h	 -} +    PUSH_SEQ_FRAME	  ->  Cost (2, 0, 0, 3, 0)	 {- StgMacros.h	 !-} +    UPDATE_SU_FROM_UPD_FRAME -> Cost (1, 0, 1, 0, 0)     {- StgMacros.h	 !-}      SET_TAG		  ->  nullCosts		    {- COptRegs.lh -}      GRAN_FETCH			->  nullCosts	  {- GrAnSim bookkeeping -}      GRAN_RESCHEDULE		->  nullCosts	  {- GrAnSim bookkeeping -} diff --git a/ghc/compiler/basicTypes/Module.lhs b/ghc/compiler/basicTypes/Module.lhs index 40c25f5778..af634fd86a 100644 --- a/ghc/compiler/basicTypes/Module.lhs +++ b/ghc/compiler/basicTypes/Module.lhs @@ -285,7 +285,8 @@ getAllFilesMatching :: SearchPath  		    -> (ModuleHiMap, ModuleHiMap)  		    -> (FilePath, String)   		    -> IO (ModuleHiMap, 
ModuleHiMap) -getAllFilesMatching dirs hims (dir_path, suffix) = ( do +getAllFilesMatching dirs hims (dir_path, suffix) =  + do      -- fpaths entries do not have dir_path prepended    fpaths  <- getDirectoryContents dir_path    is_dll <- catch @@ -297,7 +298,7 @@ getAllFilesMatching dirs hims (dir_path, suffix) = ( do  		)  		(\ _ {-don't care-} -> return NotDll)    return (foldl (addModules is_dll) hims fpaths) -  )  -- soft failure +  -- soft failure        `catch`           (\ err -> do  	      hPutStrLn stderr diff --git a/ghc/compiler/codeGen/CgCase.lhs b/ghc/compiler/codeGen/CgCase.lhs index 4e755ca7c1..e358b9bf55 100644 --- a/ghc/compiler/codeGen/CgCase.lhs +++ b/ghc/compiler/codeGen/CgCase.lhs @@ -1,7 +1,7 @@  %  % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998  % -% $Id: CgCase.lhs,v 1.36 1999/11/01 17:10:06 simonpj Exp $ +% $Id: CgCase.lhs,v 1.37 2000/01/13 14:33:57 hwloidl Exp $  %  %********************************************************  %*							* @@ -602,9 +602,10 @@ cgAlgDefault gc_flag is_fun uniq cc_slot must_label_branch    = 	-- We have arranged that Node points to the thing      restoreCurrentCostCentre cc_slot `thenFC` \restore_cc ->      getAbsC (absC restore_cc `thenC` -             (if opt_GranMacros && emit_yield -                then yield [node] False -                else absC AbsCNop)                            `thenC`      +             -- HWL: maybe need yield here +             --(if emit_yield +             --   then yield [node] True +             --   else absC AbsCNop)                            `thenC`       	     possibleHeapCheck gc_flag is_fun [node] [] Nothing (cgExpr rhs)  	-- Node is live, but doesn't need to point at the thing itself;  	-- it's ok for Node to point to an indirection or FETCH_ME @@ -633,9 +634,10 @@ cgAlgAlt gc_flag uniq cc_slot must_label_branch    =       restoreCurrentCostCentre cc_slot `thenFC` \restore_cc ->      getAbsC (absC restore_cc `thenC` -    	     (if opt_GranMacros && 
emit_yield -      		then yield [node] True		-- XXX live regs wrong -      		else absC AbsCNop)                               `thenC`      +             -- HWL: maybe need yield here +    	     -- (if emit_yield +      	     --    then yield [node] True		-- XXX live regs wrong +      	     --    else absC AbsCNop)                               `thenC`          	     (case gc_flag of  		NoGC   	    -> mapFCs bindNewToTemp args `thenFC` \_ -> nopC      		GCMayHappen -> bindConArgs con args @@ -667,9 +669,10 @@ cgUnboxedTupleAlt lbl cc_slot emit_yield (con,args,use_mask,rhs)          restoreCurrentCostCentre cc_slot `thenFC` \restore_cc ->  	absC restore_cc `thenC` -  	(if opt_GranMacros && emit_yield -  	    then yield live_regs True		-- XXX live regs wrong? -  	    else absC AbsCNop)                         `thenC`      +        -- HWL: maybe need yield here +  	-- (if emit_yield +  	--    then yield live_regs True		-- XXX live regs wrong? +  	--    else absC AbsCNop)                         `thenC`         	let   	      -- ToDo: could maybe use Nothing here if stack_res is False  	      -- since the heap-check can just return to the top of the  diff --git a/ghc/compiler/codeGen/CgClosure.lhs b/ghc/compiler/codeGen/CgClosure.lhs index c40320cc95..1b80beaf40 100644 --- a/ghc/compiler/codeGen/CgClosure.lhs +++ b/ghc/compiler/codeGen/CgClosure.lhs @@ -1,7 +1,7 @@  %  % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998  % -% $Id: CgClosure.lhs,v 1.38 1999/11/11 17:50:49 simonpj Exp $ +% $Id: CgClosure.lhs,v 1.39 2000/01/13 14:33:58 hwloidl Exp $  %  \section[CgClosure]{Code generation for closures} @@ -40,7 +40,8 @@ import CgUsages		( adjustSpAndHp, setRealAndVirtualSp, getVirtSp,  			  getSpRelOffset, getHpRelOffset  			)  import CLabel		( CLabel, mkClosureLabel, mkFastEntryLabel, -			  mkRednCountsLabel, mkInfoTableLabel +			  mkRednCountsLabel, mkInfoTableLabel, +                          pprCLabel  			)  import ClosureInfo	-- lots and lots of stuff  import 
CmdLineOpts	( opt_GranMacros, opt_SccProfilingOn, opt_DoTickyProfiling ) @@ -325,7 +326,12 @@ closureCodeBody binder_info closure_info cc all_args body  	--      	arg_regs = case entry_conv of  		DirectEntry lbl arity regs -> regs -		other 		            -> panic "closureCodeBody:arg_regs" +		other 		           -> trace ("*** closureCodeBody:arg_regs " ++ (pprHWL entry_conv) ++ "(HWL ignored; no args passed in regs)") [] + +        pprHWL :: EntryConvention -> String     +        pprHWL (ViaNode) = "ViaNode" +        pprHWL (StdEntry cl) = "StdEntry" +        pprHWL (DirectEntry cl i l) = "DirectEntry"  	num_arg_regs = length arg_regs @@ -350,7 +356,7 @@ closureCodeBody binder_info closure_info cc all_args body  	    mapCs bindNewToStack arg_offsets	  	    `thenC`  	    setRealAndVirtualSp sp_all_args		    `thenC` -	    argSatisfactionCheck closure_info		    `thenC` +	    argSatisfactionCheck closure_info	arg_regs	    `thenC`  	    -- OK, so there are enough args.  Now we need to stuff as  	    -- many of them in registers as the fast-entry code @@ -516,24 +522,24 @@ relative offset of this word tells how many words of arguments  are expected.  
\begin{code} -argSatisfactionCheck :: ClosureInfo -> Code +argSatisfactionCheck :: ClosureInfo -> [MagicId] {-GRAN-} -> Code -argSatisfactionCheck closure_info +argSatisfactionCheck closure_info arg_regs    = nodeMustPointToIt (closureLFInfo closure_info)   `thenFC` \ node_points -> -    let -       emit_gran_macros = opt_GranMacros -    in +--      let +--         emit_gran_macros = opt_GranMacros +--      in      -- HWL  ngo' ngoq:      -- absC (CMacroStmt GRAN_FETCH []) 			`thenC`      -- forceHeapCheck [] node_points (absC AbsCNop)			`thenC` -    (if emit_gran_macros  -      then if node_points  -             then fetchAndReschedule  [] node_points  -             else yield [] node_points -      else absC AbsCNop)                       `thenC` +    --(if opt_GranMacros +    --  then if node_points  +    --         then fetchAndReschedule  arg_regs node_points  +    --         else yield arg_regs node_points +    --  else absC AbsCNop)                       `thenC`          getSpRelOffset 0 	`thenFC` \ (SpRel sp) ->  	let @@ -565,16 +571,13 @@ thunkWrapper closure_info lbl thunk_code    = 	-- Stack and heap overflow checks      nodeMustPointToIt (closureLFInfo closure_info) `thenFC` \ node_points -> -    let -       emit_gran_macros = opt_GranMacros -    in -	-- HWL: insert macros for GrAnSim; 2 versions depending on liveness of node -	-- (we prefer fetchAndReschedule-style context switches to yield ones) -    (if emit_gran_macros  -      then if node_points  -             then fetchAndReschedule  [] node_points  -             else yield [] node_points -      else absC AbsCNop)                       `thenC` +    -- HWL: insert macros for GrAnSim; 2 versions depending on liveness of node +    -- (we prefer fetchAndReschedule-style context switches to yield ones) +    (if opt_GranMacros +       then if node_points  +              then fetchAndReschedule [] node_points  +              else yield [] node_points +       else absC AbsCNop)                       
`thenC`          -- stack and/or heap checks      thunkChecks lbl node_points ( @@ -597,13 +600,10 @@ funWrapper :: ClosureInfo 	-- Closure whose code body this is  funWrapper closure_info arg_regs stk_tags info_label fun_body    = 	-- Stack overflow check      nodeMustPointToIt (closureLFInfo closure_info)  	`thenFC` \ node_points -> -    let -       emit_gran_macros = opt_GranMacros -    in      -- HWL   chu' ngoq: -    (if emit_gran_macros -      then yield  arg_regs node_points -      else absC AbsCNop)                                 `thenC` +    (if opt_GranMacros +       then yield arg_regs node_points +       else absC AbsCNop)                                 `thenC`          -- heap and/or stack checks      fastEntryChecks arg_regs stk_tags info_label node_points ( diff --git a/ghc/compiler/codeGen/CgHeapery.lhs b/ghc/compiler/codeGen/CgHeapery.lhs index a4f6bc238c..566cfcbbdf 100644 --- a/ghc/compiler/codeGen/CgHeapery.lhs +++ b/ghc/compiler/codeGen/CgHeapery.lhs @@ -1,7 +1,7 @@  %  % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998  % -% $Id: CgHeapery.lhs,v 1.19 1999/10/13 16:39:15 simonmar Exp $ +% $Id: CgHeapery.lhs,v 1.20 2000/01/13 14:33:58 hwloidl Exp $  %  \section[CgHeapery]{Heap management functions} @@ -32,7 +32,7 @@ import ClosureInfo	( closureSize, closureGoodStuffSize,  			)  import PrimRep		( PrimRep(..), isFollowableRep )  import Unique		( Unique ) -import CmdLineOpts	( opt_SccProfilingOn ) +import CmdLineOpts	( opt_SccProfilingOn, opt_GranMacros )  import GlaExts  import Outputable @@ -78,6 +78,10 @@ fastEntryChecks regs tags ret node_points code       getTickyCtrLabel `thenFC` \ ticky_ctr ->       ( if all_pointers then -- heap checks are quite easy +          -- HWL: gran-yield immediately before heap check proper +          --(if node `elem` regs +          --   then yield regs True +          --   else absC AbsCNop ) `thenC`  	  absC (checking_code stk_words hp_words tag_assts   			free_reg (length regs) ticky_ctr) @@ -382,22 
+386,22 @@ mkRegLiveness (VanillaReg rep i : regs) | isFollowableRep rep    =  ((int2Word# 1#) `shiftL#` (i -# 1#)) `or#` mkRegLiveness regs  mkRegLiveness (_ : regs)  =  mkRegLiveness regs +-- The two functions below are only used in a GranSim setup  -- Emit macro for simulating a fetch and then reschedule  fetchAndReschedule ::   [MagicId]               -- Live registers  			-> Bool                 -- Node reqd?  			-> Code -fetchAndReschedule regs node_reqd  = +fetchAndReschedule regs node_reqd  =         if (node `elem` regs || node_reqd)  	then fetch_code `thenC` reschedule_code  	else absC AbsCNop        where  	all_regs = if node_reqd then node:regs else regs -	liveness_mask = 0 {-XXX: mkLiveRegsMask all_regs-} - +        liveness_mask = mkRegLiveness regs  	reschedule_code = absC  (CMacroStmt GRAN_RESCHEDULE [ -				 mkIntCLit liveness_mask, +                                 mkIntCLit (IBOX(word2Int# liveness_mask)),   				 mkIntCLit (if node_reqd then 1 else 0)])  	 --HWL: generate GRAN_FETCH macro for GrAnSim @@ -423,15 +427,16 @@ yield ::   [MagicId]               -- Live registers               -> Bool                 -- Node reqd?               
-> Code  -yield regs node_reqd = -      -- NB: node is not alive; that's why we use DO_YIELD rather than  -      --     GRAN_RESCHEDULE  -      yield_code -      where -        all_regs = if node_reqd then node:regs else regs -        liveness_mask = 0 {-XXX: mkLiveRegsMask all_regs-} - -        yield_code = absC (CMacroStmt GRAN_YIELD [mkIntCLit liveness_mask]) +yield regs node_reqd =  +   if opt_GranMacros && node_reqd +     then yield_code +     else absC AbsCNop +   where +     -- all_regs = if node_reqd then node:regs else regs +     liveness_mask = mkRegLiveness regs +     yield_code =  +       absC (CMacroStmt GRAN_YIELD  +                          [mkIntCLit (IBOX(word2Int# liveness_mask))])  \end{code}  %************************************************************************ diff --git a/ghc/compiler/codeGen/CgStackery.lhs b/ghc/compiler/codeGen/CgStackery.lhs index d97476e345..33a873a5c7 100644 --- a/ghc/compiler/codeGen/CgStackery.lhs +++ b/ghc/compiler/codeGen/CgStackery.lhs @@ -1,7 +1,7 @@  %  % (c) The GRASP/AQUA Project, Glasgow University, 1992-1998  % -% $Id: CgStackery.lhs,v 1.12 1999/06/24 13:04:20 simonmar Exp $ +% $Id: CgStackery.lhs,v 1.13 2000/01/13 14:33:58 hwloidl Exp $  %  \section[CgStackery]{Stack management functions} @@ -25,9 +25,10 @@ import AbsCSyn  import CgUsages		( getRealSp )  import AbsCUtils	( mkAbstractCs, mkAbsCStmts, getAmodeRep )  import PrimRep		( getPrimRepSize, PrimRep(..), isFollowableRep ) -import CmdLineOpts	( opt_SccProfilingOn ) +import CmdLineOpts	( opt_SccProfilingOn, opt_GranMacros )  import Panic		( panic ) -import Constants	( uF_SIZE, sCC_UF_SIZE, sEQ_FRAME_SIZE, sCC_SEQ_FRAME_SIZE ) +import Constants	( uF_SIZE, sCC_UF_SIZE, gRAN_UF_SIZE,  +			  sEQ_FRAME_SIZE, sCC_SEQ_FRAME_SIZE, gRAN_SEQ_FRAME_SIZE )  import IOExts		( trace )  \end{code} @@ -224,11 +225,13 @@ getFinalStackHW fcode info_down (MkCgState absC binds usages) = state1  \end{code}  \begin{code} -updateFrameSize | opt_SccProfilingOn = sCC_UF_SIZE -		
| otherwise          = uF_SIZE +updateFrameSize | opt_SccProfilingOn = trace ("updateFrameSize = " ++ (show sCC_UF_SIZE)) sCC_UF_SIZE +		| opt_GranMacros     = trace ("updateFrameSize = " ++ (show gRAN_UF_SIZE))gRAN_UF_SIZE +		| otherwise          = trace ("updateFrameSize = " ++ (show uF_SIZE)) uF_SIZE  seqFrameSize    | opt_SccProfilingOn  = sCC_SEQ_FRAME_SIZE -	        | otherwise           = sEQ_FRAME_SIZE +	        | opt_GranMacros      = gRAN_SEQ_FRAME_SIZE +		| otherwise           = sEQ_FRAME_SIZE  \end{code}			  %************************************************************************ diff --git a/ghc/compiler/main/Constants.lhs b/ghc/compiler/main/Constants.lhs index ae358e26a4..14f466777c 100644 --- a/ghc/compiler/main/Constants.lhs +++ b/ghc/compiler/main/Constants.lhs @@ -34,6 +34,7 @@ module Constants (  	uF_SIZE,  	sCC_UF_SIZE, +	gRAN_UF_SIZE,  -- HWL  	uF_RET,  	uF_SU,  	uF_UPDATEE, @@ -41,6 +42,7 @@ module Constants (  	sEQ_FRAME_SIZE,  	sCC_SEQ_FRAME_SIZE, +	gRAN_SEQ_FRAME_SIZE, -- HWL  	mAX_Vanilla_REG,  	mAX_Float_REG, @@ -157,6 +159,9 @@ uF_SIZE	= (NOSCC_UF_SIZE::Int)  -- Same again, with profiling  sCC_UF_SIZE = (SCC_UF_SIZE::Int) +-- Same again, with gransim +gRAN_UF_SIZE = (GRAN_UF_SIZE::Int) +  -- Offsets in an update frame.  They don't change with profiling!  uF_RET         = (UF_RET::Int)  uF_SU          = (UF_SU::Int) @@ -169,6 +174,7 @@ Seq frame sizes.  
\begin{code}  sEQ_FRAME_SIZE = (NOSCC_SEQ_FRAME_SIZE::Int)  sCC_SEQ_FRAME_SIZE = (SCC_SEQ_FRAME_SIZE::Int) +gRAN_SEQ_FRAME_SIZE = (GRAN_SEQ_FRAME_SIZE::Int)  \end{code}  \begin{code} diff --git a/ghc/compiler/nativeGen/StixMacro.lhs b/ghc/compiler/nativeGen/StixMacro.lhs index 747759ec8a..5ff2ea1d61 100644 --- a/ghc/compiler/nativeGen/StixMacro.lhs +++ b/ghc/compiler/nativeGen/StixMacro.lhs @@ -122,6 +122,7 @@ macroCode PUSH_UPD_FRAME args      	frame n = StInd PtrRep  	    (StIndex PtrRep stgSp (StInt (toInteger (n-uF_SIZE)))) +        -- HWL: these values are *wrong* in a GranSim setup; ToDo: fix  	a1 = StAssign PtrRep (frame uF_RET)     upd_frame_info  	a3 = StAssign PtrRep (frame uF_SU)      stgSu  	a4 = StAssign PtrRep (frame uF_UPDATEE) bhptr diff --git a/ghc/driver/ghc-asm.lprl b/ghc/driver/ghc-asm.lprl index 590b3a110f..d5adb3fa99 100644 --- a/ghc/driver/ghc-asm.lprl +++ b/ghc/driver/ghc-asm.lprl @@ -687,6 +687,9 @@ sub mangle_asm {  		    print STDERR "$Pgm: unknown prologue mangling? $TargetPlatform\n";  		} +		# HWL HACK: dont die, just print a warning +		#print stderr  "HWL: this should die! Prologue junk?: $p\n" if $p =~ /^\t[^\.]/ +		#    && $TargetPlatform !~ /^powerpc-/; #ToDo: remove test  		die "Prologue junk?: $p\n" if $p =~ /^\t[^\.]/  		    && $TargetPlatform !~ /^powerpc-/; #ToDo: remove test @@ -727,6 +730,9 @@ sub mangle_asm {  		} else {  		    print STDERR "$Pgm: unknown epilogue mangling? $TargetPlatform\n";  		} +		# HWL HACK: dont die, just print a warning +		#print stderr "HWL: this should die! Epilogue junk?: $e\n" if $e =~ /^\t[^\.]/ +		#    && $TargetPlatform !~ /^powerpc-/; #ToDo: remove test  		die "Epilogue junk?: $e\n" if $e =~ /^\t[^\.]/  		    && $TargetPlatform !~ /^powerpc-/; #ToDo: remove test diff --git a/ghc/driver/ghc.lprl b/ghc/driver/ghc.lprl index dca6d70396..bb80a14c59 100644 --- a/ghc/driver/ghc.lprl +++ b/ghc/driver/ghc.lprl @@ -1911,6 +1911,7 @@ eval 'exec perl -S \$0 \${1+"\$@"}'  # =!=!=!=!=!=!=!=!=!=!=!  
# This script is automatically generated: DO NOT EDIT!!!  # Generated by Glasgow Haskell, version ${ProjectVersion} +# ngoqvam choHbogh vaj' vIHoHnISbej !!!!  #  \$pvm_executable      = '$pvm_executable';  \$pvm_executable_base = '$pvm_executable_base'; @@ -1942,7 +1943,9 @@ args: while ($a = shift(@ARGV)) {      }      if ( $a eq '-d' && $in_RTS_args ) {  	$debug = '-'; -    } elsif ( $a =~ /^-N(\d+)/ && $in_RTS_args ) { +    } elsif ( $a =~ /^-qN(\d+)/ && $in_RTS_args ) { +	$nprocessors = $1; +    } elsif ( $a =~ /^-qp(\d+)/ && $in_RTS_args ) {  	$nprocessors = $1;      } else {  	push(@nonPVM_args, $a); @@ -2817,9 +2820,24 @@ sub saveIntermediate {    local ($final,$suffix,$tmp)= @_ ;    local ($to_do); +  local ($new_suffix); +    # $final  -- root of where to park ${final}.${suffix}    # $tmp    -- temporary file where hsc put the intermediate file. +  # HWL: use -odir for .hc and .s files, too +  if ( $Specific_output_dir ne '' ) { +    $final = "${Specific_output_dir}/${final}"; +  }	 +  # HWL: use the same suffix as for $Osuffix in generating intermediate file, +  #      replacing o with hc or s, respectively.  +  if ( $Osuffix ne '' ) { +    ($new_suffix = $Osuffix) =~ s/o$/hc/ if $suffix eq "hc"; +    ($new_suffix = $Osuffix) =~ s/o$/s/ if $suffix eq "s"; +    $suffix = $new_suffix; +    print stderr "HWL says: suffix for intermediate file is $suffix; ${final}.${suffix} overall\n" if $Verbose; +  } +    # Delete the old file    $to_do = "$Rm ${final}.${suffix}"; &run_something($to_do, "Removing old .${suffix} file"); diff --git a/ghc/driver/test_mangler b/ghc/driver/test_mangler index f24f0e4bc0..96cf31ca68 100644 --- a/ghc/driver/test_mangler +++ b/ghc/driver/test_mangler @@ -1,7 +1,9 @@ -#! /usr/local/bin/perl +#! 
/usr/bin/perl  # a simple wrapper to test a .s-file mangler  # reads stdin, writes stdout +push(@INC,"/net/dazdak/BUILDS/gransim-4.04/i386-unknown-linux/ghc/driver"); +  $TargetPlatform = $ARGV[0]; shift; # nice error checking, Will  require("ghc-asm.prl") || die "require mangler failed!\n"; diff --git a/ghc/includes/CCall.h b/ghc/includes/CCall.h index 97ff9df649..3040c17491 100644 --- a/ghc/includes/CCall.h +++ b/ghc/includes/CCall.h @@ -1,5 +1,5 @@  /* ----------------------------------------------------------------------------- - * $Id: CCall.h,v 1.3 1999/02/05 16:02:19 simonm Exp $ + * $Id: CCall.h,v 1.4 2000/01/13 14:34:00 hwloidl Exp $   *   * (c) The GHC Team, 1998-1999   * @@ -55,6 +55,9 @@  #define STGCALL5(f,a,b,c,d,e) \    CALLER_SAVE_ALL (void) f(a,b,c,d,e); CALLER_RESTORE_ALL +#define STGCALL6(f,a,b,c,d,e,z) \ +  CALLER_SAVE_ALL (void) f(a,b,c,d,e,z); CALLER_RESTORE_ALL +  #define RET_STGCALL0(t,f) \    ({ t _r; CALLER_SAVE_ALL _r = f(); CALLER_RESTORE_ALL; _r; }) @@ -74,6 +77,9 @@  #define RET_STGCALL5(t,f,a,b,c,d,e) \    ({ t _r; CALLER_SAVE_ALL _r = f(a,b,c,d,e); CALLER_RESTORE_ALL; _r; }) +#define RET_STGCALL6(t,f,a,b,c,d,e,z) \ +  ({ t _r; CALLER_SAVE_ALL _r = f(a,b,c,d,e,z); CALLER_RESTORE_ALL; _r; }) +  /*   * A PRIM_STGCALL is used when we have arranged to save the R<n>, @@ -101,6 +107,9 @@  #define PRIM_STGCALL5(f,a,b,c,d,e) \    CALLER_SAVE_SYSTEM (void) f(a,b,c,d,e); CALLER_RESTORE_SYSTEM +#define PRIM_STGCALL6(f,a,b,c,d,e,z) \ +  CALLER_SAVE_SYSTEM (void) f(a,b,c,d,e,z); CALLER_RESTORE_SYSTEM +  #define RET_PRIM_STGCALL0(t,f) \    ({ t _r; CALLER_SAVE_SYSTEM _r = f(); CALLER_RESTORE_SYSTEM; _r; }) @@ -120,6 +129,9 @@  #define RET_PRIM_STGCALL5(t,f,a,b,c,d,e) \    ({ t _r; CALLER_SAVE_SYSTEM _r = f(a,b,c,d,e); CALLER_RESTORE_SYSTEM; _r; }) +#define RET_PRIM_STGCALL6(t,f,a,b,c,d,e,z) \ +  ({ t _r; CALLER_SAVE_SYSTEM _r = f(a,b,c,d,e,z); CALLER_RESTORE_SYSTEM; _r; }) +  /* ToDo: ccalls that might garbage collect - do we need to return to   * 
the scheduler to perform these?  Similarly, ccalls that might want   * to call Haskell right back, or start a new thread or something. diff --git a/ghc/includes/ClosureTypes.h b/ghc/includes/ClosureTypes.h index de58fac1dc..e1a9f2c591 100644 --- a/ghc/includes/ClosureTypes.h +++ b/ghc/includes/ClosureTypes.h @@ -1,5 +1,5 @@  /* ---------------------------------------------------------------------------- - * $Id: ClosureTypes.h,v 1.11 1999/05/11 16:47:40 keithw Exp $ + * $Id: ClosureTypes.h,v 1.12 2000/01/13 14:34:00 hwloidl Exp $   *    * (c) The GHC Team, 1998-1999   * @@ -72,10 +72,15 @@  #define WEAK		        56  #define FOREIGN		        57  #define STABLE_NAME	        58 +  #define TSO		        59  #define BLOCKED_FETCH	        60  #define FETCH_ME                61 -#define EVACUATED               62 -#define N_CLOSURE_TYPES         63 +#define FETCH_ME_BQ             62 +#define RBH                     63 + +#define EVACUATED               64 + +#define N_CLOSURE_TYPES         65  #endif CLOSURETYPES_H diff --git a/ghc/includes/Closures.h b/ghc/includes/Closures.h index 3ed2809edc..1de91efe07 100644 --- a/ghc/includes/Closures.h +++ b/ghc/includes/Closures.h @@ -1,5 +1,5 @@  /* ---------------------------------------------------------------------------- - * $Id: Closures.h,v 1.14 1999/12/01 14:34:48 simonmar Exp $ + * $Id: Closures.h,v 1.15 2000/01/13 14:34:00 hwloidl Exp $   *   * (c) The GHC Team, 1998-1999   * @@ -37,21 +37,39 @@ typedef struct {     The parallel header     -------------------------------------------------------------------------- */ -#ifdef GRAN +#ifdef PAR  typedef struct { -  W_ procs; -} StgGranHeader; +  /* StgWord ga; */  /* nope! 
global addresses are managed via a hash table */ +} StgParHeader;  #else /* !PAR */  typedef struct {    /* empty */ -} StgGranHeader; +} StgParHeader;  #endif /* PAR */  /* ----------------------------------------------------------------------------- +   The GranSim header +   -------------------------------------------------------------------------- */ + +#if defined(GRAN) + +typedef struct { +  StgWord procs; /* bitmask indicating on which PEs this closure resides */ +} StgGranHeader; + +#else /* !GRAN */ + +typedef struct { +  /* empty */ +} StgGranHeader; + +#endif /* GRAN */ + +/* -----------------------------------------------------------------------------     The ticky-ticky header     Comment from old Ticky.h: @@ -96,8 +114,11 @@ typedef struct {  #ifdef PROFILING  	StgProfHeader         prof;  #endif -#ifdef GRAN -	StgGranHeader         par; +#ifdef PAR +	StgParHeader          par; +#endif +#if defined(GRAN) +	StgGranHeader         gran;  #endif  #ifdef TICKY_TICKY  	StgTickyHeader        ticky; @@ -189,12 +210,6 @@ typedef struct StgCAF_ {  typedef struct {      StgHeader  header; -    struct StgTSO_ *blocking_queue; -    StgMutClosure *mut_link; -} StgBlockingQueue; - -typedef struct { -    StgHeader  header;      StgWord    words;      StgWord    payload[0];  } StgArrWords; @@ -294,12 +309,71 @@ typedef struct {    StgClosure*     value;  } StgMVar; -/* Parallel FETCH_ME closures */ -#ifdef PAR -typedef struct { +#if defined(PAR) || defined(GRAN) +/* +  StgBlockingQueueElement represents the types of closures that can be  +  found on a blocking queue: StgTSO, StgRBHSave, StgBlockedFetch. +  (StgRBHSave can only appear at the end of a blocking queue).   +  Logically, this is a union type, but defining another struct with a common +  layout is easier to handle in the code (same as for StgMutClosures). 
+*/ +typedef struct StgBlockingQueueElement_ { +  StgHeader                         header; +  struct StgBlockingQueueElement_  *link; +  StgMutClosure                    *mut_link; +  struct StgClosure_               *payload[0]; +} StgBlockingQueueElement; + +typedef struct StgBlockingQueue_ { +  StgHeader                         header; +  struct StgBlockingQueueElement_  *blocking_queue; +  StgMutClosure                    *mut_link; +} StgBlockingQueue; + +/* this closure is hanging at the end of a blocking queue in (par setup only) */ +typedef struct StgRBHSave_ {    StgHeader    header; -  void        *ga;		/* type globalAddr is abstract here */ +  StgPtr       payload[0]; +} StgRBHSave; + +typedef struct StgRBH_ { +  StgHeader                                header; +  struct StgBlockingQueueElement_         *blocking_queue; +  StgMutClosure                           *mut_link; +} StgRBH; + +#else +/* old sequential version of a blocking queue, which can only hold TSOs */ +typedef struct StgBlockingQueue_ { +  StgHeader                 header; +  struct StgTSO_           *blocking_queue; +  StgMutClosure            *mut_link; +} StgBlockingQueue; +#endif + +#if defined(PAR) +/* global indirections aka FETCH_ME closures */ +typedef struct StgFetchMe_ { +  StgHeader              header; +  globalAddr            *ga;		/* type globalAddr is abstract here */ +  StgMutClosure         *mut_link;  } StgFetchMe; + +/* same contents as an ordinary StgBlockingQueue */ +typedef struct StgFetchMeBlockingQueue_ { +  StgHeader                          header; +  struct StgBlockingQueueElement_   *blocking_queue; +  StgMutClosure                     *mut_link; +} StgFetchMeBlockingQueue; + +/* entry in a blocking queue, indicating a request from a TSO on another PE */ +typedef struct StgBlockedFetch_ { +  StgHeader                         header; +  struct StgBlockingQueueElement_  *link; +  StgMutClosure                    *mut_link; +  StgClosure                       
*node; +  globalAddr                        ga; +} StgBlockedFetch;  #endif  #endif /* CLOSURES_H */ diff --git a/ghc/includes/Constants.h b/ghc/includes/Constants.h index bf7c83e91d..3983196937 100644 --- a/ghc/includes/Constants.h +++ b/ghc/includes/Constants.h @@ -1,5 +1,5 @@  /* ---------------------------------------------------------------------------- - * $Id: Constants.h,v 1.7 1999/10/27 09:58:36 simonmar Exp $ + * $Id: Constants.h,v 1.8 2000/01/13 14:34:00 hwloidl Exp $   *   * (c) The GHC Team, 1998-1999   * @@ -18,11 +18,13 @@     Header Sizes     NOTE: keep these in line with the real definitions in Closures.h +   HWL: checked GRAN_HDR_SIZE; ok     -------------------------------------------------------------------------- */  #define STD_HDR_SIZE   1  #define PROF_HDR_SIZE  1  #define GRAN_HDR_SIZE  1 +#define PAR_HDR_SIZE   0  #define TICKY_HDR_SIZE 0  #define ARR_HDR_SIZE   1 @@ -36,11 +38,13 @@     NOTE: keep these in line with the real definitions in InfoTables.h     NOTE: the PROF, and GRAN values are *wrong*  (ToDo) +   HWL: checked GRAN_ITBL_SIZE; ok     -------------------------------------------------------------------------- */  #define STD_ITBL_SIZE   3  #define PROF_ITBL_SIZE  1  #define GRAN_ITBL_SIZE  1 +#define PAR_ITBL_SIZE   0  #define TICKY_ITBL_SIZE 0  /* ----------------------------------------------------------------------------- @@ -98,9 +102,13 @@  /* -----------------------------------------------------------------------------     Update Frame Layout +   GranSim uses an additional word as bitmask in the update frame; actually, +   not really necessary, but uses standard closure layout that way +   NB: UF_RET etc are *wrong* in a GranSim setup; should be increased by 1  +       if compiling for GranSim (currently not used in compiler) -- HWL     -------------------------------------------------------------------------- */ -  #define NOSCC_UF_SIZE 	3 +#define GRAN_UF_SIZE 	4  #define SCC_UF_SIZE	4  #define UF_RET		0 @@ -112,9 
+120,11 @@     SEQ frame size     I don't think seq frames really need sccs --SDM +   They don't need a GranSim bitmask either, but who cares anyway -- HWL     -------------------------------------------------------------------------- */  #define NOSCC_SEQ_FRAME_SIZE 2 +#define GRAN_SEQ_FRAME_SIZE  3  #define SCC_SEQ_FRAME_SIZE   3  /* ----------------------------------------------------------------------------- diff --git a/ghc/includes/GranSim.h b/ghc/includes/GranSim.h new file mode 100644 index 0000000000..88c6ad9b7d --- /dev/null +++ b/ghc/includes/GranSim.h @@ -0,0 +1,327 @@ +/* +  Time-stamp: <Tue Jan 11 2000 11:29:41 Stardate: [-30]4188.43 hwloidl> +  $Id: GranSim.h,v 1.2 2000/01/13 14:34:00 hwloidl Exp $ +   +  Headers for GranSim specific objects. +   +  Note that in GranSim we have one run-queue and blocking-queue for each +  processor. Therefore, this header file redefines variables like +  run_queue_hd to be relative to CurrentProc. The main arrays of runnable +  and blocking queues are defined in Schedule.c.  The important STG-called +  GranSim macros (e.g. for fetching nodes) are at the end of this +  file. Usually they are just wrappers to proper C functions in GranSim.c.  
*/ + +#ifndef GRANSIM_H +#define GRANSIM_H + +#if !defined(GRAN) + +//Dummy definitions for basic GranSim macros (see GranSim.h) +#define DO_GRAN_ALLOCATE(n)     			  /* nothing */ +#define DO_GRAN_UNALLOCATE(n)   			  /* nothing */ +#define DO_GRAN_FETCH(node)     			  /* nothing */ +#define DO_GRAN_EXEC(arith,branch,load,store,floats)      /* nothing */ +#define GRAN_FETCH_AND_RESCHEDULE(liveness_mask,reenter)  /* nothing */ +#define GRAN_RESCHEDULE(liveness_mask,reenter)	          /* nothing */ + +#endif + +#if defined(GRAN)  /* whole file */ + +extern StgTSO *CurrentTSOs[]; + +//@node Headers for GranSim specific objects, , , +//@section Headers for GranSim specific objects + +//@menu +//* Includes::			 +//* Externs and prototypes::	 +//* Run and blocking queues::	 +//* Spark queues::		 +//* Processor related stuff::	 +//* GranSim costs::		 +//* STG called GranSim functions::   +//* STG-called routines::	 +//@end menu + +//@node Includes, Externs and prototypes, Headers for GranSim specific objects, Headers for GranSim specific objects +//@subsection Includes + +/* +#include "Closures.h" +#include "TSO.h" +#include "Rts.h" +*/ + +//@node Externs and prototypes, Run and blocking queues, Includes, Headers for GranSim specific objects +//@subsection Externs and prototypes + +/* Global constants */ +extern char *gran_event_names[]; +extern char *proc_status_names[]; +extern char *event_names[]; + +/* Vars checked from within STG land */ +extern rtsBool  NeedToReSchedule, IgnoreEvents, IgnoreYields;  +;  +extern rtsTime  TimeOfNextEvent, TimeOfLastEvent, EndOfTimeSlice; + +/* costs for basic operations (copied from RTS flags) */ +extern nat gran_arith_cost, gran_branch_cost, gran_load_cost, gran_store_cost, gran_float_cost; + +extern nat SparksAvail;     /* How many sparks are available */ +extern nat SurplusThreads;  /* How many excess threads are there */ +extern nat sparksIgnored, sparksCreated; + +//@node Run and blocking queues, Spark queues, Externs and 
prototypes, Headers for GranSim specific objects +//@subsection Run and blocking queues + +/* declared in Schedule.c */ +extern StgTSO *run_queue_hds[], *run_queue_tls[]; +extern StgTSO *blocked_queue_hds[], *blocked_queue_tls[]; +extern StgTSO *ccalling_threadss[]; + +#define run_queue_hd         run_queue_hds[CurrentProc] +#define run_queue_tl         run_queue_tls[CurrentProc] +#define blocked_queue_hd     blocked_queue_hds[CurrentProc] +#define blocked_queue_tl     blocked_queue_tls[CurrentProc] +#define pending_sparks_hd    pending_sparks_hds[CurrentProc] +#define pending_sparks_tl    pending_sparks_tls[CurrentProc] +#define ccalling_threads     ccalling_threadss[CurrentProc] + +//@node Spark queues, Processor related stuff, Run and blocking queues, Headers for GranSim specific objects +//@subsection Spark queues + +/* +In GranSim we use a double linked list to represent spark queues. + +This is more flexible, but slower, than the array of pointers +representation used in GUM. We use the flexibility to define new fields in +the rtsSpark structure, representing e.g. granularity info (see HWL's PhD +thesis), or info about the parent of a spark. 
+*/ + +/* Sparks and spark queues */ +typedef struct rtsSpark_ +{ +  StgClosure    *node; +  StgInt         name, global; +  StgInt         gran_info;      /* for granularity improvement mechanisms */ +  PEs            creator;        /* PE that created this spark (unused) */ +  struct rtsSpark_  *prev, *next; +} rtsSpark; +typedef rtsSpark *rtsSparkQ; + +/* The spark queues, proper */ +/* In GranSim this is a globally visible array of spark queues */ +extern rtsSparkQ pending_sparks_hds[]; +extern rtsSparkQ pending_sparks_tls[]; + +/* Prototypes of those spark routines visible to compiler generated .hc */ +/* Routines only used inside the RTS are defined in rts/parallel GranSimRts.h */ +rtsSpark    *newSpark(StgClosure *node,  +		      StgInt name, StgInt gran_info, StgInt size_info,  +		      StgInt par_info, StgInt local); +void         add_to_spark_queue(rtsSpark *spark); + +//@node Processor related stuff, GranSim costs, Spark queues, Headers for GranSim specific objects +//@subsection Processor related stuff + +extern PEs CurrentProc; +extern rtsTime CurrentTime[];   + +/* Maximum number of PEs that can be simulated */ +#define MAX_PROC             32 /* (BITS_IN(StgWord))  */ // ToDo: fix this!! 
+//#if MAX_PROC==16  +//#else  +//#error MAX_PROC should be 32 on this architecture  +//#endif  + +#define CurrentTSO           CurrentTSOs[CurrentProc] + +/* Processor numbers to bitmasks and vice-versa */ +#define MainProc	     0           /* Id of main processor */ +#define NO_PRI               0           /* dummy priority */ +#define MAX_PRI              10000       /* max possible priority */ +#define MAIN_PRI             MAX_PRI     /* priority of main thread */  + +/* GrAnSim uses IdleProcs as bitmask to indicate which procs are idle */ +#define PE_NUMBER(n)          (1l << (long)n) +#define ThisPE		      PE_NUMBER(CurrentProc) +#define MainPE		      PE_NUMBER(MainProc) +#define Everywhere	      (~0l) +#define Nowhere	              (0l) +#define Now                   CurrentTime[CurrentProc] + +#define IS_LOCAL_TO(ga,proc)  ((1l << (PEs) proc) & ga) + +#define GRAN_TIME_SLICE       1000        /* max time between 2 ReSchedules */ + +//@node GranSim costs, STG called GranSim functions, Processor related stuff, Headers for GranSim specific objects +//@subsection GranSim costs + +/* Default constants for communication (see RtsFlags on how to change them) */ + +#define LATENCY		           1000	/* Latency for single packet */ +#define ADDITIONAL_LATENCY	    100	/* Latency for additional packets */ +#define BASICBLOCKTIME	    	     10 +#define FETCHTIME	  	(LATENCY*2+MSGUNPACKTIME) +#define LOCALUNBLOCKTIME  	     10 +#define GLOBALUNBLOCKTIME 	(LATENCY+MSGUNPACKTIME) + +#define	MSGPACKTIME		     0  /* Cost of creating a packet */ +#define	MSGUNPACKTIME		     0  /* Cost of receiving a packet */ +#define MSGTIDYTIME                  0  /* Cost of cleaning up after send */ + +/* How much to increase GrAnSims internal packet size if an overflow  +   occurs. +   NB: This is a GrAnSim internal variable and is independent of the +   simulated packet buffer size. 
+*/ + +#define GRANSIM_DEFAULT_PACK_BUFFER_SIZE     400 +#define REALLOC_SZ                           200 + +/* extern W_ gran_mpacktime, gran_mtidytime, gran_munpacktime; */ + +/* Thread cost model */ +#define THREADCREATETIME	   (25+THREADSCHEDULETIME) +#define THREADQUEUETIME		    12  /* Cost of adding a thread to the running/runnable queue */ +#define THREADDESCHEDULETIME	    75  /* Cost of descheduling a thread */ +#define THREADSCHEDULETIME	    75  /* Cost of scheduling a thread */ +#define THREADCONTEXTSWITCHTIME	    (THREADDESCHEDULETIME+THREADSCHEDULETIME) + +/* Instruction Cost model (SPARC, including cache misses) */ +#define ARITH_COST	     	   1 +#define BRANCH_COST	     	   2 +#define LOAD_COST	  	   4 +#define STORE_COST	  	   4 +#define FLOAT_COST		   1 /* ? */ + +#define HEAPALLOC_COST             11 + +#define PRI_SPARK_OVERHEAD    5 +#define PRI_SCHED_OVERHEAD    5 + +//@node STG called GranSim functions, STG-called routines, GranSim costs, Headers for GranSim specific objects +//@subsection STG called GranSim functions + +/* STG called GranSim functions */ +void GranSimAllocate(StgInt n); +void GranSimUnallocate(StgInt n); +void GranSimExec(StgWord ariths, StgWord branches, StgWord loads, StgWord stores, StgWord floats); +StgInt GranSimFetch(StgClosure *node); +void GranSimSpark(StgInt local, StgClosure *node); +void GranSimSparkAt(rtsSpark *spark, StgClosure *where,StgInt identifier); +void GranSimSparkAtAbs(rtsSpark *spark, PEs proc, StgInt identifier); +void GranSimBlock(StgTSO *tso, PEs proc, StgClosure *node); + + +//@node STG-called routines,  , STG called GranSim functions, Headers for GranSim specific objects +//@subsection STG-called routines + +/* Wrapped version of calls to GranSim-specific STG routines */ + +/* +#define DO_PERFORM_RESCHEDULE(liveness, always_reenter_node) PerformReschedule_wrapper(liveness, always_reenter_node) +*/ +#define DO_GRAN_ALLOCATE(n)     STGCALL1(GranSimAllocate, n) +#define DO_GRAN_UNALLOCATE(n)   
STGCALL1(GranSimUnallocate, n) +#define DO_GRAN_FETCH(node)     STGCALL1(GranSimFetch, node) +#define DO_GRAN_EXEC(arith,branch,load,store,floats) GranSimExec(arith,branch,load,store,floats) + +/*  +   ToDo: Clean up this mess of GRAN macros!!! -- HWL +*/ +// DO_GRAN_FETCH((StgClosure*)R1.p); +#define GRAN_FETCH()		/* nothing */ + +#define GRAN_FETCH_AND_RESCHEDULE(liveness,reenter)	\ +          DO_GRAN_FETCH((StgClosure*)R1.p); 			        \ +          DO_GRAN_YIELD(liveness,ENTRY_CODE((D_)(*R1.p)));  +// RESTORE_EVERYTHING is done implicitly before entering threaded world agian + +/* +  This is the only macro currently enabled; +  It should check whether it is time for the current thread to yield +  (e.g. if there is a more recent event in the queue) and it should check +  whether node is local, via a call to GranSimFetch. +  ToDo: split this in 2 routines: +         - GRAN_YIELD (as it is below) +	 - GRAN_FETCH (the rest of this macro) +        emit only these 2 macros based on node's liveness +	  node alive: emit both macros +	  node not alive: do only a GRAN_YIELD +	   +        replace gran_yield_? with gran_block_? 
(they really block the current +	thread) +*/ +#define GRAN_RESCHEDULE(liveness,ptrs)  \ +          if (RET_STGCALL1(StgInt, GranSimFetch, (StgClosure*)R1.p)) {\ +            EXTFUN_RTS(gran_block_##ptrs); \ +            JMP_(gran_block_##ptrs);       \ +          } else {                         \ +	    if (TimeOfLastEvent < CurrentTime[CurrentProc] && \ +                HEAP_ALLOCED((StgClosure *)R1.p) && \ +                LOOKS_LIKE_GHC_INFO(get_itbl((StgClosure *)R1.p))) { \ +                                  EXTFUN_RTS(gran_yield_##ptrs); \ +                                  JMP_(gran_yield_##ptrs); \ +                } \ +            /* GRAN_YIELD(ptrs)  */             \ +	  } + + +//                                                   YIELD(liveness,reenter) + +// GRAN_YIELD(liveness_mask);  + +// GRAN_FETCH_AND_RESCHEDULE(liveness_mask,reenter) + +#define THREAD_CONTEXT_SWITCH(liveness_mask,reenter)	\ +        do { \ +	if (context_switch /* OR_INTERVAL_EXPIRED */) {	\ +          GRAN_RESCHEDULE(liveness_mask,reenter); \ +        } }while(0) + +#define GRAN_EXEC(arith,branch,load,store,floats)       \ +        { \ +          W_ cost = gran_arith_cost*arith +   \ +                    gran_branch_cost*branch + \ +                    gran_load_cost*load +   \ +                    gran_store_cost*store +   \ +                    gran_float_cost*floats;   \ +          CurrentTSO->gran.exectime += cost;                      \ +          CurrentTime[CurrentProc] += cost;                      \ +        } + +/* In GranSim we first check whether there is an event to handle; only if +   this is the case (or the time slice is over in case of fair scheduling) +   we do a yield, which is very similar to that in the concurrent world  +   ToDo: check whether gran_yield_? 
can be merged with other yielding codes +*/ + +#define DO_GRAN_YIELD(ptrs)	if (!IgnoreYields && \ +                                    TimeOfLastEvent < CurrentTime[CurrentProc] && \ +				    HEAP_ALLOCED((StgClosure *)R1.p) && \ +                                    LOOKS_LIKE_GHC_INFO(get_itbl((StgClosure *)R1.p))) { \ +                                  EXTFUN_RTS(gran_yield_##ptrs); \ +                                  JMP_(gran_yield_##ptrs); \ +                                } + +#define GRAN_YIELD(ptrs)                                   \ +        {                                                   \ +          extern  nat context_switch;                          \ +          if ( (CurrentTime[CurrentProc]>=EndOfTimeSlice) ||   \ +               ((CurrentTime[CurrentProc]>=TimeOfNextEvent) && \ +	        (TimeOfNextEvent!=0) && !IgnoreEvents )) {     \ +	    /* context_switch = 1; */                          \ +            DO_GRAN_YIELD(ptrs);   \ +	  }                                                    \ +	} + +#define ADD_TO_SPARK_QUEUE(spark)	      \ +   STGCALL1(add_to_spark_queue,spark) \ + +#endif /* GRAN */ + +#endif /* GRANSIM_H */ diff --git a/ghc/includes/InfoMacros.h b/ghc/includes/InfoMacros.h index 91900d44bd..a85529bad7 100644 --- a/ghc/includes/InfoMacros.h +++ b/ghc/includes/InfoMacros.h @@ -1,5 +1,5 @@  /* ---------------------------------------------------------------------------- - * $Id: InfoMacros.h,v 1.8 1999/11/30 11:44:32 simonmar Exp $ + * $Id: InfoMacros.h,v 1.9 2000/01/13 14:34:00 hwloidl Exp $   *    * (c) The GHC Team, 1998-1999   * @@ -31,8 +31,35 @@  #define INIT_VECTOR  #endif +/* +  On the GRAN/PAR specific parts of the InfoTables: + +  In both GranSim and GUM we use revertible black holes (RBH) when putting +  an updatable closure into a packet for communication. The entry code for +  an RBH performs standard blocking (as with any kind of BH). 
The info +  table for the RBH resides just before the one for the std info +  table. (NB: there is one RBH ITBL for every ITBL of an updatable +  closure.) The @rbh_infoptr@ field in the ITBL points from the std ITBL to +  the RBH ITBL and vice versa. This is used by the RBH_INFOPTR and +  REVERT_INFOPTR macros to turn an updatable node into an RBH and vice +  versa. Note, that the only case where we have to revert the RBH in its +  original form is when a packet is sent back because of garbage collection +  on another PE. In the RTS for GdH we will use this reversion mechanism in  +  order to deal with faults in the system.  +  ToDo: Check that RBHs are needed for all the info tables below. From a quick +  check of the macros generated in the libs it seems that all of them are used +  for generating THUNKs. +  Possible optimisation: Note that any RBH ITBL is a fixed distance away from  +  the actual ITBL. We could inline this offset as a constant into the RTS and +  avoid the rbh_infoptr fields altogether (Jim did that in the old RTS). 
+  -- HWL +*/ + +  /* function/thunk info tables --------------------------------------------- */ +#if defined(GRAN) || defined(PAR) +  #define \  INFO_TABLE_SRT(info,				/* info-table label */	\  	       entry,				/* entry code label */	\ @@ -41,17 +68,73 @@ INFO_TABLE_SRT(info,				/* info-table label */	\  	       type,				/* closure type */	\  	       info_class, entry_class,		/* C storage classes */	\  	       prof_descr, prof_type)		/* profiling info */	\ +        entry_class(RBH_##entry);                                      \          entry_class(entry);                                             \ +	info_class INFO_TBL_CONST StgInfoTable info; \ +	info_class INFO_TBL_CONST StgInfoTable RBH_##info = {		\ +		layout : { payload : {ptrs,nptrs} },			\ +		SRT_INFO(RBH,srt_,srt_off_,srt_len_),                  \ +                INCLUDE_RBH_INFO(info),			                \ +                INIT_ENTRY(RBH_##entry),                           \ +                INIT_VECTOR                                             \ +	} ; \ +        StgFunPtr  RBH_##entry (void) { JMP_(RBH_entry); } ;            \  	info_class INFO_TBL_CONST StgInfoTable info = {			\  		layout : { payload : {ptrs,nptrs} },			\  		SRT_INFO(type,srt_,srt_off_,srt_len_),			\ +                INCLUDE_RBH_INFO(RBH_##info),			\                  INIT_ENTRY(entry),                                      \                  INIT_VECTOR                                             \  	} +#else + +#define \ +INFO_TABLE_SRT(info,				/* info-table label */	\ +	       entry,				/* entry code label */	\ +	       ptrs, nptrs,			/* closure layout info */\ +	       srt_, srt_off_, srt_len_,	/* SRT info */		\ +	       type,				/* closure type */	\ +	       info_class, entry_class,		/* C storage classes */	\ +	       prof_descr, prof_type)		/* profiling info */	\ +        entry_class(entry);                                             \ +	info_class INFO_TBL_CONST StgInfoTable info = {			\ +		layout : { payload : 
{ptrs,nptrs} },			\ +		SRT_INFO(type,srt_,srt_off_,srt_len_),			\ +                INIT_ENTRY(entry),                                      \ +                INIT_VECTOR                                             \ +	} + +#endif  /* direct-return address info tables  --------------------------------------*/ +#if defined(GRAN) || defined(PAR) + +#define									\ +INFO_TABLE_SRT_BITMAP(info, entry, bitmap_, srt_, srt_off_, srt_len_,	\ +		      type, info_class, entry_class,			\ +		      prof_descr, prof_type)				\ +        entry_class(RBH_##entry);                                      \ +        entry_class(entry);                                             \ +	info_class INFO_TBL_CONST StgInfoTable info; \ +	info_class INFO_TBL_CONST StgInfoTable RBH_##info = {		\ +		layout : { bitmap : (StgWord32)bitmap_ },		\ +		SRT_INFO(RBH,srt_,srt_off_,srt_len_),			\ +                INCLUDE_RBH_INFO(info),			                \ +                INIT_ENTRY(RBH_##entry),				\ +                INIT_VECTOR						\ +	};                                                              \ +        StgFunPtr  RBH_##entry (void) { JMP_(RBH_entry); } ;            \ +	info_class INFO_TBL_CONST StgInfoTable info = {			\ +		layout : { bitmap : (StgWord32)bitmap_ },		\ +		SRT_INFO(type,srt_,srt_off_,srt_len_),			\ +                INCLUDE_RBH_INFO(RBH_##info),		                \ +                INIT_ENTRY(entry),					\ +                INIT_VECTOR						\ +	} +#else +  #define									\  INFO_TABLE_SRT_BITMAP(info, entry, bitmap_, srt_, srt_off_, srt_len_,	\  		      type, info_class, entry_class,			\ @@ -63,9 +146,36 @@ INFO_TABLE_SRT_BITMAP(info, entry, bitmap_, srt_, srt_off_, srt_len_,	\                  INIT_ENTRY(entry),					\                  INIT_VECTOR						\  	} +#endif  /* info-table without an SRT -----------------------------------------------*/ +#if defined(GRAN) || defined(PAR) + +#define							\ +INFO_TABLE(info, entry, ptrs, nptrs, type, info_class,	\ +	   entry_class, 
prof_descr, prof_type)		\ +        entry_class(RBH_##entry);                                      \ +        entry_class(entry);                                             \ +	info_class INFO_TBL_CONST StgInfoTable info; \ +	info_class INFO_TBL_CONST StgInfoTable RBH_##info = {	\ +		layout : { payload : {ptrs,nptrs} },	\ +		STD_INFO(RBH),				\ +                INCLUDE_RBH_INFO(info),	                \ +                INIT_ENTRY(RBH_##entry),		\ +                INIT_VECTOR				\ +	};                                              \ +        StgFunPtr  RBH_##entry (void) { JMP_(RBH_entry); } ;            \ +	info_class INFO_TBL_CONST StgInfoTable info = {	\ +		layout : { payload : {ptrs,nptrs} },	\ +		STD_INFO(type),				\ +                INCLUDE_RBH_INFO(RBH_##info),		                \ +                INIT_ENTRY(entry),			\ +                INIT_VECTOR				\ +	} + +#else +  #define							\  INFO_TABLE(info, entry, ptrs, nptrs, type, info_class,	\  	   entry_class, prof_descr, prof_type)		\ @@ -77,8 +187,36 @@ INFO_TABLE(info, entry, ptrs, nptrs, type, info_class,	\                  INIT_VECTOR				\  	} +#endif +  /* special selector-thunk info table ---------------------------------------*/ +#if defined(GRAN) || defined(PAR) + +#define							\ +INFO_TABLE_SELECTOR(info, entry, offset, info_class,	\ +		    entry_class, prof_descr, prof_type)	\ +        entry_class(RBH_##entry);                                      \ +        entry_class(entry);                                             \ +	info_class INFO_TBL_CONST StgInfoTable info; \ +	info_class INFO_TBL_CONST StgInfoTable RBH_##info = {	\ +		layout : { selector_offset : offset },	\ +		STD_INFO(RBH),		                \ +                INCLUDE_RBH_INFO(info),	                \ +                INIT_ENTRY(RBH_##entry),		\ +                INIT_VECTOR				\ +	};                                              \ +        StgFunPtr  RBH_##entry (void) { JMP_(RBH_entry); } ;            \ +	info_class INFO_TBL_CONST 
StgInfoTable info = {	\ +		layout : { selector_offset : offset },	\ +		STD_INFO(THUNK_SELECTOR),		\ +                INCLUDE_RBH_INFO(RBH_##info),           \ +                INIT_ENTRY(entry),			\ +                INIT_VECTOR				\ +	} + +#else +  #define							\  INFO_TABLE_SELECTOR(info, entry, offset, info_class,	\  		    entry_class, prof_descr, prof_type)	\ @@ -90,6 +228,8 @@ INFO_TABLE_SELECTOR(info, entry, offset, info_class,	\                  INIT_VECTOR				\  	} +#endif +  /* constructor info table --------------------------------------------------*/  #define \ diff --git a/ghc/includes/InfoTables.h b/ghc/includes/InfoTables.h index 0f8a659fe3..b3db5e5ca5 100644 --- a/ghc/includes/InfoTables.h +++ b/ghc/includes/InfoTables.h @@ -1,5 +1,5 @@  /* ---------------------------------------------------------------------------- - * $Id: InfoTables.h,v 1.17 1999/07/16 09:41:12 panne Exp $ + * $Id: InfoTables.h,v 1.18 2000/01/13 14:34:00 hwloidl Exp $   *    * (c) The GHC Team, 1998-1999   * @@ -36,12 +36,15 @@ typedef struct {     Parallelism info     -------------------------------------------------------------------------- */ -#ifdef PAR +#if 0 && (defined(PAR) || defined(GRAN)) -#define PAR_INFO_WORDS 0 +// CURRENTLY UNUSED +// ToDo: use this in StgInfoTable (mutually recursive) -- HWL + +#define PAR_INFO_WORDS 1  typedef struct { -       /* empty */ +  StgInfoTable *rbh_infoptr;     /* infoptr to the RBH  */  } StgParInfo;  #else /* !PAR */ @@ -54,6 +57,54 @@ typedef struct {  #endif /* PAR */ +/* +   Copied from ghc-0.29; ToDo: check this code -- HWL + +   In the parallel system, all updatable closures have corresponding +   revertible black holes.  When we are assembly-mangling, we guarantee +   that the revertible black hole code precedes the normal entry code, so +   that the RBH info table resides at a fixed offset from the normal info +   table.  Otherwise, we add the RBH info table pointer to the end of the +   normal info table and vice versa. 
+ +   Currently has to use a !RBH_MAGIC_OFFSET setting. +   Still todo: init of par.infoptr field in all infotables!! +*/ + +#if defined(PAR) || defined(GRAN) +# define RBH_INFO_OFFSET	    (GEN_INFO_OFFSET+GEN_INFO_WORDS) + +# ifdef RBH_MAGIC_OFFSET + +#  error magic offset not yet implemented + +#  define RBH_INFO_WORDS    0 +#  define INCLUDE_RBH_INFO(infoptr) + +#  define RBH_INFOPTR(infoptr)	    (((P_)infoptr) - RBH_MAGIC_OFFSET) +#  define REVERT_INFOPTR(infoptr)   (((P_)infoptr) + RBH_MAGIC_OFFSET) + +# else + +#  define RBH_INFO_WORDS    1 +#  define INCLUDE_RBH_INFO(info)    rbh_infoptr : &(info) + +#  define RBH_INFOPTR(infoptr)	    (((StgInfoTable *)(infoptr))->rbh_infoptr) +#  define REVERT_INFOPTR(infoptr)   (((StgInfoTable *)(infoptr))->rbh_infoptr) + +# endif + +/* see ParallelRts.h */ +// EXTFUN(RBH_entry); +//StgClosure *convertToRBH(StgClosure *closure); +//#if defined(GRAN) +//void convertFromRBH(StgClosure *closure); +//#elif defined(PAR) +//void convertToFetchMe(StgPtr closure, globalAddr *ga); +//#endif + +#endif +  /* -----------------------------------------------------------------------------     Debugging info     -------------------------------------------------------------------------- */ @@ -98,11 +149,27 @@ extern StgWord16 closure_flags[];  #define closureFlags(c)         (closure_flags[get_itbl(c)->type]) -#define closure_STATIC(c)       (  closureFlags(c) & _STA) +#define closure_HNF(c)          (  closureFlags(c) & _HNF) +#define closure_BITMAP(c)       (  closureFlags(c) & _BTM) +#define closure_NON_SPARK(c)    ( (closureFlags(c) & _NS))  #define closure_SHOULD_SPARK(c) (!(closureFlags(c) & _NS)) +#define closure_STATIC(c)       (  closureFlags(c) & _STA) +#define closure_THUNK(c)        (  closureFlags(c) & _THU)  #define closure_MUTABLE(c)      (  closureFlags(c) & _MUT)  #define closure_UNPOINTED(c)    (  closureFlags(c) & _UPT) +#define closure_SRT(c)          (  closureFlags(c) & _SRT) + +/* same as above but for info-ptr 
rather than closure */ +#define ipFlags(ip)             (closure_flags[ip->type]) +#define ip_HNF(ip)               (  ipFlags(ip) & _HNF) +#define ip_BITMAP(ip)       	 (  ipFlags(ip) & _BTM) +#define ip_SHOULD_SPARK(ip) 	 (!(ipFlags(ip) & _NS)) +#define ip_STATIC(ip)       	 (  ipFlags(ip) & _STA) +#define ip_THUNK(ip)        	 (  ipFlags(ip) & _THU) +#define ip_MUTABLE(ip)      	 (  ipFlags(ip) & _MUT) +#define ip_UNPOINTED(ip)    	 (  ipFlags(ip) & _UPT) +#define ip_SRT(ip)          	 (  ipFlags(ip) & _SRT)  /* -----------------------------------------------------------------------------     Info Tables @@ -153,8 +220,9 @@ typedef StgClosure* StgSRT[];  typedef struct _StgInfoTable {      StgSRT         *srt;	/* pointer to the SRT table */ -#ifdef PAR -    StgParInfo	    par; +#if defined(PAR) || defined(GRAN) +  // StgParInfo	    par; +    struct _StgInfoTable    *rbh_infoptr;  #endif  #ifdef PROFILING    /* StgProfInfo     prof; */ diff --git a/ghc/includes/Parallel.h b/ghc/includes/Parallel.h new file mode 100644 index 0000000000..e9a6ef1cc6 --- /dev/null +++ b/ghc/includes/Parallel.h @@ -0,0 +1,342 @@ +/* +  Time-stamp: <Fri Dec 10 1999 17:15:01 Stardate: [-30]4028.38 software> +  +  Definitions for parallel machines. + +  This section contains definitions applicable only to programs compiled +  to run on a parallel machine, i.e. on GUM. Some of these definitions +  are also used when simulating parallel execution, i.e. on GranSim. 
+*/ + +/* +  ToDo: Check the PAR specfic part of this file  +        Move stuff into Closures.h and ClosureMacros.h  +	Clean-up GRAN specific code +  -- HWL +*/ + +#ifndef PARALLEL_H +#define PARALLEL_H + +#if defined(PAR) || defined(GRAN)        /* whole file */ + +//@node Parallel definitions, End of File +//@section Parallel definitions + +//@menu +//* Basic definitions::		 +//* GUM::			 +//* GranSim::			 +//@end menu + +//@node Basic definitions, GUM, Parallel definitions, Parallel definitions +//@subsection Basic definitions + +/* SET_PAR_HDR and SET_STATIC_PAR_HDR now live in ClosureMacros.h */ + +/* Needed for dumping routines */ +#if defined(PAR) +# define NODE_STR_LEN              20 +# define TIME_STR_LEN              120 +# define TIME                      rtsTime +# define CURRENT_TIME              msTime() +# define TIME_ON_PROC(p)           msTime() +# define CURRENT_PROC              thisPE +# define BINARY_STATS              RtsFlags.ParFlags.ParStats.Binary +#elif defined(GRAN) +# define NODE_STR_LEN              20 +# define TIME_STR_LEN              120 +# define TIME                      rtsTime +# define CURRENT_TIME              CurrentTime[CurrentProc] +# define TIME_ON_PROC(p)           CurrentTime[p] +# define CURRENT_PROC              CurrentProc +# define BINARY_STATS              RtsFlags.GranFlags.GranSimStats.Binary +#endif + +#if defined(PAR) +#  define MAX_PES	256		/* Maximum number of processors */ +	/* MAX_PES is enforced by SysMan, which does not +	   allow more than this many "processors". +	   This is important because PackGA [GlobAddr.lc] +	   **assumes** that a PE# can fit in 8+ bits. +	*/ + +# define SPARK_POOLS 	2   /* no. 
of spark pools */ +# define REQUIRED_POOL 	0   /* idx of pool of mandatory sparks (concurrency) */ +# define ADVISORY_POOL 	1   /* idx of pool of advisory sparks (parallelism) */ +#endif + +//@menu +//* GUM::			 +//* GranSim::			 +//@end menu +//*/ + +//@node GUM, GranSim, Basic definitions, Parallel definitions +//@subsection GUM + +#if defined(PAR)  +/* +Symbolic constants for the packing code. + +This constant defines how many words of data we can pack into a single +packet in the parallel (GUM) system. +*/ + +//@menu +//* Types::			 +//* Externs::			 +//* Prototypes::		 +//* Macros::			 +//@end menu +//*/ + +//@node Types, Externs, GUM, GUM +//@subsubsection Types + +/* Sparks and spark queues */ +typedef StgClosure  *rtsSpark; +typedef rtsSpark    *rtsSparkQ; + +typedef struct rtsPackBuffer_ { +  StgInt /* nat */           id;  +  StgInt /* nat */           size; +  StgInt /* nat */           unpacked_size; +  StgTSO       *tso; +  StgWord     *buffer[0];   +} rtsPackBuffer; + +#define PACK_BUFFER_HDR_SIZE 4 + +//@node Externs, Prototypes, Types, GUM +//@subsubsection Externs + +// extern rtsBool do_sp_profile; + +extern globalAddr theGlobalFromGA, theGlobalToGA; +extern StgBlockedFetch *PendingFetches; +extern GlobalTaskId    *allPEs; + +extern rtsBool      IAmMainThread, GlobalStopPending; +//extern rtsBool      fishing; +extern rtsTime      last_fish_arrived_at; +extern nat          outstandingFishes; +extern GlobalTaskId SysManTask; +extern int          seed;     /* pseudo-random-number generator seed: */ +                              /* Initialised in ParInit */ +extern StgInt       threadId; /* Number of Threads that have existed on a PE */ +extern GlobalTaskId mytid; + +extern GlobalTaskId *allPEs; +extern nat nPEs; +extern nat sparksIgnored, sparksCreated, threadsIgnored, threadsCreated; +extern nat advisory_thread_count; + +extern rtsBool InGlobalGC;  /* Are we in the midst of performing global GC */ + +static ullong startTime;    /* start of comp; 
in RtsStartup.c */ + +/* the spark pools proper */ +extern rtsSpark *pending_sparks_hd[];  /* ptr to start of a spark pool */  +extern rtsSpark *pending_sparks_tl[];  /* ptr to end of a spark pool */  +extern rtsSpark *pending_sparks_lim[];  +extern rtsSpark *pending_sparks_base[];  +extern nat spark_limit[]; + +extern rtsPackBuffer *PackBuffer;      /* size: can be set via option */ +extern rtsPackBuffer *buffer;             /* HWL_ */ +extern rtsPackBuffer *freeBuffer;           /* HWL_ */ +extern rtsPackBuffer *packBuffer;           /* HWL_ */ +extern rtsPackBuffer *gumPackBuffer; + +extern int thisPE; + +/* From Global.c */ +extern GALA *freeGALAList; +extern GALA *freeIndirections; +extern GALA *liveIndirections; +extern GALA *liveRemoteGAs; + +/* +extern HashTable *taskIDtoPEtable; +extern HashTable *LAtoGALAtable; +extern HashTable *pGAtoGALAtable; +*/ + +//@node Prototypes, Macros, Externs, GUM +//@subsubsection Prototypes + +/* From ParInit.c */ +void          initParallelSystem(void); +void          SynchroniseSystem(void); +void          par_exit(StgInt n); + +PEs           taskIDtoPE (GlobalTaskId gtid); +void          registerTask (GlobalTaskId gtid); +globalAddr   *LAGAlookup (StgClosure *addr); +StgClosure   *GALAlookup (globalAddr *ga); +//static GALA  *allocIndirection (StgPtr addr); +globalAddr   *makeGlobal (StgClosure *addr, rtsBool preferred); +globalAddr   *setRemoteGA (StgClosure *addr, globalAddr *ga, rtsBool preferred); +void          splitWeight (globalAddr *to, globalAddr *from); +globalAddr   *addWeight (globalAddr *ga); +void          initGAtables (void); +void          RebuildLAGAtable (void); +StgWord       PackGA (StgWord pe, int slot); + +//@node Macros,  , Prototypes, GUM +//@subsubsection Macros + +/* delay (in us) between dying fish returning and sending out a new fish */ +#define FISH_DELAY                   1000 +/* max no. 
of outstanding spark steals */ +#define MAX_FISHES                   1   + +// ToDo: check which of these is actually needed! + +#    define PACK_HEAP_REQUIRED  ((RtsFlags.ParFlags.packBufferSize - PACK_HDR_SIZE) / (PACK_GA_SIZE + _FHS) * (MIN_UPD_SIZE + 2)) + +#  define MAX_GAS 	(RtsFlags.ParFlags.packBufferSize / PACK_GA_SIZE) + + +#  define PACK_GA_SIZE	3	/* Size of a packed GA in words */ +			        /* Size of a packed fetch-me in words */ +#  define PACK_FETCHME_SIZE (PACK_GA_SIZE + FIXED_HS) + +#  define PACK_HDR_SIZE	1	/* Words of header in a packet */ + +#  define PACK_PLC_SIZE	2	/* Size of a packed PLC in words */ + +/* +  Definitions relating to the entire parallel-only fixed-header field. + +  On GUM, the global addresses for each local closure are stored in a +  separate hash table, rather then with the closure in the heap.  We call +  @getGA@ to look up the global address associated with a local closure (0 +  is returned for local closures that have no global address), and @setGA@ +  to store a new global address for a local closure which did not +  previously have one.  */ + +#  define GA_HDR_SIZE			0 +   +#  define GA(closure)		        getGA(closure) +   +#  define SET_GA(closure, ga)             setGA(closure,ga) +#  define SET_STATIC_GA(closure) +#  define SET_GRAN_HDR(closure,pe) +#  define SET_STATIC_PROCS(closure) +   +#  define MAX_GA_WEIGHT			0	/* Treat as 2^n */ +   +/* At the moment, there is no activity profiling for GUM.  This may change. 
*/ +#  define SET_TASK_ACTIVITY(act)        /* nothing */ + +#endif /* PAR */ + +//@node GranSim,  , GUM, Parallel definitions +//@subsection GranSim + +#if defined(GRAN) +/* ToDo: Check which of the PAR routines are needed in GranSim -- HWL */ + +//@menu +//* Types::			 +//* Prototypes::		 +//* Macros::			 +//@end menu +//*/ + +//@node Types, Prototypes, GranSim, GranSim +//@subsubsection Types + +typedef struct rtsPackBuffer_ { +  StgInt /* nat */           id; +  StgInt /* nat */           size; +  StgInt /* nat */           unpacked_size; +  StgTSO       *tso; +  StgClosure  **buffer;   +} rtsPackBuffer; + +//@node Prototypes, Macros, Types, GranSim +//@subsubsection Prototypes + + +/* main packing functions */ +/* +rtsPackBuffer *PackNearbyGraph(StgClosure* closure, StgTSO* tso, nat *packbuffersize); +rtsPackBuffer *PackOneNode(StgClosure* closure, StgTSO* tso, nat *packbuffersize); +void PrintPacket(rtsPackBuffer *buffer); +StgClosure *UnpackGraph(rtsPackBuffer* buffer); +*/ +/* important auxiliary functions */ + +/*  +OLD CODE -- HWL +void  InitPackBuffer(void); +P_    AllocateHeap (W_ size); +P_    PackNearbyGraph (P_ closure, P_ tso, W_ *packbuffersize); +P_    PackOneNode (P_ closure, P_ tso, W_ *packbuffersize); +P_    UnpackGraph (P_ buffer); + +void    InitClosureQueue (void); +P_      DeQueueClosure(void); +void    QueueClosure (P_ closure); +// rtsBool QueueEmpty(); +void    PrintPacket (P_ buffer); +*/ + +// StgInfoTable *get_closure_info(StgClosure* node, unsigned int /* nat */ *size, unsigned int /* nat */ *ptrs, unsigned int /* nat */ *nonptrs, unsigned int /* nat */ *vhs, char *info_hdr_ty); +// int /* rtsBool */ IS_BLACK_HOLE(StgClosure* node)          ; + +//@node Macros,  , Prototypes, GranSim +//@subsubsection Macros + +/* max no. of outstanding spark steals */ +#define MAX_FISHES                   1   + +/* These are needed in the packing code to get the size of the packet +   right. The closures itself are never built in GrAnSim. 
*/ +#  define FETCHME_VHS				IND_VHS +#  define FETCHME_HS				IND_HS +   +#  define FETCHME_GA_LOCN                       FETCHME_HS +   +#  define FETCHME_CLOSURE_SIZE(closure)		IND_CLOSURE_SIZE(closure) +#  define FETCHME_CLOSURE_NoPTRS(closure)		0L +#  define FETCHME_CLOSURE_NoNONPTRS(closure)	(IND_CLOSURE_SIZE(closure)-IND_VHS) +   +#  define MAX_GAS 	(RtsFlags.GranFlags.packBufferSize / PACK_GA_SIZE) +#  define PACK_GA_SIZE	3	/* Size of a packed GA in words */ +			        /* Size of a packed fetch-me in words */ +#  define PACK_FETCHME_SIZE (PACK_GA_SIZE + FIXED_HS) +#  define PACK_HDR_SIZE	4	/* Words of header in a packet */ + +#    define PACK_HEAP_REQUIRED  \ +      (RtsFlags.GranFlags.packBufferSize * sizeofW(StgClosure*) + \ +      2 * sizeofW(StgInt) + sizeofW(StgTSO*)) + +#    define PACK_FLAG_LOCN           0   +#    define PACK_TSO_LOCN            1 +#    define PACK_UNPACKED_SIZE_LOCN  2 +#    define PACK_SIZE_LOCN           3 +#    define MAGIC_PACK_FLAG          0xfabc + +#  define GA_HDR_SIZE			1 + +#  define PROCS_HDR_POSN		PAR_HDR_POSN +#  define PROCS_HDR_SIZE		1 + +/* Accessing components of the field */ +#  define PROCS(closure)	        ((closure)->header.gran.procs) +/* SET_PROCS is now SET_GRAN_HEADER in ClosureMacros.h. 
*/ + +#endif   /* GRAN */ + +//@node End of File,  , Parallel definitions +//@section End of File + +#endif /* defined(PAR) || defined(GRAN)         whole file */ + +#endif /* Parallel_H */ + + diff --git a/ghc/includes/PrimOps.h b/ghc/includes/PrimOps.h index d11de24ee4..0d97628338 100644 --- a/ghc/includes/PrimOps.h +++ b/ghc/includes/PrimOps.h @@ -1,5 +1,5 @@  /* ----------------------------------------------------------------------------- - * $Id: PrimOps.h,v 1.43 2000/01/12 15:15:17 simonmar Exp $ + * $Id: PrimOps.h,v 1.44 2000/01/13 14:34:00 hwloidl Exp $   *   * (c) The GHC Team, 1998-1999   * @@ -734,6 +734,118 @@ EF_(unblockAsyncExceptionszh_fast);  extern int cmp_thread(const StgTSO *tso1, const StgTSO *tso2); +/* ------------------------------------------------------------------------ +   Parallel PrimOps + +   A par in the Haskell code is ultimately translated to a parzh macro +   (with a case wrapped around it to guarantee that the macro is actually  +    executed; see compiler/prelude/PrimOps.lhs) +   ---------------------------------------------------------------------- */ + +#if defined(GRAN) +// hash coding changed from 2.10 to 4.00 +#define parzh(r,node)             parZh(r,node) + +#define parZh(r,node)				\ +	PARZh(r,node,1,0,0,0,0,0) + +#define parAtzh(r,node,where,identifier,gran_info,size_info,par_info,rest) \ +	parATZh(r,node,where,identifier,gran_info,size_info,par_info,rest,1) + +#define parAtAbszh(r,node,proc,identifier,gran_info,size_info,par_info,rest) \ +	parATZh(r,node,proc,identifier,gran_info,size_info,par_info,rest,2) + +#define parAtRelzh(r,node,proc,identifier,gran_info,size_info,par_info,rest) \ +	parATZh(r,node,proc,identifier,gran_info,size_info,par_info,rest,3) + +#define parAtForNowzh(r,node,where,identifier,gran_info,size_info,par_info,rest)	\ +	parATZh(r,node,where,identifier,gran_info,size_info,par_info,rest,0) + +#define parATZh(r,node,where,identifier,gran_info,size_info,par_info,rest,local)	\ +{							\ +  rtsSparkQ 
result;						\ +  if (closure_SHOULD_SPARK((StgClosure*)node)) {				\ +    rtsSparkQ result;						\ +    STGCALL6(newSpark, node,identifier,gran_info,size_info,par_info,local);	\ +    if (local==2) {         /* special case for parAtAbs */   \ +      STGCALL3(GranSimSparkAtAbs, result,(I_)where,identifier);\ +    } else if (local==3) {  /* special case for parAtRel */   \ +      STGCALL3(GranSimSparkAtAbs, result,(I_)(CurrentProc+where),identifier);	\ +    } else {       \ +      STGCALL3(GranSimSparkAt, result,where,identifier);	\ +    }        \ +  }                                                     \ +} + +#define parLocalzh(r,node,identifier,gran_info,size_info,par_info,rest)	\ +	PARZh(r,node,rest,identifier,gran_info,size_info,par_info,1) + +#define parGlobalzh(r,node,identifier,gran_info,size_info,par_info,rest) \ +	PARZh(r,node,rest,identifier,gran_info,size_info,par_info,0) + +#define PARZh(r,node,rest,identifier,gran_info,size_info,par_info,local) \ +{                                                                        \ +  if (closure_SHOULD_SPARK((StgClosure*)node)) {                         \ +    rtsSpark *result;						         \ +    result = RET_STGCALL6(rtsSpark*, newSpark,                           \ +                          node,identifier,gran_info,size_info,par_info,local);\ +    STGCALL1(add_to_spark_queue,result); 				\ +    STGCALL2(GranSimSpark, local,(P_)node);	                        \ +  }							                \ +} + +#define copyablezh(r,node)				\ +  /* copyable not yet implemented!! */ + +#define noFollowzh(r,node)				\ +  /* noFollow not yet implemented!! 
*/ + +#endif  /* GRAN */ + +#if 0 + +# if defined(GRAN) +/* ToDo: Use a parallel ticky macro for this */ +# define COUNT_SPARK(node)     { (CurrentTSO->gran.globalsparks)++; sparksCreated++; } +# elif defined(PAR) +# define COUNT_SPARK(node)     { (CurrentTSO->par.globalsparks)++; sparksCreated++; } +# endif + +/*  +   Note that we must bump the required thread count NOW, rather +   than when the thread is actually created.   + +   forkzh not needed any more; see ghc/rts/PrimOps.hc +*/ +#define forkzh(r,liveness,node)				\ +{							\ +  extern  nat context_switch;                           \ +  while (pending_sparks_tl[REQUIRED_POOL] == pending_sparks_lim[REQUIRED_POOL]) \ +    DO_YIELD((liveness << 1) | 1);			\ +  if (closure_SHOULD_SPARK((StgClosure *)node)) {				\ +    *pending_sparks_tl[REQUIRED_POOL]++ = (P_)(node);	\ +  } else {                                              \ +    sparksIgnored++;                                    \ +  }							\ +  context_switch = 1;					\ +} + +// old version of par (previously used in GUM + +#define parzh(r,node)					\ +{							\ +  extern  nat context_switch;                           \ +  COUNT_SPARK(node);						\ +  if (closure_SHOULD_SPARK((StgClosure *)node) &&	\ +      pending_sparks_tl[ADVISORY_POOL] < pending_sparks_lim[ADVISORY_POOL]) {\ +    *pending_sparks_tl[ADVISORY_POOL]++ = (StgClosure *)(node);	\ +  } else {						\ +    sparksIgnored++;					\ +  }							\ +  r = context_switch = 1;					\ +} +#endif /* 0 */ +  #if defined(SMP) || defined(PAR)  #define parzh(r,node)					\  {							\ diff --git a/ghc/includes/Rts.h b/ghc/includes/Rts.h index 7d3511837a..40deb1e431 100644 --- a/ghc/includes/Rts.h +++ b/ghc/includes/Rts.h @@ -1,5 +1,5 @@  /* ----------------------------------------------------------------------------- - * $Id: Rts.h,v 1.11 2000/01/13 12:40:15 simonmar Exp $ + * $Id: Rts.h,v 1.12 2000/01/13 14:34:01 hwloidl Exp $   *   * (c) The GHC Team, 1998-1999   * @@ -42,17 +42,10 @@  #define MAX_RTS_ARGS 32 
 /* ----------------------------------------------------------------------------- -   Useful typedefs +   Assertions and Debuggery     -------------------------------------------------------------------------- */ -typedef unsigned int  nat;           /* at least 32 bits (like int) */ -typedef unsigned long lnat;          /* at least 32 bits            */ -typedef unsigned long long ullong;   /* at least 32 bits            */ -   -typedef enum {  -    rtsFalse = 0,  -    rtsTrue  -} rtsBool; +#define IF_RTSFLAGS(c,s)  if (RtsFlags.c) { s; }  /* -----------------------------------------------------------------------------     Assertions and Debuggery diff --git a/ghc/includes/RtsTypes.h b/ghc/includes/RtsTypes.h new file mode 100644 index 0000000000..10c4bdeb49 --- /dev/null +++ b/ghc/includes/RtsTypes.h @@ -0,0 +1,76 @@ +/* +  Time-stamp: <Mon Nov 22 1999 21:29:44 Stardate: [-30]3939.47 hwloidl> + +  RTS specific types. +*/ + +/* ------------------------------------------------------------------------- +   Generally useful typedefs +   ------------------------------------------------------------------------- */ + +#ifndef RTS_TYPES_H +#define RTS_TYPES_H + +typedef unsigned int  nat;           /* at least 32 bits (like int) */ +typedef unsigned long lnat;          /* at least 32 bits            */ +typedef unsigned long long ullong;   /* at least 32 bits            */ + +/* ullong (64|128-bit) type: only include if needed (not ANSI) */ +#if defined(__GNUC__)  +#define LL(x) (x##LL) +#else +#define LL(x) (x##L) +#endif +   +typedef enum {  +    rtsFalse = 0,  +    rtsTrue  +} rtsBool; + +/*  +   Types specific to the parallel runtime system. 
+*/ + +#if defined(PAR) +/* types only needed in the parallel system */ +typedef struct hashtable ParHashTable; +typedef struct hashlist ParHashList; + +// typedef double REAL_TIME; +// typedef W_ TIME; +// typedef GlobalTaskId Proc; +typedef int           GlobalTaskId; +typedef ullong        rtsTime; +typedef GlobalTaskId  PEs; +typedef unsigned int  rtsWeight; +typedef int           rtsPacket; +typedef int           OpCode; + +/* Global addresses i.e. unique ids in a parallel setup; needed in Closures.h*/ +typedef struct { +  union { +    StgPtr plc; +    struct { +      GlobalTaskId gtid; +      int slot; +    } gc; +  } payload; +  rtsWeight weight; +} globalAddr; + +/* (GA, LA) pairs */ +typedef struct gala { +    globalAddr ga; +    StgPtr la; +    struct gala *next; +    rtsBool preferred; +} GALA; + +#elif defined(GRAN) + +typedef lnat      rtsTime; +typedef StgWord   PEs; + +#endif + +#endif /* RTS_TYPES_H */ diff --git a/ghc/includes/SchedAPI.h b/ghc/includes/SchedAPI.h index 317a177b91..18c48f5f9c 100644 --- a/ghc/includes/SchedAPI.h +++ b/ghc/includes/SchedAPI.h @@ -1,5 +1,5 @@  /* ----------------------------------------------------------------------------- - * $Id: SchedAPI.h,v 1.8 1999/11/18 12:10:17 sewardj Exp $ + * $Id: SchedAPI.h,v 1.9 2000/01/13 14:34:01 hwloidl Exp $   *   * (c) The GHC Team 1998   * @@ -11,6 +11,11 @@  #ifndef SCHEDAPI_H  #define SCHEDAPI_H +#if defined(GRAN) +// Dummy def for NO_PRI if not in GranSim +#define NO_PRI  0 +#endif +  /*    * schedule() plus the thread creation functions are not part   * part of the external RTS API, so leave them out if we're @@ -22,8 +27,11 @@ SchedulerStatus waitThread(StgTSO *main_thread, /*out*/StgClosure **ret);  /*    * Creating threads   */ - +#if defined(GRAN) +StgTSO *createThread(nat stack_size, StgInt pri); +#else  StgTSO *createThread(nat stack_size); +#endif  void scheduleThread(StgTSO *tso);  static inline void pushClosure   (StgTSO *tso, StgClosure *c) { @@ -38,7 +46,11 @@ static 
inline void pushRealWorld (StgTSO *tso) {  static inline StgTSO *  createGenThread(nat stack_size,  StgClosure *closure) {    StgTSO *t; +#if defined(GRAN) +  t = createThread(stack_size, NO_PRI); +#else    t = createThread(stack_size); +#endif    pushClosure(t,closure);    return t;  } @@ -46,7 +58,11 @@ createGenThread(nat stack_size,  StgClosure *closure) {  static inline StgTSO *  createIOThread(nat stack_size,  StgClosure *closure) {    StgTSO *t; +#if defined(GRAN) +  t = createThread(stack_size, NO_PRI); +#else    t = createThread(stack_size); +#endif    pushRealWorld(t);    pushClosure(t,closure);    return t; @@ -60,7 +76,11 @@ createIOThread(nat stack_size,  StgClosure *closure) {  static inline StgTSO *  createStrictIOThread(nat stack_size,  StgClosure *closure) {    StgTSO *t; +#if defined(GRAN) +  t = createThread(stack_size, NO_PRI); +#else    t = createThread(stack_size); +#endif    pushClosure(t,closure);    pushClosure(t,(StgClosure*)&forceIO_closure);    return t; diff --git a/ghc/includes/Stg.h b/ghc/includes/Stg.h index c8d729d45f..0ae31a0d94 100644 --- a/ghc/includes/Stg.h +++ b/ghc/includes/Stg.h @@ -1,5 +1,5 @@  /* ----------------------------------------------------------------------------- - * $Id: Stg.h,v 1.21 1999/11/09 15:57:40 simonmar Exp $ + * $Id: Stg.h,v 1.22 2000/01/13 14:34:01 hwloidl Exp $   *   * (c) The GHC Team, 1998-1999   * @@ -103,6 +103,7 @@ void _stgAssert (char *, unsigned int);  /* Global type definitions*/  #include "StgTypes.h" +#include "RtsTypes.h"  /* Global constaints */  #include "Constants.h" @@ -116,6 +117,12 @@ void _stgAssert (char *, unsigned int);  #include "InfoTables.h"  #include "TSO.h" +/* Simulated-parallel information */ +#include "GranSim.h" + +/* Parallel information */ +#include "Parallel.h" +  /* STG/Optimised-C related stuff */  #include "SMP.h"  #include "MachRegs.h" diff --git a/ghc/includes/StgMacros.h b/ghc/includes/StgMacros.h index bc269ad199..ab78687be2 100644 --- 
a/ghc/includes/StgMacros.h +++ b/ghc/includes/StgMacros.h @@ -1,5 +1,5 @@  /* ----------------------------------------------------------------------------- - * $Id: StgMacros.h,v 1.19 1999/11/22 16:44:30 sewardj Exp $ + * $Id: StgMacros.h,v 1.20 2000/01/13 14:34:01 hwloidl Exp $   *   * (c) The GHC Team, 1998-1999   * @@ -138,6 +138,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }  	}  #define HP_CHK(headroom,ret,r,layout,tag_assts)			\ +        DO_GRAN_ALLOCATE(headroom)                              \  	if ((Hp += headroom) > HpLim) {				\  	    EXTFUN_RTS(stg_chk_##layout);		 	\  	    tag_assts						\ @@ -146,6 +147,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }  	}  #define HP_STK_CHK(stk_headroom,hp_headroom,ret,r,layout,tag_assts) \ +        DO_GRAN_ALLOCATE(hp_headroom)                              \  	if (Sp - stk_headroom < SpLim || (Hp += hp_headroom) > HpLim) {	\  	    EXTFUN_RTS(stg_chk_##layout);		 	\  	    tag_assts						\ @@ -165,6 +167,10 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }     functions.  In all these cases, node points to a closure that we     can just enter to restart the heap check (the NP stands for 'node points'). +   In the NP case GranSim absolutely has to check whether the current node  +   resides on the current processor. Otherwise a FETCH event has to be +   scheduled. All that is done in GranSimFetch. -- HWL +     HpLim points to the LAST WORD of valid allocation space.     
-------------------------------------------------------------------------- */ @@ -176,6 +182,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }  	}  #define HP_CHK_NP(headroom,ptrs,tag_assts)			\ +        DO_GRAN_ALLOCATE(headroom)                              \  	if ((Hp += (headroom)) > HpLim) {			\  	    EXTFUN_RTS(stg_gc_enter_##ptrs);			\              tag_assts						\ @@ -183,6 +190,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }  	}  #define HP_CHK_SEQ_NP(headroom,ptrs,tag_assts)			\ +        DO_GRAN_ALLOCATE(headroom)                              \  	if ((Hp += (headroom)) > HpLim) {			\  	    EXTFUN_RTS(stg_gc_seq_##ptrs);			\              tag_assts						\ @@ -190,6 +198,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }  	}  #define HP_STK_CHK_NP(stk_headroom, hp_headroom, ptrs, tag_assts) \ +        DO_GRAN_ALLOCATE(hp_headroom)                              \  	if ((Sp - (stk_headroom)) < SpLim || (Hp += (hp_headroom)) > HpLim) { \  	    EXTFUN_RTS(stg_gc_enter_##ptrs);		 	\              tag_assts						\ @@ -200,6 +209,7 @@ static inline int IS_ARG_TAG( StgWord p ) { return p <= ARGTAG_MAX; }  /* Heap checks for branches of a primitive case / unboxed tuple return */  #define GEN_HP_CHK_ALT(headroom,lbl,tag_assts)			\ +        DO_GRAN_ALLOCATE(headroom)                              \  	if ((Hp += (headroom)) > HpLim) {			\  	    EXTFUN_RTS(lbl);					\              tag_assts						\ @@ -341,6 +351,25 @@ EF_(stg_gen_block);      JMP_(stg_block_##ptrs);			\    } +#if defined(PAR) +/* +  Similar to BLOCK_NP but separates the saving of the thread state from the +  actual jump via an StgReturn +*/ + +#define SAVE_THREAD_STATE(ptrs)                  \ +  ASSERT(ptrs==1);                               \ +  Sp -= 1;                                       \ +  Sp[0] = R1.w;                                  \ +  SaveThreadState();                              + +#define THREAD_RETURN(ptrs)       
               \ +  ASSERT(ptrs==1);                               \ +  CurrentTSO->whatNext = ThreadEnterGHC;         \ +  R1.i = ThreadBlocked;                          \ +  JMP_(StgReturn);                                +#endif +  /* -----------------------------------------------------------------------------     CCall_GC needs to push a dummy stack frame containing the contents     of volatile registers and variables.   diff --git a/ghc/includes/StgMiscClosures.h b/ghc/includes/StgMiscClosures.h index d9c3489fd1..e0ed4247f0 100644 --- a/ghc/includes/StgMiscClosures.h +++ b/ghc/includes/StgMiscClosures.h @@ -1,5 +1,5 @@  /* ----------------------------------------------------------------------------- - * $Id: StgMiscClosures.h,v 1.15 1999/11/02 15:05:53 simonmar Exp $ + * $Id: StgMiscClosures.h,v 1.16 2000/01/13 14:34:01 hwloidl Exp $   *   * (c) The GHC Team, 1998-1999   * @@ -31,6 +31,9 @@ STGFUN(WHITEHOLE_entry);  STGFUN(SE_BLACKHOLE_entry);  STGFUN(SE_CAF_BLACKHOLE_entry);  #endif +#if defined(PAR) || defined(GRAN) +STGFUN(RBH_entry); +#endif  STGFUN(BCO_entry);  STGFUN(EVACUATED_entry);  STGFUN(FOREIGN_entry); @@ -50,6 +53,15 @@ STGFUN(MUT_CONS_entry);  STGFUN(END_MUT_LIST_entry);  STGFUN(dummy_ret_entry); +/* this is the NIL ptr for a TSO queue (e.g. 
runnable queue) */ +#define END_TSO_QUEUE  ((StgTSO *)(void*)&END_TSO_QUEUE_closure) +#if defined(PAR) || defined(GRAN) +/* this is the NIL ptr for a blocking queue */ +# define END_BQ_QUEUE  ((StgBlockingQueueElement *)(void*)&END_TSO_QUEUE_closure) +/* this is the NIL ptr for a blocked fetch queue (as in PendingFetches in GUM) */ +# define END_BF_QUEUE  ((StgBlockedFetch *)(void*)&END_TSO_QUEUE_closure) +#endif +  /* info tables */  extern DLL_IMPORT_RTS const StgInfoTable IND_info; @@ -69,6 +81,9 @@ extern DLL_IMPORT_RTS const StgInfoTable WHITEHOLE_info;  extern DLL_IMPORT_RTS const StgInfoTable SE_BLACKHOLE_info;  extern DLL_IMPORT_RTS const StgInfoTable SE_CAF_BLACKHOLE_info;  #endif +#if defined(PAR) || defined(GRAN) +extern DLL_IMPORT_RTS const StgInfoTable RBH_info; +#endif  extern DLL_IMPORT_RTS const StgInfoTable BCO_info;  extern DLL_IMPORT_RTS const StgInfoTable EVACUATED_info;  extern DLL_IMPORT_RTS const StgInfoTable FOREIGN_info; diff --git a/ghc/includes/TSO.h b/ghc/includes/TSO.h index 5cc34be31b..ce46e00c9a 100644 --- a/ghc/includes/TSO.h +++ b/ghc/includes/TSO.h @@ -1,5 +1,5 @@  /* ----------------------------------------------------------------------------- - * $Id: TSO.h,v 1.9 1999/12/01 14:34:49 simonmar Exp $ + * $Id: TSO.h,v 1.10 2000/01/13 14:34:01 hwloidl Exp $   *   * (c) The GHC Team, 1998-1999   * @@ -10,6 +10,30 @@  #ifndef TSO_H  #define TSO_H +#if defined(GRAN) || defined(PAR) +// magic marker for TSOs; debugging only +#define TSO_MAGIC 4321 + +typedef struct { +  StgInt   pri; +  StgInt   magic; +  StgInt   sparkname; +  rtsTime  startedat; +  rtsBool  exported; +  StgInt   basicblocks; +  StgInt   allocs; +  rtsTime  exectime; +  rtsTime  fetchtime; +  rtsTime  fetchcount; +  rtsTime  blocktime; +  StgInt   blockcount; +  rtsTime  blockedat; +  StgInt   globalsparks; +  StgInt   localsparks; +  rtsTime  clock; +} StgTSOStatBuf; +#endif +  #if defined(PROFILING)  typedef struct {    CostCentreStack *CCCS;	/* thread's current CCS */ 
@@ -20,14 +44,21 @@ typedef struct {  #endif /* PROFILING */  #if defined(PAR) -typedef struct { -} StgTSOParInfo; +typedef StgTSOStatBuf StgTSOParInfo;  #else /* !PAR */  typedef struct {  } StgTSOParInfo;  #endif /* PAR */ -#if defined(TICKY_TICKY) +#if defined(GRAN) +typedef StgTSOStatBuf StgTSOGranInfo; +#else /* !GRAN */ +typedef struct { +} StgTSOGranInfo; +#endif /* GRAN */ + + +#if defined(TICKY)  typedef struct {  } StgTSOTickyInfo;  #else /* !TICKY_TICKY */ @@ -86,6 +117,9 @@ typedef enum {    BlockedOnRead,    BlockedOnWrite,    BlockedOnDelay +#if defined(PAR) +  , BlockedOnGA    // blocked on a remote closure represented by a Global Address +#endif  } StgTSOBlockReason;  typedef union { @@ -93,6 +127,9 @@ typedef union {    struct StgTSO_ *tso;    int fd;    unsigned int delay; +#if defined(PAR) +  globalAddr ga; +#endif  } StgTSOBlockInfo;  /* @@ -104,6 +141,7 @@ typedef union {  typedef struct StgTSO_ {    StgHeader          header;    struct StgTSO_*    link; +  /* SDM and HWL agree that it would be cool to have a list of all TSOs */    StgMutClosure *    mut_link;	/* TSO's are mutable of course! */    StgTSOWhatNext     whatNext;    StgTSOBlockReason  why_blocked; @@ -113,7 +151,7 @@ typedef struct StgTSO_ {    StgTSOTickyInfo    ticky;     StgTSOProfInfo     prof;    StgTSOParInfo      par; -  /* GranSim Info? */ +  StgTSOGranInfo     gran;    /* The thread stack... 
*/    StgWord    	     stack_size;     /* stack size in *words* */ diff --git a/ghc/includes/Updates.h b/ghc/includes/Updates.h index d814c10c82..5378b6c871 100644 --- a/ghc/includes/Updates.h +++ b/ghc/includes/Updates.h @@ -1,5 +1,5 @@  /* ----------------------------------------------------------------------------- - * $Id: Updates.h,v 1.15 1999/11/09 15:47:09 simonmar Exp $ + * $Id: Updates.h,v 1.16 2000/01/13 14:34:01 hwloidl Exp $   *   * (c) The GHC Team, 1998-1999   * @@ -91,18 +91,80 @@     Awaken any threads waiting on this computation     -------------------------------------------------------------------------- */ +#if defined(PAR)  + +/*  +   In a parallel setup several types of closures, might have a blocking queue: +     BLACKHOLE_BQ ... same as in the default concurrent setup; it will be +                      reawakened via calling UPD_IND on that closure after +		      having finished the computation of the graph +     FETCH_ME_BQ  ... a global indirection (FETCH_ME) may be entered by a  +                      local TSO, turning it into a FETCH_ME_BQ; it will be +		      reawakened via calling processResume +     RBH          ... a revertible black hole may be entered by another  +                      local TSO, putting it onto its blocking queue; since +		      RBHs only exist while the corresponding closure is in  +		      transit, they will be reawakened via calling  +		      convertToFetchMe (upon processing an ACK message) + +   In a parallel setup a blocking queue may contain 3 types of closures: +     TSO           ... as in the default concurrent setup +     BLOCKED_FETCH ... indicating that a TSO on another PE is waiting for +                       the result of the current computation +     CONSTR        ... 
a RBHSave closure (which contains data ripped out of +                       the closure to make room for a blocking queue; since +		       it only contains data we use the exisiting type of +		       a CONSTR closure); this closure is the end of a  +		       blocking queue for an RBH closure; it only exists in +		       this kind of blocking queue and must be at the end +		       of the queue +*/		       +extern void awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node); +#define DO_AWAKEN_BQ(bqe, node)  STGCALL2(awakenBlockedQueue, bqe, node); + +#define AWAKEN_BQ(info,closure)						\ +     	if (info == &BLACKHOLE_BQ_info ||               \ +	    info == &FETCH_ME_BQ_info ||                \ +	    get_itbl(closure)->type == RBH) {		                \ +		StgBlockingQueueElement *bqe = ((StgBlockingQueue *)closure)->blocking_queue;\ +		ASSERT(bqe!=END_BQ_QUEUE);		                \ +		DO_AWAKEN_BQ(bqe, closure);     	                \ +	} + +#elif defined(GRAN) + +extern void awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node); +#define DO_AWAKEN_BQ(bq, node)  STGCALL2(awakenBlockedQueue, bq, node); + +/* In GranSim we don't have FETCH_ME or FETCH_ME_BQ closures, so they are +   not checked. The rest of the code is the same as for GUM. 
+*/ +#define AWAKEN_BQ(info,closure)						\ +     	if (info == &BLACKHOLE_BQ_info ||               \ +	    get_itbl(closure)->type == RBH) {		                \ +		StgBlockingQueueElement *bqe = ((StgBlockingQueue *)closure)->blocking_queue;\ +		ASSERT(bqe!=END_BQ_QUEUE);		                \ +		DO_AWAKEN_BQ(bqe, closure);     	                \ +	} + + +#else /* !GRAN && !PAR */ +  extern void awakenBlockedQueue(StgTSO *q); +#define DO_AWAKEN_BQ(closure)  	\ +        STGCALL1(awakenBlockedQueue,		\ +		 ((StgBlockingQueue *)closure)->blocking_queue);  #define AWAKEN_BQ(info,closure)						\       	if (info == &BLACKHOLE_BQ_info) {				\ -	     STGCALL1(awakenBlockedQueue,				\ -		      ((StgBlockingQueue *)closure)->blocking_queue);	\ +          DO_AWAKEN_BQ(closure);                                        \  	} +#endif /* GRAN || PAR */ -/* ----------------------------------------------------------------------------- +/* -------------------------------------------------------------------------     Push an update frame on the stack. -   -------------------------------------------------------------------------- */ +   ------------------------------------------------------------------------- */  #if defined(PROFILING)  #define PUSH_STD_CCCS(frame) frame->header.prof.ccs = CCCS diff --git a/ghc/lib/std/Makefile b/ghc/lib/std/Makefile index 51b68d23a2..853c599de8 100644 --- a/ghc/lib/std/Makefile +++ b/ghc/lib/std/Makefile @@ -47,6 +47,13 @@ ifneq "$(way)" ""  SRC_HC_OPTS += -hisuf $(way_)hi  endif +# HWL: for debugging GranSim generate .hc and .s files +SRC_HC_OPTS += -keep-hc-files-too -keep-s-files-too +# # HWL: why isn't that on by default !!???????????? 
+# ifeq "$(way)" "mg" +# SRC_HC_OPTS += -gransim +# endif +  # per-module flags  PrelArrExtra_HC_OPTS     += -monly-2-regs @@ -55,7 +62,7 @@ PrelArrExtra_HC_OPTS     += -monly-2-regs  PrelNumExtra_HC_OPTS     += -H24m -K2m  PrelPack_HC_OPTS	 += -K4m -PrelBase_HC_OPTS         += -H12m +PrelBase_HC_OPTS         += -H32m -K32m  PrelRead_HC_OPTS         += -H20m  PrelTup_HC_OPTS          += -H12m -K2m  PrelNum_HC_OPTS		 += -H12m -K4m diff --git a/ghc/lib/std/PrelHandle.lhs b/ghc/lib/std/PrelHandle.lhs index 85289ad873..d65c234e85 100644 --- a/ghc/lib/std/PrelHandle.lhs +++ b/ghc/lib/std/PrelHandle.lhs @@ -29,7 +29,9 @@ import PrelShow  import PrelAddr		( Addr, nullAddr )  import PrelReal		( toInteger )  import PrelPack         ( packString ) +#ifndef __PARALLEL_HASKELL__  import PrelWeak		( addForeignFinalizer ) +#endif  import Ix  #ifdef __CONCURRENT_HASKELL__ diff --git a/ghc/lib/std/PrelWeak.lhs b/ghc/lib/std/PrelWeak.lhs index 3b09a396e9..354332bb04 100644 --- a/ghc/lib/std/PrelWeak.lhs +++ b/ghc/lib/std/PrelWeak.lhs @@ -7,6 +7,8 @@  \begin{code}  {-# OPTIONS -fno-implicit-prelude #-} +#ifndef __PARALLEL_HASKELL__ +  module PrelWeak where  import PrelGHC @@ -43,4 +45,6 @@ instance Eq (Weak v) where    (Weak w1) == (Weak w2) = w1 `sameWeak#` w2  -} +#endif +  \end{code} diff --git a/ghc/mk/paths.mk b/ghc/mk/paths.mk index be6dd7d137..88e39ae8f9 100644 --- a/ghc/mk/paths.mk +++ b/ghc/mk/paths.mk @@ -1,5 +1,5 @@  # ----------------------------------------------------------------------------- -# $Id: paths.mk,v 1.17 2000/01/10 11:59:55 simonmar Exp $ +# $Id: paths.mk,v 1.18 2000/01/13 14:34:02 hwloidl Exp $  #  # ghc project specific make variables  # @@ -25,7 +25,6 @@ GHC_INCLUDE_DIR   	:= $(TOP)/includes  GHC_UTILS_DIR	 	:= $(TOP)/utils  GHC_INTERPRETER_DIR 	:= $(TOP)/interpreter -GHC_SYSMAN_DIR 		:= $(GHC_RUNTIME_DIR)/gum  GHC_HSP_DIR 		:= $(GHC_HSC_DIR)  GHC_MKDEPENDHS_DIR	:= $(GHC_UTILS_DIR)/mkdependHS  GHC_HSCPP_DIR 		:= $(GHC_UTILS_DIR)/hscpp @@ -37,11 +36,12 @@ 
GHC_HSCPP    		= $(GHC_HSCPP_DIR)/hscpp  GHC_MKDEPENDHS 		= $(GHC_MKDEPENDHS_DIR)/mkdependHS-inplace  GHC_HSP    		= $(GHC_HSP_DIR)/hsp  GHC_HSC    		= $(GHC_HSC_DIR)/hsc -GHC_SYSMAN    		= $(GHC_RUNTIME_DIR)/gum/SysMan -  UNLIT	 		= $(GHC_UNLIT_DIR)/unlit  GHC_UNLIT		= $(GHC_UNLIT_DIR)/unlit +GHC_SYSMAN    		= $(GHC_RUNTIME_DIR)/parallel/SysMan +GHC_SYSMAN_DIR 		= $(GHC_RUNTIME_DIR)/parallel +  #-----------------------------------------------------------------------------  # Stuff for the C-compiling phase in particular... diff --git a/ghc/rts/ClosureFlags.c b/ghc/rts/ClosureFlags.c index a7fdb0b699..89e98e4728 100644 --- a/ghc/rts/ClosureFlags.c +++ b/ghc/rts/ClosureFlags.c @@ -1,5 +1,5 @@  /* ----------------------------------------------------------------------------- - * $Id: ClosureFlags.c,v 1.5 2000/01/12 12:39:20 simonmar Exp $ + * $Id: ClosureFlags.c,v 1.6 2000/01/13 14:34:02 hwloidl Exp $   *   * (c) The GHC Team 1998-1999   * @@ -23,6 +23,7 @@ StgWord16 closure_flags[] = {   *  to thunks.)   
*/ +/*                             0    1    2    3    4   5   6   7 */  /*			     HNF  BTM   NS  STA  THU MUT UPT SRT */  /* INVALID_OBJECT       */ ( 0                                   ), @@ -84,8 +85,14 @@ StgWord16 closure_flags[] = {  /* WEAK		   	*/ (_HNF|     _NS|              _UPT     ),  /* FOREIGN		*/ (_HNF|     _NS|              _UPT     ),  /* STABLE_NAME	   	*/ (_HNF|     _NS|              _UPT     ), +  /* TSO                  */ (_HNF|     _NS|         _MUT|_UPT     ), -/* BLOCKED_FETCH	*/ (_HNF|     _NS                        ), -/* FETCH_ME		*/ (_HNF|     _NS                        ), -/* EVACUATED		*/ ( 0                                   ) +/* BLOCKED_FETCH	*/ (_HNF|     _NS|         _MUT|_UPT     ), +/* FETCH_ME		*/ (_HNF|     _NS|         _MUT|_UPT     ), +/* FETCH_ME_BQ          */ ( 	      _NS|         _MUT|_UPT     ), +/* RBH                  */ ( 	      _NS|         _MUT|_UPT     ), + +/* EVACUATED		*/ ( 0                                   ), + +/* N_CLOSURE_TYPES      */ ( 0                                   )  }; diff --git a/ghc/rts/Exception.hc b/ghc/rts/Exception.hc index f19f212d03..7fdd6fd334 100644 --- a/ghc/rts/Exception.hc +++ b/ghc/rts/Exception.hc @@ -1,5 +1,5 @@  /* ----------------------------------------------------------------------------- - * $Id: Exception.hc,v 1.2 1999/12/02 09:52:41 simonmar Exp $ + * $Id: Exception.hc,v 1.3 2000/01/13 14:34:02 hwloidl Exp $   *   * (c) The GHC Team, 1998-1999   * @@ -14,6 +14,9 @@  #include "Storage.h"  #include "RtsUtils.h"  #include "RtsFlags.h" +#if defined(PAR) +# include "FetchMe.h" +#endif  /* -----------------------------------------------------------------------------     Exception Primitives @@ -62,7 +65,16 @@ FN_(unblockAsyncExceptionszh_ret_entry)  {    FB_      ASSERT(CurrentTSO->blocked_exceptions != NULL); +#if defined(GRAN) +# error FixME +#elif defined(PAR) +      // is CurrentTSO->block_info.closure always set to the node +      // holding the blocking queue !? 
-- HWL +      awakenBlockedQueue(CurrentTSO->blocked_exceptions,  +	                 CurrentTSO->block_info.closure); +#else      awakenBlockedQueue(CurrentTSO->blocked_exceptions); +#endif      CurrentTSO->blocked_exceptions = NULL;      Sp++;      JMP_(ENTRY_CODE(Sp[0])); @@ -76,7 +88,16 @@ FN_(unblockAsyncExceptionszh_fast)      STK_CHK_GEN(2, R1_PTR, unblockAsyncExceptionszh_fast, );      if (CurrentTSO->blocked_exceptions != NULL) { +#if defined(GRAN) +# error FixME +#elif defined(PAR) +      // is CurrentTSO->block_info.closure always set to the node +      // holding the blocking queue !? -- HWL +      awakenBlockedQueue(CurrentTSO->blocked_exceptions,  +	                 CurrentTSO->block_info.closure); +#else        awakenBlockedQueue(CurrentTSO->blocked_exceptions); +#endif        CurrentTSO->blocked_exceptions = NULL;        Sp--;        Sp[0] = (W_)&blockAsyncExceptionszh_ret_info; diff --git a/ghc/rts/GC.c b/ghc/rts/GC.c index f3ce4c6374..3665034b15 100644 --- a/ghc/rts/GC.c +++ b/ghc/rts/GC.c @@ -1,5 +1,5 @@  /* ----------------------------------------------------------------------------- - * $Id: GC.c,v 1.68 1999/12/01 15:07:00 simonmar Exp $ + * $Id: GC.c,v 1.69 2000/01/13 14:34:02 hwloidl Exp $   *   * (c) The GHC Team 1998-1999   * @@ -7,6 +7,25 @@   *   * ---------------------------------------------------------------------------*/ +//@menu +//* Includes::			 +//* STATIC OBJECT LIST::	 +//* Static function declarations::   +//* Garbage Collect::		 +//* Weak Pointers::		 +//* Evacuation::		 +//* Scavenging::		 +//* Reverting CAFs::		 +//* Sanity code for CAF garbage collection::   +//* Lazy black holing::		 +//* Stack squeezing::		 +//* Pausing a thread::		 +//* Index::			 +//@end menu + +//@node Includes, STATIC OBJECT LIST +//@subsection Includes +  #include "Rts.h"  #include "RtsFlags.h"  #include "RtsUtils.h" @@ -23,9 +42,21 @@  #include "SchedAPI.h"  #include "Weak.h"  #include "StablePriv.h" +#if defined(GRAN) || defined(PAR) +# include 
"GranSimRts.h" +# include "ParallelRts.h" +# include "FetchMe.h" +# if defined(DEBUG) +#  include "Printer.h" +#  include "ParallelDebug.h" +# endif +#endif  StgCAF* enteredCAFs; +//@node STATIC OBJECT LIST, Static function declarations, Includes +//@subsection STATIC OBJECT LIST +  /* STATIC OBJECT LIST.   *   * During GC: @@ -96,6 +127,9 @@ bdescr *old_to_space;  lnat new_blocks;		/* blocks allocated during this GC */  lnat g0s0_pcnt_kept = 30;	/* percentage of g0s0 live at last minor GC */ +//@node Static function declarations, Garbage Collect, STATIC OBJECT LIST +//@subsection Static function declarations +  /* -----------------------------------------------------------------------------     Static function declarations     -------------------------------------------------------------------------- */ @@ -119,6 +153,9 @@ static void         scavenge_mut_once_list  ( generation *g );  static void         gcCAFs                  ( void );  #endif +//@node Garbage Collect, Weak Pointers, Static function declarations +//@subsection Garbage Collect +  /* -----------------------------------------------------------------------------     GarbageCollect @@ -141,6 +178,7 @@ static void         gcCAFs                  ( void );       - free from-space in each step, and set from-space = to-space.     
-------------------------------------------------------------------------- */ +//@cindex GarbageCollect  void GarbageCollect(void (*get_roots)(void))  { @@ -153,6 +191,11 @@ void GarbageCollect(void (*get_roots)(void))    CostCentreStack *prev_CCS;  #endif +#if defined(DEBUG) && defined(GRAN) +  IF_DEBUG(gc, belch("@@ Starting garbage collection at %ld (%lx)\n",  +		     Now, Now)) +#endif +    /* tell the stats department that we've started a GC */    stat_startGC(); @@ -176,8 +219,10 @@ void GarbageCollect(void (*get_roots)(void))    major_gc = (N == RtsFlags.GcFlags.generations-1);    /* check stack sanity *before* GC (ToDo: check all threads) */ -  /*IF_DEBUG(sanity, checkTSO(MainTSO,0)); */ -  IF_DEBUG(sanity, checkFreeListSanity()); +#if defined(GRAN) +  // ToDo!: check sanity  IF_DEBUG(sanity, checkTSOsSanity()); +#endif +    IF_DEBUG(sanity, checkFreeListSanity());    /* Initialise the static object lists     */ @@ -296,6 +341,8 @@ void GarbageCollect(void (*get_roots)(void))      /* Do the mut-once lists first */      for (g = RtsFlags.GcFlags.generations-1; g > N; g--) { +      IF_PAR_DEBUG(verbose, +		   printMutOnceList(&generations[g]));        scavenge_mut_once_list(&generations[g]);        evac_gen = g;        for (st = generations[g].n_steps-1; st >= 0; st--) { @@ -304,6 +351,8 @@ void GarbageCollect(void (*get_roots)(void))      }      for (g = RtsFlags.GcFlags.generations-1; g > N; g--) { +      IF_PAR_DEBUG(verbose, +		   printMutableList(&generations[g]));        scavenge_mutable_list(&generations[g]);        evac_gen = g;        for (st = generations[g].n_steps-1; st >= 0; st--) { @@ -317,6 +366,19 @@ void GarbageCollect(void (*get_roots)(void))    evac_gen = 0;    get_roots(); +#if defined(PAR) +  /* And don't forget to mark the TSO if we got here direct from +   * Haskell! */ +  /* Not needed in a seq version? 
+  if (CurrentTSO) { +    CurrentTSO = (StgTSO *)MarkRoot((StgClosure *)CurrentTSO); +  } +  */ + +  /* Mark the entries in the GALA table of the parallel system */ +  markLocalGAs(major_gc); +#endif +    /* Mark the weak pointer list, and prepare to detect dead weak     * pointers.     */ @@ -577,7 +639,7 @@ void GarbageCollect(void (*get_roots)(void))        int pc_free;         adjusted_blocks = (RtsFlags.GcFlags.maxHeapSize - 2 * blocks); -      IF_DEBUG(gc, fprintf(stderr, "Near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %d\n", RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks)); +      IF_DEBUG(gc, fprintf(stderr, "@@ Near maximum heap size of 0x%x blocks, blocks = %d, adjusted to %d\n", RtsFlags.GcFlags.maxHeapSize, blocks, adjusted_blocks));        pc_free = adjusted_blocks * 100 / RtsFlags.GcFlags.maxHeapSize;        if (pc_free < RtsFlags.GcFlags.pcFreeHeap) /* might even be < 0 */ {  	heapOverflow(); @@ -648,6 +710,11 @@ void GarbageCollect(void (*get_roots)(void))     */    resetNurseries(); +#if defined(PAR) +  /* Reconstruct the Global Address tables used in GUM */ +  RebuildGAtables(major_gc); +#endif +    /* start any pending finalizers */    scheduleFinalizers(old_weak_ptr_list); @@ -675,6 +742,9 @@ void GarbageCollect(void (*get_roots)(void))    stat_endGC(allocated, collected, live, copied, N);  } +//@node Weak Pointers, Evacuation, Garbage Collect +//@subsection Weak Pointers +  /* -----------------------------------------------------------------------------     Weak Pointers @@ -694,6 +764,7 @@ void GarbageCollect(void (*get_roots)(void))     probably be optimised by keeping per-generation lists of weak     pointers, but for a few weak pointers this scheme will work.     -------------------------------------------------------------------------- */ +//@cindex traverse_weak_ptr_list  static rtsBool   traverse_weak_ptr_list(void) @@ -782,6 +853,8 @@ traverse_weak_ptr_list(void)     evacuated need to be evacuated now.     
-------------------------------------------------------------------------- */ +//@cindex cleanup_weak_ptr_list +  static void  cleanup_weak_ptr_list ( StgWeak **list )  { @@ -809,6 +882,8 @@ cleanup_weak_ptr_list ( StgWeak **list )     closure if it is alive, or NULL otherwise.     -------------------------------------------------------------------------- */ +//@cindex isAlive +  StgClosure *  isAlive(StgClosure *p)  { @@ -823,10 +898,14 @@ isAlive(StgClosure *p)       * for static closures with an empty SRT or CONSTR_STATIC_NOCAFs.       */ +#if 1 || !defined(PAR)      /* ignore closures in generations that we're not collecting. */ +    /* In GUM we use this routine when rebuilding GA tables; for some +       reason it has problems with the LOOKS_LIKE_STATIC macro -- HWL */      if (LOOKS_LIKE_STATIC(p) || Bdescr((P_)p)->gen->no > N) {        return p;      } +#endif      switch (info->type) { @@ -850,12 +929,24 @@ isAlive(StgClosure *p)    }  } +//@cindex MarkRoot  StgClosure *  MarkRoot(StgClosure *root)  { +  //if (root != END_TSO_QUEUE)    return evacuate(root);  } +//@cindex MarkRootHWL +StgClosure * +MarkRootHWL(StgClosure *root) +{ +  StgClosure *new = evacuate(root); +  upd_evacuee(root, new); +  return new; +} + +//@cindex addBlock  static void addBlock(step *step)  {    bdescr *bd = allocBlock(); @@ -877,6 +968,8 @@ static void addBlock(step *step)    new_blocks++;  } +//@cindex upd_evacuee +  static __inline__ void   upd_evacuee(StgClosure *p, StgClosure *dest)  { @@ -884,6 +977,8 @@ upd_evacuee(StgClosure *p, StgClosure *dest)    ((StgEvacuated *)p)->evacuee = dest;  } +//@cindex copy +  static __inline__ StgClosure *  copy(StgClosure *src, nat size, step *step)  { @@ -925,6 +1020,8 @@ copy(StgClosure *src, nat size, step *step)   * used to optimise evacuation of BLACKHOLEs.   
*/ +//@cindex copyPart +  static __inline__ StgClosure *  copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *step)  { @@ -953,6 +1050,9 @@ copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *step)    return (StgClosure *)dest;  } +//@node Evacuation, Scavenging, Weak Pointers +//@subsection Evacuation +  /* -----------------------------------------------------------------------------     Evacuate a large object @@ -964,6 +1064,8 @@ copyPart(StgClosure *src, nat size_to_reserve, nat size_to_copy, step *step)     evacuated, or 0 otherwise.     -------------------------------------------------------------------------- */ +//@cindex evacuate_large +  static inline void  evacuate_large(StgPtr p, rtsBool mutable)  { @@ -1026,6 +1128,8 @@ evacuate_large(StgPtr p, rtsBool mutable)     the promotion until the next GC.     -------------------------------------------------------------------------- */ +//@cindex mkMutCons +  static StgClosure *  mkMutCons(StgClosure *ptr, generation *gen)  { @@ -1075,7 +1179,7 @@ mkMutCons(StgClosure *ptr, generation *gen)                           didn't manage to evacuate this object into evac_gen.     
-------------------------------------------------------------------------- */ - +//@cindex evacuate  static StgClosure *  evacuate(StgClosure *q) @@ -1085,6 +1189,9 @@ evacuate(StgClosure *q)    step *step;    const StgInfoTable *info; +  nat size, ptrs, nonptrs, vhs; +  char str[80]; +  loop:    if (HEAP_ALLOCED(q)) {      bd = Bdescr((P_)q); @@ -1110,7 +1217,15 @@ loop:    ASSERT(q && (LOOKS_LIKE_GHC_INFO(GET_INFO(q))  	       || IS_HUGS_CONSTR_INFO(GET_INFO(q))));    info = get_itbl(q); - +  /* +  if (info->type==RBH) { +    info = REVERT_INFOPTR(info); +    IF_DEBUG(gc, +	     belch("@_ Trying to evacuate an RBH %p (%s); reverting to IP %p (%s)", +		     q, info_type(q), info, info_type_by_ip(info))); +  } +  */ +      switch (info -> type) {    case BCO: @@ -1328,7 +1443,7 @@ loop:    case CATCH_FRAME:    case SEQ_FRAME:      /* shouldn't see these */ -    barf("evacuate: stack frame\n"); +    barf("evacuate: stack frame at %p\n", q);    case AP_UPD:    case PAP: @@ -1347,7 +1462,7 @@ loop:      if (evac_gen > 0) {		/* optimisation */        StgClosure *p = ((StgEvacuated*)q)->evacuee;        if (Bdescr((P_)p)->gen->no < evac_gen) { -	/*	fprintf(stderr,"evac failed!\n");*/ +	IF_DEBUG(gc, belch("@@ evacuate: evac of EVACUATED node %p failed!", p));  	failed_to_evac = rtsTrue;  	TICK_GC_FAILED_PROMOTION();        } @@ -1417,10 +1532,44 @@ loop:        }      } +#if defined(PAR) +  case RBH: // cf. 
BLACKHOLE_BQ +    { +      //StgInfoTable *rip = get_closure_info(q, &size, &ptrs, &nonptrs, &vhs, str); +      to = copy(q,BLACKHOLE_sizeW(),step);  +      //ToDo: derive size etc from reverted IP +      //to = copy(q,size,step); +      recordMutable((StgMutClosure *)to); +      IF_DEBUG(gc, +	       belch("@@ evacuate: RBH %p (%s) to %p (%s)", +		     q, info_type(q), to, info_type(to))); +      return to; +    } +    case BLOCKED_FETCH: +    ASSERT(sizeofW(StgBlockedFetch) >= MIN_NONUPD_SIZE); +    to = copy(q,sizeofW(StgBlockedFetch),step); +    IF_DEBUG(gc, +	     belch("@@ evacuate: %p (%s) to %p (%s)", +		   q, info_type(q), to, info_type(to))); +    return to; +    case FETCH_ME: -    fprintf(stderr,"evacuate: unimplemented/strange closure type\n"); -    return q; +    ASSERT(sizeofW(StgBlockedFetch) >= MIN_UPD_SIZE); +    to = copy(q,sizeofW(StgFetchMe),step); +    IF_DEBUG(gc, +	     belch("@@ evacuate: %p (%s) to %p (%s)", +		   q, info_type(q), to, info_type(to))); +    return to; + +  case FETCH_ME_BQ: +    ASSERT(sizeofW(StgBlockedFetch) >= MIN_UPD_SIZE); +    to = copy(q,sizeofW(StgFetchMeBlockingQueue),step); +    IF_DEBUG(gc, +	     belch("@@ evacuate: %p (%s) to %p (%s)", +		   q, info_type(q), to, info_type(to))); +    return to; +#endif    default:      barf("evacuate: strange closure type %d", (int)(info->type)); @@ -1433,6 +1582,7 @@ loop:     relocate_TSO is called just after a TSO has been copied from src to     dest.  It adjusts the update frame list for the new location.     
-------------------------------------------------------------------------- */ +//@cindex relocate_TSO  StgTSO *  relocate_TSO(StgTSO *src, StgTSO *dest) @@ -1481,6 +1631,11 @@ relocate_TSO(StgTSO *src, StgTSO *dest)    return dest;  } +//@node Scavenging, Reverting CAFs, Evacuation +//@subsection Scavenging + +//@cindex scavenge_srt +  static inline void  scavenge_srt(const StgInfoTable *info)  { @@ -1548,7 +1703,7 @@ scavengeTSO (StgTSO *tso)     scavenging a mutable object where early promotion isn't such a good     idea.       -------------------------------------------------------------------------- */ -    +//@cindex scavenge  static void  scavenge(step *step) @@ -1582,6 +1737,11 @@ scavenge(step *step)  		 || IS_HUGS_CONSTR_INFO(GET_INFO((StgClosure *)p))));      info = get_itbl((StgClosure *)p); +    /* +    if (info->type==RBH) +      info = REVERT_INFOPTR(info); +    */ +      switch (info -> type) {      case BCO: @@ -1849,8 +2009,72 @@ scavenge(step *step)  	break;        } +#if defined(PAR) +    case RBH: // cf. BLACKHOLE_BQ +      {  +	// nat size, ptrs, nonptrs, vhs; +	// char str[80]; +	// StgInfoTable *rip = get_closure_info(p, &size, &ptrs, &nonptrs, &vhs, str); +	StgRBH *rbh = (StgRBH *)p; +	(StgClosure *)rbh->blocking_queue =  +	  evacuate((StgClosure *)rbh->blocking_queue); +	if (failed_to_evac) { +	  failed_to_evac = rtsFalse; +	  recordMutable((StgMutClosure *)rbh); +	} +	IF_DEBUG(gc, +		 belch("@@ scavenge: RBH %p (%s) (new blocking_queue link=%p)", +		       p, info_type(p), (StgClosure *)rbh->blocking_queue)); +	// ToDo: use size of reverted closure here! 
+	p += BLACKHOLE_sizeW();  +	break; +      } +      case BLOCKED_FETCH: +      {  +	StgBlockedFetch *bf = (StgBlockedFetch *)p; +	/* follow the pointer to the node which is being demanded */ +	(StgClosure *)bf->node =  +	  evacuate((StgClosure *)bf->node); +	/* follow the link to the rest of the blocking queue */ +	(StgClosure *)bf->link =  +	  evacuate((StgClosure *)bf->link); +	if (failed_to_evac) { +	  failed_to_evac = rtsFalse; +	  recordMutable((StgMutClosure *)bf); +	} +	IF_DEBUG(gc, +		 belch("@@ scavenge: %p (%s); node is now %p; exciting, isn't it", +		     bf, info_type((StgClosure *)bf),  +		     bf->node, info_type(bf->node))); +	p += sizeofW(StgBlockedFetch); +	break; +      } +      case FETCH_ME: +      IF_DEBUG(gc, +	       belch("@@ scavenge: HWL claims nothing to do for %p (%s)", +		     p, info_type((StgClosure *)p))); +      p += sizeofW(StgFetchMe); +      break; // nothing to do in this case + +    case FETCH_ME_BQ: // cf. BLACKHOLE_BQ +      {  +	StgFetchMeBlockingQueue *fmbq = (StgFetchMeBlockingQueue *)p; +	(StgClosure *)fmbq->blocking_queue =  +	  evacuate((StgClosure *)fmbq->blocking_queue); +	if (failed_to_evac) { +	  failed_to_evac = rtsFalse; +	  recordMutable((StgMutClosure *)fmbq); +	} +	IF_DEBUG(gc, +		 belch("@@ scavenge: %p (%s) exciting, isn't it", +		     p, info_type((StgClosure *)p))); +	p += sizeofW(StgFetchMeBlockingQueue); +	break; +      } +#endif +      case EVACUATED:        barf("scavenge: unimplemented/strange closure type\n"); @@ -1879,6 +2103,8 @@ scavenge(step *step)     because they contain old-to-new generation pointers.  Only certain     objects can have this property.     -------------------------------------------------------------------------- */ +//@cindex scavenge_one +  static rtsBool  scavenge_one(StgClosure *p)  { @@ -1890,6 +2116,11 @@ scavenge_one(StgClosure *p)    info = get_itbl(p); +  /* ngoq moHqu'!  
+  if (info->type==RBH) +    info = REVERT_INFOPTR(info); // if it's an RBH, look at the orig closure +  */ +    switch (info -> type) {    case FUN: @@ -1976,6 +2207,7 @@ scavenge_one(StgClosure *p)     generations older than the one being collected) as roots.  We also     remove non-mutable objects from the mutable list at this point.     -------------------------------------------------------------------------- */ +//@cindex scavenge_mut_once_list  static void  scavenge_mut_once_list(generation *gen) @@ -1997,6 +2229,10 @@ scavenge_mut_once_list(generation *gen)  		 || IS_HUGS_CONSTR_INFO(GET_INFO(p))));      info = get_itbl(p); +    /* +    if (info->type==RBH) +      info = REVERT_INFOPTR(info); // if it's an RBH, look at the orig closure +    */      switch(info->type) {      case IND_OLDGEN: @@ -2008,7 +2244,8 @@ scavenge_mut_once_list(generation *gen)        ((StgIndOldGen *)p)->indirectee =           evacuate(((StgIndOldGen *)p)->indirectee); -#if 0 +#ifdef DEBUG +      if (RtsFlags.DebugFlags.gc)         /* Debugging code to print out the size of the thing we just         * promoted          */ @@ -2107,6 +2344,7 @@ scavenge_mut_once_list(generation *gen)    gen->mut_once_list = new_list;  } +//@cindex scavenge_mutable_list  static void  scavenge_mutable_list(generation *gen) @@ -2127,6 +2365,10 @@ scavenge_mutable_list(generation *gen)  		 || IS_HUGS_CONSTR_INFO(GET_INFO(p))));      info = get_itbl(p); +    /* +    if (info->type==RBH) +      info = REVERT_INFOPTR(info); // if it's an RBH, look at the orig closure +    */      switch(info->type) {      case MUT_ARR_PTRS_FROZEN: @@ -2136,6 +2378,10 @@ scavenge_mutable_list(generation *gen)        {  	StgPtr end, q; +	IF_DEBUG(gc, +		 belch("@@ scavenge_mut_list: scavenging MUT_ARR_PTRS_FROZEN %p; size: %#x ; next: %p", +		       p, mut_arr_ptrs_sizeW((StgMutArrPtrs*)p), p->mut_link)); +  	end = (P_)p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);  	evac_gen = gen->no;  	for (q = (P_)((StgMutArrPtrs 
*)p)->payload; q < end; q++) { @@ -2158,6 +2404,10 @@ scavenge_mutable_list(generation *gen)        {  	StgPtr end, q; +	IF_DEBUG(gc, +		 belch("@@ scavenge_mut_list: scavenging MUT_ARR_PTRS %p; size: %#x ; next: %p", +		       p, mut_arr_ptrs_sizeW((StgMutArrPtrs*)p), p->mut_link)); +  	end = (P_)p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p);  	for (q = (P_)((StgMutArrPtrs *)p)->payload; q < end; q++) {  	  (StgClosure *)*q = evacuate((StgClosure *)*q); @@ -2170,6 +2420,10 @@ scavenge_mutable_list(generation *gen)         * it from the mutable list if possible by promoting whatever it         * points to.         */ +	IF_DEBUG(gc, +		 belch("@@ scavenge_mut_list: scavenging MUT_VAR %p; var: %p ; next: %p", +		       p, ((StgMutVar *)p)->var, p->mut_link)); +        ASSERT(p->header.info != &MUT_CONS_info);        ((StgMutVar *)p)->var = evacuate(((StgMutVar *)p)->var);        p->mut_link = gen->mut_list; @@ -2179,6 +2433,11 @@ scavenge_mutable_list(generation *gen)      case MVAR:        {  	StgMVar *mvar = (StgMVar *)p; + +	IF_DEBUG(gc, +		 belch("@@ scavenge_mut_list: scavenging MAVR %p; head: %p; tail: %p; value: %p ; next: %p", +		       mvar, mvar->head, mvar->tail, mvar->value, p->mut_link)); +  	(StgClosure *)mvar->head = evacuate((StgClosure *)mvar->head);  	(StgClosure *)mvar->tail = evacuate((StgClosure *)mvar->tail);  	(StgClosure *)mvar->value = evacuate((StgClosure *)mvar->value); @@ -2205,6 +2464,11 @@ scavenge_mutable_list(generation *gen)      case BLACKHOLE_BQ:        {   	StgBlockingQueue *bh = (StgBlockingQueue *)p; + +	IF_DEBUG(gc, +		 belch("@@ scavenge_mut_list: scavenging BLACKHOLE_BQ (%p); next: %p", +		       p, p->mut_link)); +  	(StgClosure *)bh->blocking_queue =   	  evacuate((StgClosure *)bh->blocking_queue);  	p->mut_link = gen->mut_list; @@ -2233,6 +2497,8 @@ scavenge_mutable_list(generation *gen)        }        continue; +    // HWL: old PAR code deleted here +      default:        /* shouldn't have anything else on the mutables list 
*/        barf("scavenge_mutable_list: strange object? %d", (int)(info->type)); @@ -2240,6 +2506,8 @@ scavenge_mutable_list(generation *gen)    }  } +//@cindex scavenge_static +  static void  scavenge_static(void)  { @@ -2255,7 +2523,10 @@ scavenge_static(void)    while (p != END_OF_STATIC_LIST) {      info = get_itbl(p); - +    /* +    if (info->type==RBH) +      info = REVERT_INFOPTR(info); // if it's an RBH, look at the orig closure +    */      /* make sure the info pointer is into text space */      ASSERT(p && (LOOKS_LIKE_GHC_INFO(GET_INFO(p))  		 || IS_HUGS_CONSTR_INFO(GET_INFO(p)))); @@ -2324,6 +2595,7 @@ scavenge_static(void)     objects pointed to by it.  We can use the same code for walking     PAPs, since these are just sections of copied stack.     -------------------------------------------------------------------------- */ +//@cindex scavenge_stack  static void  scavenge_stack(StgPtr p, StgPtr stack_end) @@ -2332,6 +2604,8 @@ scavenge_stack(StgPtr p, StgPtr stack_end)    const StgInfoTable* info;    StgWord32 bitmap; +  IF_DEBUG(sanity, belch("  scavenging stack between %p and %p", p, stack_end)); +    /*      * Each time around this loop, we are looking at a chunk of stack     * that starts with either a pending argument section or an  @@ -2380,8 +2654,18 @@ scavenge_stack(StgPtr p, StgPtr stack_end)        /* probably a slow-entry point return address: */      case FUN:      case FUN_STATIC: -      p++; +      { +#if 0	 +	StgPtr old_p = p; +	p++; p++;  +	IF_DEBUG(sanity,  +		 belch("HWL: scavenge_stack: FUN(_STATIC) adjusting p from %p to %p (instead of %p)", +		       old_p, p, old_p+1)); +#else +      p++; /* what if FHS!=1 !? -- HWL */ +#endif        goto follow_srt; +      }        /* Specialised code for update frames, since they're so common.         
* We *know* the updatee points to a BLACKHOLE, CAF_BLACKHOLE, @@ -2436,14 +2720,29 @@ scavenge_stack(StgPtr p, StgPtr stack_end)        }        /* small bitmap (< 32 entries, or 64 on a 64-bit machine) */ -    case RET_BCO: -    case RET_SMALL: -    case RET_VEC_SMALL:      case STOP_FRAME:      case CATCH_FRAME:      case SEQ_FRAME: +      { +	StgPtr old_p = p; // debugging only -- HWL +      /* stack frames like these are ordinary closures and therefore may  +	 contain setup-specific fixed-header words (as in GranSim!); +	 therefore, these cases should not use p++ but &(p->payload) -- HWL */ +      IF_DEBUG(gran, IF_DEBUG(sanity, printObj(p))); +      bitmap = info->layout.bitmap; + +      p = (StgPtr)&(((StgClosure *)p)->payload); +      IF_DEBUG(sanity,  +		 belch("HWL: scavenge_stack: (STOP|CATCH|SEQ)_FRAME adjusting p from %p to %p (instead of %p)", +		       old_p, p, old_p+1)); +      goto small_bitmap; +      } +    case RET_BCO: +    case RET_SMALL: +    case RET_VEC_SMALL:        bitmap = info->layout.bitmap;        p++; +      /* this assumes that the payload starts immediately after the info-ptr */      small_bitmap:        while (bitmap != 0) {  	if ((bitmap & 1) == 0) { @@ -2504,6 +2803,7 @@ scavenge_stack(StgPtr p, StgPtr stack_end)    objects are (repeatedly) mutable, so most of the time evac_gen will    be zero.    --------------------------------------------------------------------------- */ +//@cindex scavenge_large  static void  scavenge_large(step *step) @@ -2580,6 +2880,7 @@ scavenge_large(step *step)      case TSO:  	scavengeTSO((StgTSO *)p); +        // HWL: old PAR code deleted here  	continue;      default: @@ -2588,6 +2889,8 @@ scavenge_large(step *step)    }  } +//@cindex zero_static_object_list +  static void  zero_static_object_list(StgClosure* first_static)  { @@ -2610,6 +2913,8 @@ zero_static_object_list(StgClosure* first_static)   * It doesn't do any harm to zero all the mutable link fields on the   * mutable list.   
*/ +//@cindex zero_mutable_list +  static void  zero_mutable_list( StgMutClosure *first )  { @@ -2621,9 +2926,13 @@ zero_mutable_list( StgMutClosure *first )    }  } +//@node Reverting CAFs, Sanity code for CAF garbage collection, Scavenging +//@subsection Reverting CAFs +  /* -----------------------------------------------------------------------------     Reverting CAFs     -------------------------------------------------------------------------- */ +//@cindex RevertCAFs  void RevertCAFs(void)  { @@ -2639,6 +2948,8 @@ void RevertCAFs(void)    enteredCAFs = END_CAF_LIST;  } +//@cindex revert_dead_CAFs +  void revert_dead_CAFs(void)  {      StgCAF* caf = enteredCAFs; @@ -2660,6 +2971,9 @@ void revert_dead_CAFs(void)      }  } +//@node Sanity code for CAF garbage collection, Lazy black holing, Reverting CAFs +//@subsection Sanity code for CAF garbage collection +  /* -----------------------------------------------------------------------------     Sanity code for CAF garbage collection. @@ -2673,6 +2987,8 @@ void revert_dead_CAFs(void)     -------------------------------------------------------------------------- */  #ifdef DEBUG +//@cindex gcCAFs +  static void  gcCAFs(void)  { @@ -2710,6 +3026,9 @@ gcCAFs(void)  }  #endif +//@node Lazy black holing, Stack squeezing, Sanity code for CAF garbage collection +//@subsection Lazy black holing +  /* -----------------------------------------------------------------------------     Lazy black holing. @@ -2717,6 +3036,7 @@ gcCAFs(void)     some work, we have to run down the stack and black-hole all the     closures referred to by update frames.     
-------------------------------------------------------------------------- */ +//@cindex threadLazyBlackHole  static void  threadLazyBlackHole(StgTSO *tso) @@ -2772,6 +3092,9 @@ threadLazyBlackHole(StgTSO *tso)    }  } +//@node Stack squeezing, Pausing a thread, Lazy black holing +//@subsection Stack squeezing +  /* -----------------------------------------------------------------------------   * Stack squeezing   * @@ -2779,6 +3102,7 @@ threadLazyBlackHole(StgTSO *tso)   * lazy black holing here.   *   * -------------------------------------------------------------------------- */ +//@cindex threadSqueezeStack  static void  threadSqueezeStack(StgTSO *tso) @@ -2789,6 +3113,14 @@ threadSqueezeStack(StgTSO *tso)    StgUpdateFrame *prev_frame;			/* Temporally previous */    StgPtr bottom;    rtsBool prev_was_update_frame; +#if DEBUG +  StgUpdateFrame *top_frame; +  nat upd_frames=0, stop_frames=0, catch_frames=0, seq_frames=0, +      bhs=0, squeezes=0; +  void printObj( StgClosure *obj ); // from Printer.c + +  top_frame  = tso->su; +#endif    bottom = &(tso->stack[tso->stack_size]);    frame  = tso->su; @@ -2814,6 +3146,30 @@ threadSqueezeStack(StgTSO *tso)      frame->link = next_frame;      next_frame = frame;      frame = prev_frame; +#if DEBUG +    IF_DEBUG(sanity, +	     if (!(frame>=top_frame && frame<=bottom)) { +	       printObj((StgClosure *)prev_frame); +	       barf("threadSqueezeStack: current frame is rubbish %p; previous was %p\n",  +		    frame, prev_frame); +	     }) +    switch (get_itbl(frame)->type) { +    case UPDATE_FRAME: upd_frames++; +                       if (frame->updatee->header.info == &BLACKHOLE_info) +			 bhs++; +                       break; +    case STOP_FRAME:  stop_frames++; +                      break; +    case CATCH_FRAME: catch_frames++; +                      break; +    case SEQ_FRAME: seq_frames++; +                    break; +    default: +      barf("Found non-frame during stack squeezing at %p (prev frame was %p)\n", +	 
  frame, prev_frame); +      printObj((StgClosure *)prev_frame); +    } +#endif      if (get_itbl(frame)->type == UPDATE_FRAME  	&& frame->updatee->header.info == &BLACKHOLE_info) {          break; @@ -2863,8 +3219,9 @@ threadSqueezeStack(StgTSO *tso)        StgClosure *updatee_keep   = prev_frame->updatee;        StgClosure *updatee_bypass = frame->updatee; -#if 0 /* DEBUG */ -      fprintf(stderr, "squeezing frame at %p\n", frame); +#if DEBUG +      IF_DEBUG(gc, fprintf(stderr, "@@ squeezing frame at %p\n", frame)); +      squeezes++;  #endif        /* Deal with blocking queues.  If both updatees have blocked @@ -2949,9 +3306,10 @@ threadSqueezeStack(StgTSO *tso)        else  	next_frame_bottom = tso->sp - 1; -#if 0 /* DEBUG */ -      fprintf(stderr, "sliding [%p, %p] by %ld\n", sp, next_frame_bottom, -	      displacement); +#if DEBUG +      IF_DEBUG(gc, +	       fprintf(stderr, "sliding [%p, %p] by %ld\n", sp, next_frame_bottom, +		       displacement))  #endif        while (sp >= next_frame_bottom) { @@ -2965,8 +3323,16 @@ threadSqueezeStack(StgTSO *tso)    tso->sp += displacement;    tso->su = prev_frame; +#if DEBUG +  IF_DEBUG(gc, +	   fprintf(stderr, "@@ threadSqueezeStack: squeezed %d update-frames; found %d BHs; found %d update-, %d stop-, %d catch, %d seq-frames\n", +		   squeezes, bhs, upd_frames, stop_frames, catch_frames, seq_frames)) +#endif  } +//@node Pausing a thread, Index, Stack squeezing +//@subsection Pausing a thread +  /* -----------------------------------------------------------------------------   * Pausing a thread   *  @@ -2974,6 +3340,7 @@ threadSqueezeStack(StgTSO *tso)   * here.  We also take the opportunity to do stack squeezing if it's   * turned on.   
* -------------------------------------------------------------------------- */ +//@cindex threadPaused  void  threadPaused(StgTSO *tso) @@ -2983,3 +3350,83 @@ threadPaused(StgTSO *tso)    else      threadLazyBlackHole(tso);  } + +#if DEBUG +//@cindex printMutOnceList +void +printMutOnceList(generation *gen) +{ +  const StgInfoTable *info; +  StgMutClosure *p, *next, *new_list; + +  p = gen->mut_once_list; +  new_list = END_MUT_LIST; +  next = p->mut_link; + +  evac_gen = gen->no; +  failed_to_evac = rtsFalse; + +  fprintf(stderr, "@@ Mut once list %p: ", gen->mut_once_list); +  for (; p != END_MUT_LIST; p = next, next = p->mut_link) { +    fprintf(stderr, "%p (%s), ",  +	    p, info_type((StgClosure *)p)); +  } +  fputc('\n', stderr); +} + +//@cindex printMutableList +void +printMutableList(generation *gen) +{ +  const StgInfoTable *info; +  StgMutClosure *p, *next; + +  p = gen->saved_mut_list; +  next = p->mut_link; + +  evac_gen = 0; +  failed_to_evac = rtsFalse; + +  fprintf(stderr, "@@ Mutable list %p: ", gen->saved_mut_list); +  for (; p != END_MUT_LIST; p = next, next = p->mut_link) { +    fprintf(stderr, "%p (%s), ",  +	    p, info_type((StgClosure *)p)); +  } +  fputc('\n', stderr); +} +#endif /* DEBUG */ + +//@node Index,  , Pausing a thread +//@subsection Index + +//@index +//* GarbageCollect::  @cindex\s-+GarbageCollect +//* MarkRoot::  @cindex\s-+MarkRoot +//* RevertCAFs::  @cindex\s-+RevertCAFs +//* addBlock::  @cindex\s-+addBlock +//* cleanup_weak_ptr_list::  @cindex\s-+cleanup_weak_ptr_list +//* copy::  @cindex\s-+copy +//* copyPart::  @cindex\s-+copyPart +//* evacuate::  @cindex\s-+evacuate +//* evacuate_large::  @cindex\s-+evacuate_large +//* gcCAFs::  @cindex\s-+gcCAFs +//* isAlive::  @cindex\s-+isAlive +//* mkMutCons::  @cindex\s-+mkMutCons +//* relocate_TSO::  @cindex\s-+relocate_TSO +//* revert_dead_CAFs::  @cindex\s-+revert_dead_CAFs +//* scavenge::  @cindex\s-+scavenge +//* scavenge_large::  @cindex\s-+scavenge_large +//* 
scavenge_mut_once_list::  @cindex\s-+scavenge_mut_once_list +//* scavenge_mutable_list::  @cindex\s-+scavenge_mutable_list +//* scavenge_one::  @cindex\s-+scavenge_one +//* scavenge_srt::  @cindex\s-+scavenge_srt +//* scavenge_stack::  @cindex\s-+scavenge_stack +//* scavenge_static::  @cindex\s-+scavenge_static +//* threadLazyBlackHole::  @cindex\s-+threadLazyBlackHole +//* threadPaused::  @cindex\s-+threadPaused +//* threadSqueezeStack::  @cindex\s-+threadSqueezeStack +//* traverse_weak_ptr_list::  @cindex\s-+traverse_weak_ptr_list +//* upd_evacuee::  @cindex\s-+upd_evacuee +//* zero_mutable_list::  @cindex\s-+zero_mutable_list +//* zero_static_object_list::  @cindex\s-+zero_static_object_list +//@end index diff --git a/ghc/rts/GC.h b/ghc/rts/GC.h index dc7beb8ec2..212620e623 100644 --- a/ghc/rts/GC.h +++ b/ghc/rts/GC.h @@ -1,5 +1,5 @@  /* ----------------------------------------------------------------------------- - * $Id: GC.h,v 1.4 1999/02/05 16:02:43 simonm Exp $ + * $Id: GC.h,v 1.5 2000/01/13 14:34:03 hwloidl Exp $   *   * (c) The GHC Team, 1998-1999   * @@ -9,3 +9,4 @@  void threadPaused(StgTSO *);  StgClosure *isAlive(StgClosure *p); +void GarbageCollect(void (*get_roots)(void)); diff --git a/ghc/rts/Hash.h b/ghc/rts/Hash.h index ac0df5cb49..74ff3217eb 100644 --- a/ghc/rts/Hash.h +++ b/ghc/rts/Hash.h @@ -1,5 +1,5 @@  /*----------------------------------------------------------------------------- - * $Id: Hash.h,v 1.1 1999/01/27 12:11:26 simonm Exp $ + * $Id: Hash.h,v 1.2 2000/01/13 14:34:03 hwloidl Exp $   *   * (c) The GHC Team, 1999   * @@ -14,3 +14,4 @@ void        insertHashTable ( HashTable *table, StgWord key, void *data );  void *      removeHashTable ( HashTable *table, StgWord key, void *data );  void        freeHashTable   ( HashTable *table, void (*freeDataFun)(void *) );  HashTable * allocHashTable  ( void ); + diff --git a/ghc/rts/HeapStackCheck.hc b/ghc/rts/HeapStackCheck.hc index fc29ba7f25..1a30f44ec0 100644 --- a/ghc/rts/HeapStackCheck.hc 
+++ b/ghc/rts/HeapStackCheck.hc @@ -1,5 +1,5 @@  /* ----------------------------------------------------------------------------- - * $Id: HeapStackCheck.hc,v 1.10 1999/11/09 15:46:51 simonmar Exp $ + * $Id: HeapStackCheck.hc,v 1.11 2000/01/13 14:34:03 hwloidl Exp $   *   * (c) The GHC Team, 1998-1999   * @@ -276,6 +276,334 @@ EXTFUN(stg_gc_enter_8)    FE_  } +#if defined(GRAN) +/* +  ToDo: merge the block and yield macros, calling something like BLOCK(N) +        at the end; +*/ + +/*  +   Should we actually ever do a yield in such a case?? -- HWL +*/ +EXTFUN(gran_yield_0) +{ +  FB_ +  SaveThreadState();					 +  CurrentTSO->whatNext = ThreadEnterGHC;		 +  R1.i = ThreadYielding; +  JMP_(StgReturn); +  FE_ +} + +EXTFUN(gran_yield_1) +{ +  FB_ +  Sp -= 1; +  Sp[0] = R1.w; +  SaveThreadState();					 +  CurrentTSO->whatNext = ThreadEnterGHC;		 +  R1.i = ThreadYielding; +  JMP_(StgReturn); +  FE_ +} + +/*- 2 Regs--------------------------------------------------------------------*/ + +EXTFUN(gran_yield_2) +{ +  FB_ +  Sp -= 2; +  Sp[1] = R2.w; +  Sp[0] = R1.w; +  SaveThreadState();					 +  CurrentTSO->whatNext = ThreadEnterGHC;		 +  R1.i = ThreadYielding; +  JMP_(StgReturn); +  FE_ +} + +/*- 3 Regs -------------------------------------------------------------------*/ + +EXTFUN(gran_yield_3) +{ +  FB_ +  Sp -= 3; +  Sp[2] = R3.w; +  Sp[1] = R2.w; +  Sp[0] = R1.w; +  SaveThreadState();					 +  CurrentTSO->whatNext = ThreadEnterGHC;		 +  R1.i = ThreadYielding; +  JMP_(StgReturn); +  FE_ +} + +/*- 4 Regs -------------------------------------------------------------------*/ + +EXTFUN(gran_yield_4) +{ +  FB_ +  Sp -= 4; +  Sp[3] = R4.w; +  Sp[2] = R3.w; +  Sp[1] = R2.w; +  Sp[0] = R1.w; +  SaveThreadState();					 +  CurrentTSO->whatNext = ThreadEnterGHC;		 +  R1.i = ThreadYielding; +  JMP_(StgReturn); +  FE_ +} + +/*- 5 Regs -------------------------------------------------------------------*/ + +EXTFUN(gran_yield_5) +{ +  FB_ +  Sp -= 5; +  Sp[4] = R5.w; +  Sp[3] = R4.w; +  
Sp[2] = R3.w; +  Sp[1] = R2.w; +  Sp[0] = R1.w; +  SaveThreadState();					 +  CurrentTSO->whatNext = ThreadEnterGHC;		 +  R1.i = ThreadYielding; +  JMP_(StgReturn); +  FE_ +} + +/*- 6 Regs -------------------------------------------------------------------*/ + +EXTFUN(gran_yield_6) +{ +  FB_ +  Sp -= 6; +  Sp[5] = R6.w; +  Sp[4] = R5.w; +  Sp[3] = R4.w; +  Sp[2] = R3.w; +  Sp[1] = R2.w; +  Sp[0] = R1.w; +  SaveThreadState();					 +  CurrentTSO->whatNext = ThreadEnterGHC;		 +  R1.i = ThreadYielding; +  JMP_(StgReturn); +  FE_ +} + +/*- 7 Regs -------------------------------------------------------------------*/ + +EXTFUN(gran_yield_7) +{ +  FB_ +  Sp -= 7; +  Sp[6] = R7.w; +  Sp[5] = R6.w; +  Sp[4] = R5.w; +  Sp[3] = R4.w; +  Sp[2] = R3.w; +  Sp[1] = R2.w; +  Sp[0] = R1.w; +  SaveThreadState();					 +  CurrentTSO->whatNext = ThreadEnterGHC;		 +  R1.i = ThreadYielding; +  JMP_(StgReturn); +  FE_ +} + +/*- 8 Regs -------------------------------------------------------------------*/ + +EXTFUN(gran_yield_8) +{ +  FB_ +  Sp -= 8; +  Sp[7] = R8.w; +  Sp[6] = R7.w; +  Sp[5] = R6.w; +  Sp[4] = R5.w; +  Sp[3] = R4.w; +  Sp[2] = R3.w; +  Sp[1] = R2.w; +  Sp[0] = R1.w; +  SaveThreadState();					 +  CurrentTSO->whatNext = ThreadEnterGHC;		 +  R1.i = ThreadYielding; +  JMP_(StgReturn); +  FE_ +} + +// the same routines but with a block rather than a yield + +EXTFUN(gran_block_1) +{ +  FB_ +  Sp -= 1; +  Sp[0] = R1.w; +  SaveThreadState();					 +  CurrentTSO->whatNext = ThreadEnterGHC;		 +  R1.i = ThreadBlocked; +  JMP_(StgReturn); +  FE_ +} + +/*- 2 Regs--------------------------------------------------------------------*/ + +EXTFUN(gran_block_2) +{ +  FB_ +  Sp -= 2; +  Sp[1] = R2.w; +  Sp[0] = R1.w; +  SaveThreadState();					 +  CurrentTSO->whatNext = ThreadEnterGHC;		 +  R1.i = ThreadBlocked; +  JMP_(StgReturn); +  FE_ +} + +/*- 3 Regs -------------------------------------------------------------------*/ + +EXTFUN(gran_block_3) +{ +  FB_ +  Sp -= 3; +  Sp[2] = R3.w; +  Sp[1] 
= R2.w; +  Sp[0] = R1.w; +  SaveThreadState();					 +  CurrentTSO->whatNext = ThreadEnterGHC;		 +  R1.i = ThreadBlocked; +  JMP_(StgReturn); +  FE_ +} + +/*- 4 Regs -------------------------------------------------------------------*/ + +EXTFUN(gran_block_4) +{ +  FB_ +  Sp -= 4; +  Sp[3] = R4.w; +  Sp[2] = R3.w; +  Sp[1] = R2.w; +  Sp[0] = R1.w; +  SaveThreadState();					 +  CurrentTSO->whatNext = ThreadEnterGHC;		 +  R1.i = ThreadBlocked; +  JMP_(StgReturn); +  FE_ +} + +/*- 5 Regs -------------------------------------------------------------------*/ + +EXTFUN(gran_block_5) +{ +  FB_ +  Sp -= 5; +  Sp[4] = R5.w; +  Sp[3] = R4.w; +  Sp[2] = R3.w; +  Sp[1] = R2.w; +  Sp[0] = R1.w; +  SaveThreadState();					 +  CurrentTSO->whatNext = ThreadEnterGHC;		 +  R1.i = ThreadBlocked; +  JMP_(StgReturn); +  FE_ +} + +/*- 6 Regs -------------------------------------------------------------------*/ + +EXTFUN(gran_block_6) +{ +  FB_ +  Sp -= 6; +  Sp[5] = R6.w; +  Sp[4] = R5.w; +  Sp[3] = R4.w; +  Sp[2] = R3.w; +  Sp[1] = R2.w; +  Sp[0] = R1.w; +  SaveThreadState();					 +  CurrentTSO->whatNext = ThreadEnterGHC;		 +  R1.i = ThreadBlocked; +  JMP_(StgReturn); +  FE_ +} + +/*- 7 Regs -------------------------------------------------------------------*/ + +EXTFUN(gran_block_7) +{ +  FB_ +  Sp -= 7; +  Sp[6] = R7.w; +  Sp[5] = R6.w; +  Sp[4] = R5.w; +  Sp[3] = R4.w; +  Sp[2] = R3.w; +  Sp[1] = R2.w; +  Sp[0] = R1.w; +  SaveThreadState();					 +  CurrentTSO->whatNext = ThreadEnterGHC;		 +  R1.i = ThreadBlocked; +  JMP_(StgReturn); +  FE_ +} + +/*- 8 Regs -------------------------------------------------------------------*/ + +EXTFUN(gran_block_8) +{ +  FB_ +  Sp -= 8; +  Sp[7] = R8.w; +  Sp[6] = R7.w; +  Sp[5] = R6.w; +  Sp[4] = R5.w; +  Sp[3] = R4.w; +  Sp[2] = R3.w; +  Sp[1] = R2.w; +  Sp[0] = R1.w; +  SaveThreadState();					 +  CurrentTSO->whatNext = ThreadEnterGHC;		 +  R1.i = ThreadBlocked; +  JMP_(StgReturn); +  FE_ +} + +#endif + +#if 0 && defined(PAR) + +/* +  Similar to 
stg_block_1 (called via StgMacro BLOCK_NP) but separates the +  saving of the thread state from the actual jump via an StgReturn. +  We need this separation because we call RTS routines in blocking entry codes +  before jumping back into the RTS (see parallel/FetchMe.hc). +*/ + +EXTFUN(par_block_1_no_jump) +{ +  FB_ +  Sp -= 1; +  Sp[0] = R1.w; +  SaveThreadState();					 +  FE_ +} + +EXTFUN(par_jump) +{ +  FB_ +  CurrentTSO->whatNext = ThreadEnterGHC;		 +  R1.i = ThreadBlocked; +  JMP_(StgReturn); +  FE_ +} + +#endif +  /* -----------------------------------------------------------------------------     For a case expression on a polymorphic or function-typed object, if     the default branch (there can only be one branch) of the case fails diff --git a/ghc/rts/Main.c b/ghc/rts/Main.c index 09e6e218a9..1c721b795c 100644 --- a/ghc/rts/Main.c +++ b/ghc/rts/Main.c @@ -1,5 +1,5 @@  /* ----------------------------------------------------------------------------- - * $Id: Main.c,v 1.13 2000/01/13 12:40:15 simonmar Exp $ + * $Id: Main.c,v 1.14 2000/01/13 14:34:03 hwloidl Exp $   *   * (c) The GHC Team 1998-1999   * @@ -16,21 +16,25 @@  #include "RtsUtils.h"  #ifdef DEBUG -#include "Printer.h"   /* for printing        */ +# include "Printer.h"   /* for printing        */  #endif  #ifdef INTERPRETER -#include "Assembler.h" +# include "Assembler.h"  #endif  #ifdef PAR -#include "ParInit.h" -#include "Parallel.h" -#include "LLC.h" +# include "ParInit.h" +# include "Parallel.h" +# include "LLC.h" +#endif + +#if defined(GRAN) || defined(PAR) +# include "GranSimRts.h"  #endif  #ifdef HAVE_WINDOWS_H -#include <windows.h> +# include <windows.h>  #endif @@ -41,24 +45,65 @@  int main(int argc, char *argv[])  {      int exit_status; -      SchedulerStatus status; +    /* all GranSim/GUM init is done in startupHaskell; sets IAmMainThread! 
*/ +      startupHaskell(argc,argv); -#  ifndef PAR -    /* ToDo: want to start with a larger stack size */ -    status = rts_evalIO((StgClosure *)&mainIO_closure, NULL); -#  else +    /* kick off the computation by creating the main thread with a pointer +       to mainIO_closure representing the computation of the overall program; +       then enter the scheduler with this thread and off we go; +       +       the same for GranSim (we have only one instance of this code) + +       in a parallel setup, where we have many instances of this code +       running on different PEs, we should do this only for the main PE +       (IAmMainThread is set in startupHaskell)  +    */ + +#  if defined(PAR) + +#   if DEBUG +    { /* a wait loop to allow attachment of gdb to UNIX threads */ +      nat i, j, s; + +      for (i=0, s=0; i<RtsFlags.ParFlags.wait; i++) +	for (j=0; j<1000000; j++)  +	  s += j % 65536; +    } +    IF_PAR_DEBUG(verbose, +		 belch("Passed wait loop")); +#   endif +      if (IAmMainThread == rtsTrue) { -    /*Just to show we're alive */        fprintf(stderr, "Main Thread Started ...\n"); -      + +      /* ToDo: Dump event for the main thread */        status = rts_evalIO((StgClosure *)&mainIO_closure, NULL);      } else { -      WaitForPEOp(PP_FINISH,SysManTask); -      exit(EXIT_SUCCESS); +      /* Just to show we're alive */ +      IF_PAR_DEBUG(verbose, +		   fprintf(stderr, "== [%x] Non-Main PE enters scheduler without work ...\n", +			   mytid)); +      +      /* all non-main threads enter the scheduler without work */ +      status = schedule( /* nothing */ );      } -#  endif /* PAR */ + +#  elif defined(GRAN) + +    /* ToDo: Dump event for the main thread */ +    status = rts_evalIO((StgClosure *)&mainIO_closure, NULL); + +#  else /* !PAR && !GRAN */ + +    /* ToDo: want to start with a larger stack size */ +    status = rts_evalIO((StgClosure *)&mainIO_closure, NULL); + +#  endif /* !PAR && !GRAN */ + +    // ToDo: update for parallel execution 
+    /* check the status of the entire Haskell computation */      switch (status) {      case Deadlock:        prog_belch("no threads to run:  infinite loop or deadlock?"); diff --git a/ghc/rts/Makefile b/ghc/rts/Makefile index 920d8ee04a..59d516aed0 100644 --- a/ghc/rts/Makefile +++ b/ghc/rts/Makefile @@ -1,6 +1,6 @@  #----------------------------------------------------------------------------- -# $Id: Makefile,v 1.16 1999/12/07 15:52:40 simonmar Exp $ - +# $Id: Makefile,v 1.17 2000/01/13 14:34:03 hwloidl Exp $ +#  #  This is the Makefile for the runtime-system stuff.  #  This stuff is written in C (and cannot be written in Haskell).  # @@ -21,9 +21,9 @@ include $(TOP)/mk/boilerplate.mk  WAYS=$(GhcLibWays) -SRCS_RTS_C  = $(wildcard *.c) $(wildcard hooks/*.c) $(filter-out gum/SysMan.c,$(wildcard gum/*.c)) +SRCS_RTS_C  = $(wildcard *.c) $(wildcard hooks/*.c) $(filter-out parallel/SysMan.c,$(wildcard parallel/*.c))  SRCS_RTS_S  = $(wildcard *.S) -SRCS_RTS_HC = $(wildcard *.hc) +SRCS_RTS_HC = $(wildcard *.hc) $(wildcard parallel/*.hc)  ifneq "$(way)" "dll"  SRCS_RTS_C  := $(filter-out RtsDllMain.c, $(SRCS_RTS_C)) @@ -59,7 +59,7 @@ WARNING_OPTS += -optc-Wbad-function-cast  #WARNING_OPTS += -optc-Wredundant-decls   #WARNING_OPTS += -optc-Wconversion -SRC_HC_OPTS += -I../includes -I. -Igum $(WARNING_OPTS) $(GhcRtsHcOpts) -optc-DCOMPILING_RTS +SRC_HC_OPTS += -I../includes -I. 
-Iparallel $(WARNING_OPTS) $(GhcRtsHcOpts) -optc-DCOMPILING_RTS  SRC_CC_OPTS = $(GhcRtsCcOpts)  ifneq "$(way)" "dll" @@ -149,7 +149,7 @@ endif  #  ifeq "$(way)" "mp" -all :: gum/SysMan +all :: parallel/SysMan  ifdef solaris2_TARGET_OS  __socket_libs = -lsocket -lnsl @@ -157,12 +157,12 @@ else  __socket_libs =  endif -gum/SysMan : gum/SysMan.mp_o gum/LLComms.mp_o  +parallel/SysMan : parallel/SysMan.mp_o parallel/LLComms.mp_o RtsUtils.mp_o RtsFlags.mp_o  	$(RM) $@ -	gcc -o $@ gum/SysMan.mp_o gum/LLComms.mp_o -L$$PVM_ROOT/lib/$$PVM_ARCH -lgpvm3 -lpvm3 $(__socket_libs) +	gcc -o $@ parallel/SysMan.mp_o parallel/LLComms.mp_o -L$$PVM_ROOT/lib/$$PVM_ARCH -lgpvm3 -lpvm3 $(__socket_libs) -CLEAN_FILES  += gum/SysMan.mp_o gum/SysMan -INSTALL_LIBEXECS += gum/SysMan +CLEAN_FILES  += parallel/SysMan.mp_o parallel/SysMan +INSTALL_LIBEXECS += parallel/SysMan  endif  #----------------------------------------------------------------------------- diff --git a/ghc/rts/PrimOps.hc b/ghc/rts/PrimOps.hc index 01d0a0aa40..8a2db252cf 100644 --- a/ghc/rts/PrimOps.hc +++ b/ghc/rts/PrimOps.hc @@ -1,5 +1,5 @@  /* ----------------------------------------------------------------------------- - * $Id: PrimOps.hc,v 1.38 2000/01/13 12:40:15 simonmar Exp $ + * $Id: PrimOps.hc,v 1.39 2000/01/13 14:34:03 hwloidl Exp $   *   * (c) The GHC Team, 1998-1999   * @@ -907,7 +907,14 @@ FN_(putMVarzh_fast)     */    if (mvar->head != (StgTSO *)&END_TSO_QUEUE_closure) {      ASSERT(mvar->head->why_blocked == BlockedOnMVar); +#if defined(GRAN) +# error FixME +#elif defined(PAR) +    // ToDo: check 2nd arg (mvar) is right +    mvar->head = RET_STGCALL2(StgTSO *,unblockOne,mvar->head,mvar); +#else      mvar->head = RET_STGCALL1(StgTSO *,unblockOne,mvar->head); +#endif      if (mvar->head == (StgTSO *)&END_TSO_QUEUE_closure) {        mvar->tail = (StgTSO *)&END_TSO_QUEUE_closure;      } diff --git a/ghc/rts/Printer.c b/ghc/rts/Printer.c index cbb20ddddf..600d0a207f 100644 --- a/ghc/rts/Printer.c +++ 
b/ghc/rts/Printer.c @@ -1,6 +1,5 @@ -  /* ----------------------------------------------------------------------------- - * $Id: Printer.c,v 1.18 1999/11/29 18:59:46 sewardj Exp $ + * $Id: Printer.c,v 1.19 2000/01/13 14:34:04 hwloidl Exp $   *   * Copyright (c) 1994-1999.   * @@ -18,6 +17,12 @@  #include "Bytecodes.h"  /* for InstrPtr */  #include "Disassembler.h" +#include "Printer.h" + +// HWL: explicit fixed header size to make debugging easier +int fixed_hs = FIXED_HS, itbl_sz = sizeofW(StgInfoTable),  +    uf_sz=sizeofW(StgUpdateFrame), sf_sz=sizeofW(StgSeqFrame);  +  /* --------------------------------------------------------------------------   * local function decls   * ------------------------------------------------------------------------*/ @@ -198,6 +203,14 @@ void printClosure( StgClosure *obj )              fprintf(stderr,")\n");               break; +#if defined(GRAN) || defined(PAR) +    case RBH: +      fprintf(stderr,"RBH(");  +      printPtr((StgPtr)stgCast(StgRBH*,obj)->blocking_queue); +      fprintf(stderr,")\n");  +      break; +#endif +      case CONSTR:      case CONSTR_1_0: case CONSTR_0_1:      case CONSTR_1_1: case CONSTR_0_2: case CONSTR_2_0: @@ -305,6 +318,13 @@ void printClosure( StgClosure *obj )      }  } +/* +void printGraph( StgClosure *obj ) +{ + printClosure(obj); +} +*/ +  StgPtr printStackObj( StgPtr sp )  {      /*fprintf(stderr,"Stack[%d] = ", &stgStack[STACK_SIZE] - sp); */ @@ -678,7 +698,7 @@ static void printZcoded( const char *raw )  /* Causing linking trouble on Win32 plats, so I'm     disabling this for now.   
*/ -#if defined(HAVE_BFD_H) && !defined(_WIN32) +#if defined(HAVE_BFD_H) && !defined(_WIN32) && defined(USE_BSD)  #include <bfd.h> diff --git a/ghc/rts/RtsFlags.c b/ghc/rts/RtsFlags.c index c3c05153d2..8324e1a584 100644 --- a/ghc/rts/RtsFlags.c +++ b/ghc/rts/RtsFlags.c @@ -1,5 +1,5 @@  /* ----------------------------------------------------------------------------- - * $Id: RtsFlags.c,v 1.23 2000/01/13 12:40:15 simonmar Exp $ + * $Id: RtsFlags.c,v 1.24 2000/01/13 14:34:04 hwloidl Exp $   *   * (c) The AQUA Project, Glasgow University, 1994-1997   * (c) The GHC Team, 1998-1999 @@ -8,6 +8,19 @@   *   * ---------------------------------------------------------------------------*/ +//@menu +//* Includes::			 +//* Constants::			 +//* Static function decls::	 +//* Command-line option parsing routines::   +//* GranSim specific options::	 +//* Aux fcts::			 +//@end menu +//*/ + +//@node Includes, Constants +//@subsection Includes +  #include "Rts.h"  #include "RtsFlags.h"  #include "RtsUtils.h" @@ -36,12 +49,100 @@ char  **prog_argv = NULL;  int     rts_argc;  /* ditto */  char   *rts_argv[MAX_RTS_ARGS]; +//@node Constants, Static function decls, Includes +//@subsection Constants +  /*   * constants, used later    */  #define RTS 1  #define PGM 0 +#if defined(GRAN) + +char *gran_debug_opts_strs[] = { +  "DEBUG (-bDe, -bD1): event_trace; printing event trace.\n", +  "DEBUG (-bDE, -bD2): event_stats; printing event statistics.\n", +  "DEBUG (-bDb, -bD4): bq; check blocking queues\n", +  "DEBUG (-bDG, -bD8): pack; routines for (un-)packing graph structures.\n", +  "DEBUG (-bDq, -bD16): checkSparkQ; check consistency of the spark queues.\n", +  "DEBUG (-bDf, -bD32): thunkStealing; print forwarding of fetches.\n", +  "DEBUG (-bDr, -bD64): randomSteal; stealing sparks/threads from random PEs.\n", +  "DEBUG (-bDF, -bD128): findWork; searching spark-pools (local & remote), thread queues for work.\n", +  "DEBUG (-bDu, -bD256): unused; currently unused flag.\n", +  "DEBUG (-bDS, 
-bD512): pri; priority sparking or scheduling.\n", +  "DEBUG (-bD:, -bD1024): checkLight; check GranSim-Light setup.\n", +  "DEBUG (-bDo, -bD2048): sortedQ; check whether spark/thread queues are sorted.\n", +  "DEBUG (-bDz, -bD4096): blockOnFetch; check for blocked on fetch.\n", +  "DEBUG (-bDP, -bD8192): packBuffer; routines handling pack buffer (GranSim internal!).\n", +  "DEBUG (-bDt, -bD16384): blockOnFetch_sanity; check for TSO asleep on fetch.\n", +}; + +/* one character codes for the available debug options */ +char gran_debug_opts_flags[] = { +  'e', 'E', 'b', 'G', 'q', 'f', 'r', 'F', 'u', 'S', ':', 'o', 'z', 'P', 't' +}; + +/* prefix strings printed with the debug messages of the corresponding type */ +char *gran_debug_opts_prefix[] = { +  "", /* event_trace */  +  "", /* event_stats */ +  "##", /* bq */ +  "**", /* pack */ +  "^^", /* checkSparkQ */ +  "==", /* thunkStealing */ +  "^^", /* randomSteal */ +  "+-", /* findWork */ +  "", /* unused */ +  "++", /* pri */ +  "::", /* checkLight */ +  "##", /* sortedQ */ +  "", /* blockOnFetch */ +  "", /* packBuffer */ +  "" /* blockOnFetch_sanity */ +}; + +#elif defined(PAR) + +char *par_debug_opts_strs[] = { +  "DEBUG (-qDv, -qD1): verbose; be generally verbose with parallel related stuff.\n", +  "DEBUG (-qDt, -qD2): trace; trace messages.\n", +  "DEBUG (-qDs, -qD4): schedule; scheduling of parallel threads.\n", +  "DEBUG (-qDe, -qD8): free; free messages.\n", +  "DEBUG (-qDr, -qD16): resume; resume messages.\n", +  "DEBUG (-qDw, -qD32): weight; print weights for GC.\n", +  "DEBUG (-qDF, -qD64): fetch; fetch messages.\n", +  "DEBUG (-qDa, -qD128): ack; ack messages.\n", +  "DEBUG (-qDf, -qD256): fish; fish messages.\n", +  "DEBUG (-qDo, -qD512): forward; forwarding messages to other PEs.\n", +  "DEBUG (-qDp, -qD1024): pack; packing and unpacking graphs.\n" +}; + +/* one character codes for the available debug options */ +char par_debug_opts_flags[] = { +  'v', 't', 's', 'e', 'r', 'w', 'F', 'a', 'f', 'o', 'p'  
 +}; + +/* prefix strings printed with the debug messages of the corresponding type */ +char *par_debug_opts_prefix[] = { +  "  ", /* verbose */ +  "..", /* trace */ +  "--", /* schedule */ +  "!!", /* free */ +  "[]", /* resume */ +  ";;", /* weight */ +  "%%", /* fetch */ +  ",,", /* ack */ +  "$$", /* fish */ +  "", /* forward */ +  "**" /* pack */ +}; + +#endif /* PAR */ + +//@node Static function decls, Command-line option parsing routines, Constants +//@subsection Static function decls +  /* -----------------------------------------------------------------------------     Static function decls     -------------------------------------------------------------------------- */ @@ -56,6 +157,20 @@ open_stats_file (  static I_ decode(const char *s);  static void bad_option(const char *s); +#if defined(GRAN) +static void enable_GranSimLight(void); +static void process_gran_option(int arg, int *rts_argc, char *rts_argv[], rtsBool *error); +static void set_GranSim_debug_options(nat n); +static void help_GranSim_debug_options(nat n); +#elif defined(PAR) +static void process_par_option(int arg, int *rts_argc, char *rts_argv[], rtsBool *error); +static void set_par_debug_options(nat n); +static void help_par_debug_options(nat n); +#endif + +//@node Command-line option parsing routines, GranSim specific options, Static function decls +//@subsection Command-line option parsing routines +  /* -----------------------------------------------------------------------------   * Command-line option parsing routines.   
* ---------------------------------------------------------------------------*/ @@ -109,82 +224,92 @@ void initRtsFlagsDefaults(void)  #endif  #ifdef PAR -    RtsFlags.ParFlags.parallelStats	= rtsFalse; -    RtsFlags.ParFlags.granSimStats	= rtsFalse; -    RtsFlags.ParFlags.granSimStats_Binary = rtsFalse; +    RtsFlags.ParFlags.ParStats.Full   	  = rtsFalse; +    RtsFlags.ParFlags.ParStats.Binary 	  = rtsFalse; +    RtsFlags.ParFlags.ParStats.Sparks 	  = rtsFalse; +    RtsFlags.ParFlags.ParStats.Heap   	  = rtsFalse; +    RtsFlags.ParFlags.ParStats.NewLogfile = rtsFalse; +    RtsFlags.ParFlags.ParStats.Global     = rtsFalse; +      RtsFlags.ParFlags.outputDisabled	= rtsFalse;      RtsFlags.ParFlags.packBufferSize	= 1024; + +    RtsFlags.ParFlags.maxThreads        = 1024; +    RtsFlags.ParFlags.maxFishes        = MAX_FISHES; +    RtsFlags.ParFlags.fishDelay         = FISH_DELAY;  #endif  #if defined(PAR) || defined(SMP)      RtsFlags.ParFlags.maxLocalSparks	= 4096; -#endif +#endif /* PAR || SMP */ + +#if defined(GRAN) +    /* ToDo: check defaults for GranSim and GUM */ +    RtsFlags.ConcFlags.ctxtSwitchTime	= CS_MIN_MILLISECS;  /* In milliseconds */ +    RtsFlags.ConcFlags.maxThreads	= 65536; // refers to mandatory threads +    RtsFlags.GcFlags.maxStkSize		= (1024 * 1024) / sizeof(W_); +    RtsFlags.GcFlags.initialStkSize	= 1024 / sizeof(W_); + +    RtsFlags.GranFlags.GranSimStats.Full	= rtsFalse; +    RtsFlags.GranFlags.GranSimStats.Suppressed	= rtsFalse; +    RtsFlags.GranFlags.GranSimStats.Binary      = rtsFalse; +    RtsFlags.GranFlags.GranSimStats.Sparks      = rtsFalse; +    RtsFlags.GranFlags.GranSimStats.Heap        = rtsFalse; +    RtsFlags.GranFlags.GranSimStats.NewLogfile  = rtsFalse; +    RtsFlags.GranFlags.GranSimStats.Global      = rtsFalse; -#ifdef GRAN -    RtsFlags.GranFlags.granSimStats	= rtsFalse; -    RtsFlags.GranFlags.granSimStats_suppressed	= rtsFalse; -    RtsFlags.GranFlags.granSimStats_Binary = rtsFalse; -    
RtsFlags.GranFlags.granSimStats_Sparks = rtsFalse; -    RtsFlags.GranFlags.granSimStats_Heap = rtsFalse; -    RtsFlags.GranFlags.labelling	= rtsFalse;      RtsFlags.GranFlags.packBufferSize	= 1024;      RtsFlags.GranFlags.packBufferSize_internal = GRANSIM_DEFAULT_PACK_BUFFER_SIZE; -    RtsFlags.GranFlags.proc  = MAX_PROC; -    RtsFlags.GranFlags.max_fishes = MAX_FISHES; -    RtsFlags.GranFlags.time_slice = GRAN_TIME_SLICE; -    RtsFlags.GranFlags.Light = rtsFalse; - -    RtsFlags.GranFlags.gran_latency =             LATENCY;           -    RtsFlags.GranFlags.gran_additional_latency =  ADDITIONAL_LATENCY;  -    RtsFlags.GranFlags.gran_fetchtime =           FETCHTIME;  -    RtsFlags.GranFlags.gran_lunblocktime =        LOCALUNBLOCKTIME;  -    RtsFlags.GranFlags.gran_gunblocktime =        GLOBALUNBLOCKTIME; -    RtsFlags.GranFlags.gran_mpacktime =           MSGPACKTIME;       -    RtsFlags.GranFlags.gran_munpacktime =         MSGUNPACKTIME; -    RtsFlags.GranFlags.gran_mtidytime =           MSGTIDYTIME; - -    RtsFlags.GranFlags.gran_threadcreatetime =         THREADCREATETIME; -    RtsFlags.GranFlags.gran_threadqueuetime =          THREADQUEUETIME; -    RtsFlags.GranFlags.gran_threaddescheduletime =     THREADDESCHEDULETIME; -    RtsFlags.GranFlags.gran_threadscheduletime =       THREADSCHEDULETIME; -    RtsFlags.GranFlags.gran_threadcontextswitchtime =  THREADCONTEXTSWITCHTIME; - -    RtsFlags.GranFlags.gran_arith_cost =         ARITH_COST;        -    RtsFlags.GranFlags.gran_branch_cost =        BRANCH_COST;  -    RtsFlags.GranFlags.gran_load_cost =          LOAD_COST;         -    RtsFlags.GranFlags.gran_store_cost =         STORE_COST;  -    RtsFlags.GranFlags.gran_float_cost =         FLOAT_COST;        - -    RtsFlags.GranFlags.gran_heapalloc_cost =     HEAPALLOC_COST; - -    RtsFlags.GranFlags.gran_pri_spark_overhead = PRI_SPARK_OVERHEAD;         -    RtsFlags.GranFlags.gran_pri_sched_overhead = PRI_SCHED_OVERHEAD;         - -    
RtsFlags.GranFlags.DoFairSchedule = rtsFalse;              -    RtsFlags.GranFlags.DoReScheduleOnFetch = rtsFalse;         -    RtsFlags.GranFlags.DoStealThreadsFirst = rtsFalse;         -    RtsFlags.GranFlags.SimplifiedFetch = rtsFalse;             -    RtsFlags.GranFlags.DoAlwaysCreateThreads = rtsFalse;       -    RtsFlags.GranFlags.DoGUMMFetching = rtsFalse;              -    RtsFlags.GranFlags.DoThreadMigration = rtsFalse;           -    RtsFlags.GranFlags.FetchStrategy = 2;                      +    RtsFlags.GranFlags.proc         = MAX_PROC; +    RtsFlags.GranFlags.Fishing      = rtsFalse; +    RtsFlags.GranFlags.maxFishes   = MAX_FISHES; +    RtsFlags.GranFlags.time_slice   = GRAN_TIME_SLICE; +    RtsFlags.GranFlags.Light        = rtsFalse; + +    RtsFlags.GranFlags.Costs.latency =             LATENCY;           +    RtsFlags.GranFlags.Costs.additional_latency =  ADDITIONAL_LATENCY;  +    RtsFlags.GranFlags.Costs.fetchtime =           FETCHTIME;  +    RtsFlags.GranFlags.Costs.lunblocktime =        LOCALUNBLOCKTIME;  +    RtsFlags.GranFlags.Costs.gunblocktime =        GLOBALUNBLOCKTIME; +    RtsFlags.GranFlags.Costs.mpacktime =           MSGPACKTIME;       +    RtsFlags.GranFlags.Costs.munpacktime =         MSGUNPACKTIME; +    RtsFlags.GranFlags.Costs.mtidytime =           MSGTIDYTIME; + +    RtsFlags.GranFlags.Costs.threadcreatetime =         THREADCREATETIME; +    RtsFlags.GranFlags.Costs.threadqueuetime =          THREADQUEUETIME; +    RtsFlags.GranFlags.Costs.threaddescheduletime =     THREADDESCHEDULETIME; +    RtsFlags.GranFlags.Costs.threadscheduletime =       THREADSCHEDULETIME; +    RtsFlags.GranFlags.Costs.threadcontextswitchtime =  THREADCONTEXTSWITCHTIME; + +    RtsFlags.GranFlags.Costs.arith_cost =         ARITH_COST;        +    RtsFlags.GranFlags.Costs.branch_cost =        BRANCH_COST;  +    RtsFlags.GranFlags.Costs.load_cost =          LOAD_COST;         +    RtsFlags.GranFlags.Costs.store_cost =         STORE_COST;  +    
RtsFlags.GranFlags.Costs.float_cost =         FLOAT_COST;        + +    RtsFlags.GranFlags.Costs.heapalloc_cost =     HEAPALLOC_COST; + +    RtsFlags.GranFlags.Costs.pri_spark_overhead = PRI_SPARK_OVERHEAD;         +    RtsFlags.GranFlags.Costs.pri_sched_overhead = PRI_SCHED_OVERHEAD;         + +    RtsFlags.GranFlags.DoFairSchedule           = rtsFalse;              +    RtsFlags.GranFlags.DoAsyncFetch             = rtsFalse;         +    RtsFlags.GranFlags.DoStealThreadsFirst      = rtsFalse;         +    RtsFlags.GranFlags.DoAlwaysCreateThreads    = rtsFalse;       +    RtsFlags.GranFlags.DoBulkFetching           = rtsFalse;              +    RtsFlags.GranFlags.DoThreadMigration        = rtsFalse;           +    RtsFlags.GranFlags.FetchStrategy            = 2;                           RtsFlags.GranFlags.PreferSparksOfLocalNodes = rtsFalse;    -    RtsFlags.GranFlags.DoPrioritySparking = rtsFalse;          -    RtsFlags.GranFlags.DoPriorityScheduling = rtsFalse;        -    RtsFlags.GranFlags.SparkPriority = 0; -    RtsFlags.GranFlags.SparkPriority2 = 0;  -    RtsFlags.GranFlags.RandomPriorities = rtsFalse;            -    RtsFlags.GranFlags.InversePriorities = rtsFalse;           -    RtsFlags.GranFlags.IgnorePriorities = rtsFalse;            -    RtsFlags.GranFlags.ThunksToPack = 0;                       -    RtsFlags.GranFlags.RandomSteal = rtsTrue; -    RtsFlags.GranFlags.NoForward = rtsFalse; -    RtsFlags.GranFlags.PrintFetchMisses = rtsFalse; - -    RtsFlags.GranFlags.debug = 0x0; -    RtsFlags.GranFlags.event_trace = rtsFalse; -    RtsFlags.GranFlags.event_trace_all = rtsFalse; +    RtsFlags.GranFlags.DoPrioritySparking       = rtsFalse;          +    RtsFlags.GranFlags.DoPriorityScheduling     = rtsFalse;        +    RtsFlags.GranFlags.SparkPriority            = 0; +    RtsFlags.GranFlags.SparkPriority2           = 0;  +    RtsFlags.GranFlags.RandomPriorities         = rtsFalse;            +    RtsFlags.GranFlags.InversePriorities        = rtsFalse;     
      +    RtsFlags.GranFlags.IgnorePriorities         = rtsFalse;            +    RtsFlags.GranFlags.ThunksToPack             = 0;                       +    RtsFlags.GranFlags.RandomSteal              = rtsTrue;  #endif  #ifdef TICKY_TICKY @@ -279,10 +404,15 @@ usage_text[] = {  # ifdef SMP  "  -N<n>     Use <n> OS threads (default: 1)",  # endif +"  -e<size>        Size of spark pools (default 100)", +"  -t<num>   Set maximum number of advisory threads per PE (default 32)", +"  -o<num>   Set stack chunk size (default 1024)", +  # ifdef PAR -"  -q        Enable activity profile (output files in ~/<program>*.gr)", -"  -qb       Enable binary activity profile (output file /tmp/<program>.gb)", -"  -Q<size>  Set pack-buffer size (default: 1024)", +"  -qP       Enable activity profile (output files in ~/<program>*.gr)", +"  -qQ<size> Set pack-buffer size (default: 1024)", +"  -qd       Turn on PVM-ish debugging", +"  -qO       Disable output for performance measurement",  # endif  # if defined(SMP) || defined(PAR)  "  -e<n>     Maximum number of outstanding local sparks (default: 4096)", @@ -470,6 +600,8 @@ error = rtsTrue;                     if ((n>>7)&1) RtsFlags.DebugFlags.sanity      = rtsTrue;                     if ((n>>8)&1) RtsFlags.DebugFlags.stable      = rtsTrue;                     if ((n>>9)&1) RtsFlags.DebugFlags.prof        = rtsTrue; +                   if ((n>>10)&1) RtsFlags.DebugFlags.gran       = rtsTrue; +                   if ((n>>11)&1) RtsFlags.DebugFlags.par        = rtsTrue;                  }  		break;  #endif @@ -546,7 +678,7 @@ error = rtsTrue;  		RtsFlags.GcFlags.giveStats ++;  #ifdef PAR  		/* Opening all those files would almost certainly fail... 
*/ -		RtsFlags.ParFlags.parallelStats = rtsTrue; +		RtsFlags.ParFlags.ParStats.Full = rtsTrue;  		RtsFlags.GcFlags.statsFile = stderr; /* temporary; ToDo: rm */  #else  		RtsFlags.GcFlags.statsFile @@ -776,37 +908,9 @@ error = rtsTrue;  		}  		) break; -	      case 'O': +    	      case 'q':  		PAR_BUILD_ONLY( -		RtsFlags.ParFlags.outputDisabled = rtsTrue; -		) break; - -	      case 'q': /* activity profile option */ -    	    	PAR_BUILD_ONLY( -		if (rts_argv[arg][2] == 'b') -		    RtsFlags.ParFlags.granSimStats_Binary = rtsTrue; -    	    	else -    	    	    RtsFlags.ParFlags.granSimStats = rtsTrue; -		) break; - -#if 0 /* or??? */ -	      case 'q': /* quasi-parallel profile option */ -    	    	GRAN_BUILD_ONLY ( -		if (rts_argv[arg][2] == 'v') -		    do_qp_prof = 2; -    	    	else -    	    	    do_qp_prof++; -		) break; -#endif /* 0??? */ - -	      case 'Q': /* Set pack buffer size */ -		PAR_BUILD_ONLY( -		if (rts_argv[arg][2] != '\0') { -		    RtsFlags.ParFlags.packBufferSize = decode(rts_argv[arg]+2); -		} else { -		  prog_belch("missing size of PackBuffer (for -Q)"); -		  error = rtsTrue; -    	    	} +		process_par_option(arg, rts_argc, rts_argv, &error);  		) break;  	      /* =========== GRAN =============================== */ @@ -870,6 +974,942 @@ error = rtsTrue;      }  } +#if defined(GRAN) + +//@node GranSim specific options, Aux fcts, Command-line option parsing routines +//@subsection GranSim specific options + +static void +enable_GranSimLight(void) { + +    fprintf(stderr,"GrAnSim Light enabled (infinite number of processors;  0 communication costs)\n"); +    RtsFlags.GranFlags.Light=rtsTrue; +    RtsFlags.GranFlags.Costs.latency =  +	RtsFlags.GranFlags.Costs.fetchtime =  +	RtsFlags.GranFlags.Costs.additional_latency = +	RtsFlags.GranFlags.Costs.gunblocktime =  +	RtsFlags.GranFlags.Costs.lunblocktime = +	RtsFlags.GranFlags.Costs.threadcreatetime =  +	RtsFlags.GranFlags.Costs.threadqueuetime = +	RtsFlags.GranFlags.Costs.threadscheduletime =  +	
RtsFlags.GranFlags.Costs.threaddescheduletime = +	RtsFlags.GranFlags.Costs.threadcontextswitchtime = 0; +   +    RtsFlags.GranFlags.Costs.mpacktime =  +	RtsFlags.GranFlags.Costs.munpacktime = 0; + +    RtsFlags.GranFlags.DoFairSchedule = rtsTrue; +    RtsFlags.GranFlags.DoAsyncFetch = rtsFalse; +    RtsFlags.GranFlags.DoAlwaysCreateThreads = rtsTrue; +    /* FetchStrategy is irrelevant in GrAnSim-Light */ + +    /* GrAnSim Light often creates an abundance of parallel threads, +       each with its own stack etc. Therefore, it's in general a good +       idea to use small stack chunks (use the -o<size> option to  +       increase it again).  +    */ +    // RtsFlags.ConcFlags.stkChunkSize = 100; + +    RtsFlags.GranFlags.proc = 1;  +} + +static void +process_gran_option(int arg, int *rts_argc, char *rts_argv[], rtsBool *error) +{ +    if (rts_argv[arg][1] != 'b') /* All GranSim options start with -b */ +      return; + +    /* or a ridiculously idealised simulator */ +    if(strcmp((rts_argv[arg]+2),"oring")==0) { +      RtsFlags.GranFlags.Costs.latency =  +	RtsFlags.GranFlags.Costs.fetchtime =  +	RtsFlags.GranFlags.Costs.additional_latency = +	RtsFlags.GranFlags.Costs.gunblocktime =  +	RtsFlags.GranFlags.Costs.lunblocktime = +	RtsFlags.GranFlags.Costs.threadcreatetime =  +	RtsFlags.GranFlags.Costs.threadqueuetime = +	RtsFlags.GranFlags.Costs.threadscheduletime =  +	RtsFlags.GranFlags.Costs.threaddescheduletime = +	RtsFlags.GranFlags.Costs.threadcontextswitchtime = 0; + +      RtsFlags.GranFlags.Costs.mpacktime =  +	RtsFlags.GranFlags.Costs.munpacktime = 0; + +      RtsFlags.GranFlags.Costs.arith_cost =  +	RtsFlags.GranFlags.Costs.float_cost =  +	RtsFlags.GranFlags.Costs.load_cost = +	RtsFlags.GranFlags.Costs.store_cost =  +	RtsFlags.GranFlags.Costs.branch_cost = 0; + +      RtsFlags.GranFlags.Costs.heapalloc_cost = 1; + +      /* ++RtsFlags.GranFlags.DoFairSchedule; */ +      RtsFlags.GranFlags.DoStealThreadsFirst = rtsTrue;        /* -bZ */ +      
RtsFlags.GranFlags.DoThreadMigration   = rtsTrue;        /* -bM */ +      RtsFlags.GranFlags.GranSimStats.Full   = rtsTrue;        /* -bP */ +      return; +    } + +      /* or a somewhat idealised simulator */ +      if(strcmp((rts_argv[arg]+2),"onzo")==0) { +	RtsFlags.GranFlags.Costs.latency =  +	RtsFlags.GranFlags.Costs.fetchtime =  +	RtsFlags.GranFlags.Costs.additional_latency = +	RtsFlags.GranFlags.Costs.gunblocktime =  +	RtsFlags.GranFlags.Costs.lunblocktime = +	RtsFlags.GranFlags.Costs.threadcreatetime =  +	RtsFlags.GranFlags.Costs.threadqueuetime = +	RtsFlags.GranFlags.Costs.threadscheduletime =  +	RtsFlags.GranFlags.Costs.threaddescheduletime = +	RtsFlags.GranFlags.Costs.threadcontextswitchtime = 0; + +	RtsFlags.GranFlags.Costs.mpacktime =  +	RtsFlags.GranFlags.Costs.munpacktime = 0; +	 +	RtsFlags.GranFlags.Costs.heapalloc_cost = 1; + +	/* RtsFlags.GranFlags.DoFairSchedule  = rtsTrue; */       /* -b-R */ +	/* RtsFlags.GranFlags.DoStealThreadsFirst = rtsTrue; */   /* -b-T */ +	RtsFlags.GranFlags.DoAsyncFetch = rtsTrue;         /* -bZ */ +	RtsFlags.GranFlags.DoThreadMigration  = rtsTrue;          /* -bM */ +	RtsFlags.GranFlags.GranSimStats.Full  = rtsTrue;          /* -bP */ +#  if defined(GRAN_CHECK) && defined(GRAN) +	RtsFlags.GranFlags.Debug.event_stats = rtsTrue; /* print event statistics   */ +#  endif +	return; +      } + +      /* Communication and task creation cost parameters */ +      switch(rts_argv[arg][2]) { +        case '.': +	  IgnoreYields = rtsTrue; // HWL HACK +	  break; + +        case ':': +	  enable_GranSimLight();       /* set flags for GrAnSim-Light mode */ +	  break; + +        case 'l': +	  if (rts_argv[arg][3] != '\0') +	    { +	      RtsFlags.GranFlags.Costs.gunblocktime =  +	      RtsFlags.GranFlags.Costs.latency = decode(rts_argv[arg]+3); +	      RtsFlags.GranFlags.Costs.fetchtime = 2*RtsFlags.GranFlags.Costs.latency; +	    } +	  else +	    RtsFlags.GranFlags.Costs.latency = LATENCY; +	  break; + +        case 'a': +	  if 
(rts_argv[arg][3] != '\0') +	    RtsFlags.GranFlags.Costs.additional_latency = decode(rts_argv[arg]+3); +	  else +	    RtsFlags.GranFlags.Costs.additional_latency = ADDITIONAL_LATENCY; +	  break; + +        case 'm': +	  if (rts_argv[arg][3] != '\0') +	    RtsFlags.GranFlags.Costs.mpacktime = decode(rts_argv[arg]+3); +	  else +	    RtsFlags.GranFlags.Costs.mpacktime = MSGPACKTIME; +	  break; + +        case 'x': +	  if (rts_argv[arg][3] != '\0') +	    RtsFlags.GranFlags.Costs.mtidytime = decode(rts_argv[arg]+3); +	  else +	    RtsFlags.GranFlags.Costs.mtidytime = 0; +	  break; + +        case 'r': +	  if (rts_argv[arg][3] != '\0') +	    RtsFlags.GranFlags.Costs.munpacktime = decode(rts_argv[arg]+3); +	  else +	    RtsFlags.GranFlags.Costs.munpacktime = MSGUNPACKTIME; +	  break; +	   +        case 'g': +	  if (rts_argv[arg][3] != '\0') +	    RtsFlags.GranFlags.Costs.fetchtime = decode(rts_argv[arg]+3); +	  else +	    RtsFlags.GranFlags.Costs.fetchtime = FETCHTIME; +	  break; +	   +        case 'n': +	  if (rts_argv[arg][3] != '\0') +	    RtsFlags.GranFlags.Costs.gunblocktime = decode(rts_argv[arg]+3); +	  else +	    RtsFlags.GranFlags.Costs.gunblocktime = GLOBALUNBLOCKTIME; +	  break; + +        case 'u': +	  if (rts_argv[arg][3] != '\0') +	    RtsFlags.GranFlags.Costs.lunblocktime = decode(rts_argv[arg]+3); +	  else +	    RtsFlags.GranFlags.Costs.lunblocktime = LOCALUNBLOCKTIME; +	  break; + +	/* Thread-related metrics */ +        case 't': +	  if (rts_argv[arg][3] != '\0') +	    RtsFlags.GranFlags.Costs.threadcreatetime = decode(rts_argv[arg]+3); +	  else +	    RtsFlags.GranFlags.Costs.threadcreatetime = THREADCREATETIME; +	  break; +	   +        case 'q': +	  if (rts_argv[arg][3] != '\0') +	    RtsFlags.GranFlags.Costs.threadqueuetime = decode(rts_argv[arg]+3); +	  else +	    RtsFlags.GranFlags.Costs.threadqueuetime = THREADQUEUETIME; +	  break; +	   +        case 'c': +	  if (rts_argv[arg][3] != '\0') +	    RtsFlags.GranFlags.Costs.threadscheduletime = 
decode(rts_argv[arg]+3); +	  else +	    RtsFlags.GranFlags.Costs.threadscheduletime = THREADSCHEDULETIME; +	   +	  RtsFlags.GranFlags.Costs.threadcontextswitchtime = RtsFlags.GranFlags.Costs.threadscheduletime +	    + RtsFlags.GranFlags.Costs.threaddescheduletime; +	  break; + +        case 'd': +	  if (rts_argv[arg][3] != '\0') +	    RtsFlags.GranFlags.Costs.threaddescheduletime = decode(rts_argv[arg]+3); +	  else +	    RtsFlags.GranFlags.Costs.threaddescheduletime = THREADDESCHEDULETIME; +	   +	  RtsFlags.GranFlags.Costs.threadcontextswitchtime = RtsFlags.GranFlags.Costs.threadscheduletime +	    + RtsFlags.GranFlags.Costs.threaddescheduletime; +	  break; + +	/* Instruction Cost Metrics */ +        case 'A': +	  if (rts_argv[arg][3] != '\0') +	    RtsFlags.GranFlags.Costs.arith_cost = decode(rts_argv[arg]+3); +	  else +	    RtsFlags.GranFlags.Costs.arith_cost = ARITH_COST; +	  break; + +        case 'F': +	  if (rts_argv[arg][3] != '\0') +	    RtsFlags.GranFlags.Costs.float_cost = decode(rts_argv[arg]+3); +	  else +	    RtsFlags.GranFlags.Costs.float_cost = FLOAT_COST; +	  break; +		       +        case 'B': +	  if (rts_argv[arg][3] != '\0') +	    RtsFlags.GranFlags.Costs.branch_cost = decode(rts_argv[arg]+3); +	  else +	    RtsFlags.GranFlags.Costs.branch_cost = BRANCH_COST; +	  break; + +        case 'L': +	  if (rts_argv[arg][3] != '\0') +	    RtsFlags.GranFlags.Costs.load_cost = decode(rts_argv[arg]+3); +	  else +	    RtsFlags.GranFlags.Costs.load_cost = LOAD_COST; +	  break; +	   +        case 'S': +	  if (rts_argv[arg][3] != '\0') +	    RtsFlags.GranFlags.Costs.store_cost = decode(rts_argv[arg]+3); +	  else +	    RtsFlags.GranFlags.Costs.store_cost = STORE_COST; +	  break; + +        case 'H': +	  if (rts_argv[arg][3] != '\0') +	    RtsFlags.GranFlags.Costs.heapalloc_cost = decode(rts_argv[arg]+3); +	  else +	    RtsFlags.GranFlags.Costs.heapalloc_cost = 0; +	  break; + +        case 'y': +	  RtsFlags.GranFlags.DoAsyncFetch = rtsTrue; +	  if 
(rts_argv[arg][3] != '\0') +	    RtsFlags.GranFlags.FetchStrategy = decode(rts_argv[arg]+3); +	  else +	    RtsFlags.GranFlags.FetchStrategy = 2; +	  if (RtsFlags.GranFlags.FetchStrategy == 0) +	    RtsFlags.GranFlags.DoAsyncFetch = rtsFalse; +	  break; +	   +        case 'K':   /* sort overhead (per elem in spark list) */ +	  if (rts_argv[arg][3] != '\0') +	    RtsFlags.GranFlags.Costs.pri_spark_overhead = decode(rts_argv[arg]+3); +	  else +	    RtsFlags.GranFlags.Costs.pri_spark_overhead = PRI_SPARK_OVERHEAD; +	  fprintf(stderr,"Overhead for pri spark: %d (per elem).\n", +		         RtsFlags.GranFlags.Costs.pri_spark_overhead); +	  break; + +        case 'O':  /* sort overhead (per elem in spark list) */ +	  if (rts_argv[arg][3] != '\0') +	    RtsFlags.GranFlags.Costs.pri_sched_overhead = decode(rts_argv[arg]+3); +	  else +	    RtsFlags.GranFlags.Costs.pri_sched_overhead = PRI_SCHED_OVERHEAD; +	  fprintf(stderr,"Overhead for pri sched: %d (per elem).\n", +		       RtsFlags.GranFlags.Costs.pri_sched_overhead); +	  break; + +        /* General Parameters */ +        case 'p': +	  if (rts_argv[arg][3] != '\0') +	    { +	      RtsFlags.GranFlags.proc = decode(rts_argv[arg]+3); +	      if (RtsFlags.GranFlags.proc==0) { +		  enable_GranSimLight(); /* set flags for GrAnSim-Light mode */ +	      } else if (RtsFlags.GranFlags.proc > MAX_PROC ||  +			 RtsFlags.GranFlags.proc < 1) +		{ +		  fprintf(stderr,"setupRtsFlags: no more than %u processors +allowed\n",  +			  MAX_PROC); +		  *error = rtsTrue; +		} +	    } +	  else +	    RtsFlags.GranFlags.proc = MAX_PROC; +	  break; + +        case 'f': +	  RtsFlags.GranFlags.Fishing = rtsTrue; +	  if (rts_argv[arg][3] != '\0') +	    RtsFlags.GranFlags.maxFishes = decode(rts_argv[arg]+3); +	  else +	    RtsFlags.GranFlags.maxFishes = MAX_FISHES; +	  break; +	   +        case 'w': +	  if (rts_argv[arg][3] != '\0') +	    RtsFlags.GranFlags.time_slice = decode(rts_argv[arg]+3); +	  else +	    RtsFlags.GranFlags.time_slice = 
GRAN_TIME_SLICE; +	  break; +	   +        case 'C': +	  RtsFlags.GranFlags.DoAlwaysCreateThreads=rtsTrue; +	  RtsFlags.GranFlags.DoThreadMigration=rtsTrue; +	  break; + +        case 'G': +	  fprintf(stderr,"Bulk fetching enabled.\n"); +	  RtsFlags.GranFlags.DoBulkFetching=rtsTrue; +	  break; +	   +        case 'M': +	  fprintf(stderr,"Thread migration enabled.\n"); +	  RtsFlags.GranFlags.DoThreadMigration=rtsTrue; +	  break; + +        case 'R': +	  fprintf(stderr,"Fair Scheduling enabled.\n"); +	  RtsFlags.GranFlags.DoFairSchedule=rtsTrue; +	  break; +	   +        case 'I': +	  fprintf(stderr,"Priority Scheduling enabled.\n"); +	  RtsFlags.GranFlags.DoPriorityScheduling=rtsTrue; +	  break; + +        case 'T': +	  RtsFlags.GranFlags.DoStealThreadsFirst=rtsTrue; +	  RtsFlags.GranFlags.DoThreadMigration=rtsTrue; +	  break; +	   +        case 'Z': +	  RtsFlags.GranFlags.DoAsyncFetch=rtsTrue; +	  break; +	   +/*          case 'z': */ +/*  	  RtsFlags.GranFlags.SimplifiedFetch=rtsTrue; */ +/*  	  break; */ +	   +        case 'N': +	  RtsFlags.GranFlags.PreferSparksOfLocalNodes=rtsTrue; +	  break; +	   +        case 'b': +	  RtsFlags.GranFlags.GranSimStats.Binary=rtsTrue; +	  break; +	   +        case 'P': +	  /* format is -bP<c> where <c> is one char describing kind of profile */ +	  RtsFlags.GranFlags.GranSimStats.Full = rtsTrue; +	  switch(rts_argv[arg][3]) { +	  case '\0': break; // nothing special, just an ordinary profile +	  case '0': RtsFlags.GranFlags.GranSimStats.Suppressed = rtsTrue; +	    break; +	  case 'b': RtsFlags.GranFlags.GranSimStats.Binary = rtsTrue; +	    break; +	  case 's': RtsFlags.GranFlags.GranSimStats.Sparks = rtsTrue; +	    break; +	  case 'h': RtsFlags.GranFlags.GranSimStats.Heap = rtsTrue; +	    break; +	  case 'n': RtsFlags.GranFlags.GranSimStats.NewLogfile = rtsTrue; +	    break; +	  case 'g': RtsFlags.GranFlags.GranSimStats.Global = rtsTrue; +	    break; +	  default: barf("Unknown option -bP%c", rts_argv[arg][3]); +	  } +	  break; + +   
     case 's': +	  RtsFlags.GranFlags.GranSimStats.Sparks=rtsTrue; +	  break; + +        case 'h': +	  RtsFlags.GranFlags.GranSimStats.Heap=rtsTrue; +	  break; + +        case 'Y':   /* syntax: -bY<n>[,<n>]  n ... pos int */  +	  if (rts_argv[arg][3] != '\0') { +	    char *arg0, *tmp; +	     +	    arg0 = rts_argv[arg]+3; +	    if ((tmp = strstr(arg0,","))==NULL) { +	      RtsFlags.GranFlags.SparkPriority = decode(arg0); +	      fprintf(stderr,"SparkPriority: %u.\n",RtsFlags.GranFlags.SparkPriority); +	    } else { +	      *(tmp++) = '\0';  +	      RtsFlags.GranFlags.SparkPriority = decode(arg0); +	      RtsFlags.GranFlags.SparkPriority2 = decode(tmp); +	      fprintf(stderr,"SparkPriority: %u.\n", +		      RtsFlags.GranFlags.SparkPriority); +	      fprintf(stderr,"SparkPriority2:%u.\n", +		      RtsFlags.GranFlags.SparkPriority2); +	      if (RtsFlags.GranFlags.SparkPriority2 <  +		  RtsFlags.GranFlags.SparkPriority) { +		fprintf(stderr,"WARNING: 2nd pri < main pri (%u<%u); 2nd pri has no effect\n", +			RtsFlags.GranFlags.SparkPriority2, +			RtsFlags.GranFlags.SparkPriority); +	      } +	    } +	  } else { +	    /* plain pri spark is now invoked with -bX   +	       RtsFlags.GranFlags.DoPrioritySparking = 1; +	       fprintf(stderr,"PrioritySparking.\n"); +	    */ +	  } +	  break; + +        case 'Q': +	  if (rts_argv[arg][3] != '\0') { +	    RtsFlags.GranFlags.ThunksToPack = decode(rts_argv[arg]+3); +	  } else { +	    RtsFlags.GranFlags.ThunksToPack = 1; +	  } +	  fprintf(stderr,"Thunks To Pack in one packet: %u.\n", +		  RtsFlags.GranFlags.ThunksToPack); +	  break; +		       +        case 'e': +	  RtsFlags.GranFlags.RandomSteal = rtsFalse; +	  fprintf(stderr,"Deterministic mode (no random stealing)\n"); +		      break; + +	  /* The following class of options contains eXperimental */ +	  /* features in connection with exploiting granularity */ +	  /* information. I.e. 
if -bY is chosen these options */ +	  /* tell the RTS what to do with the supplied info --HWL */ + +        case 'W': +	  if (rts_argv[arg][3] != '\0') { +	    RtsFlags.GranFlags.packBufferSize_internal = decode(rts_argv[arg]+3); +	  } else { +	    RtsFlags.GranFlags.packBufferSize_internal = GRANSIM_DEFAULT_PACK_BUFFER_SIZE; +	  } +	  fprintf(stderr,"Size of GranSim internal pack buffer: %u.\n", +		  RtsFlags.GranFlags.packBufferSize_internal); +	  break; +  		       +        case 'X': +	  switch(rts_argv[arg][3]) { +	     +	    case '\0': +	      RtsFlags.GranFlags.DoPrioritySparking = 1; +	      fprintf(stderr,"Priority Sparking with Normal Priorities.\n"); +	      RtsFlags.GranFlags.InversePriorities = rtsFalse;  +	      RtsFlags.GranFlags.RandomPriorities = rtsFalse; +	      RtsFlags.GranFlags.IgnorePriorities = rtsFalse; +	      break; +			 +	    case 'I': +	      RtsFlags.GranFlags.DoPrioritySparking = 1; +	      fprintf(stderr,"Priority Sparking with Inverse Priorities.\n"); +	      RtsFlags.GranFlags.InversePriorities++;  +	      break; +	       +	    case 'R':  +	      RtsFlags.GranFlags.DoPrioritySparking = 1; +	      fprintf(stderr,"Priority Sparking with Random Priorities.\n"); +	      RtsFlags.GranFlags.RandomPriorities++; +	      break; +	       +	    case 'N': +	      RtsFlags.GranFlags.DoPrioritySparking = 1; +	      fprintf(stderr,"Priority Sparking with No Priorities.\n"); +	      RtsFlags.GranFlags.IgnorePriorities++; +	      break; +	       +	    default: +	      bad_option( rts_argv[arg] ); +	      break; +	  } +	  break; + +        case '-': +	  switch(rts_argv[arg][3]) { +	     +	    case 'C': +	      RtsFlags.GranFlags.DoAlwaysCreateThreads=rtsFalse; +	      RtsFlags.GranFlags.DoThreadMigration=rtsFalse; +	      break; + +	    case 'G': +	      RtsFlags.GranFlags.DoBulkFetching=rtsFalse; +	      break; +	       +	    case 'M': +	      RtsFlags.GranFlags.DoThreadMigration=rtsFalse; +	      break; + +	    case 'R': +	      
RtsFlags.GranFlags.DoFairSchedule=rtsFalse; +	      break; + +	    case 'T': +	      RtsFlags.GranFlags.DoStealThreadsFirst=rtsFalse; +	      RtsFlags.GranFlags.DoThreadMigration=rtsFalse; +	      break; + +	    case 'Z': +	      RtsFlags.GranFlags.DoAsyncFetch=rtsFalse; +	      break; +	       +	    case 'N': +	      RtsFlags.GranFlags.PreferSparksOfLocalNodes=rtsFalse; +			 break; +			  +	    case 'P': +	      RtsFlags.GranFlags.GranSimStats.Suppressed=rtsTrue; +	      break; + +	    case 's': +	      RtsFlags.GranFlags.GranSimStats.Sparks=rtsFalse; +	      break; +	     +	    case 'h': +	      RtsFlags.GranFlags.GranSimStats.Heap=rtsFalse; +	      break; +	     +	    case 'b': +	      RtsFlags.GranFlags.GranSimStats.Binary=rtsFalse; +	      break; +			  +	    case 'X': +	      RtsFlags.GranFlags.DoPrioritySparking = rtsFalse; +	      break; + +	    case 'Y': +	      RtsFlags.GranFlags.DoPrioritySparking = rtsFalse; +	      RtsFlags.GranFlags.SparkPriority = rtsFalse; +	      break; + +	    case 'I': +	      RtsFlags.GranFlags.DoPriorityScheduling = rtsFalse; +	      break; + +	    case 'e': +	      RtsFlags.GranFlags.RandomSteal = rtsFalse; +	      break; + +	    default: +	      bad_option( rts_argv[arg] ); +	      break; +	  } +	  break; + +#  if defined(GRAN_CHECK) && defined(GRAN) +        case 'D': +	  switch(rts_argv[arg][3]) { +	    case 'Q':    /* Set pack buffer size (same as 'Q' in GUM) */ +	      if (rts_argv[arg][4] != '\0') { +		RtsFlags.GranFlags.packBufferSize = decode(rts_argv[arg]+4); +		fprintf(stderr,"Pack buffer size: %d\n", +			RtsFlags.GranFlags.packBufferSize); +	      } else { +    	    	fprintf(stderr, "setupRtsFlags: missing size of PackBuffer (for -Q)\n"); +    	    	*error = rtsTrue; +    	      } +	      break; + +	  default: +	      if (isdigit(rts_argv[arg][3])) {/* Set all debugging options in one */ +	    	/* hack warning: interpret the flags as a binary number */ +	    	nat n = decode(rts_argv[arg]+3); +		
set_GranSim_debug_options(n); +	      } else { +		nat i; +		for (i=0; i<=MAX_GRAN_DEBUG_OPTION; i++)  +		  if (rts_argv[arg][3] == gran_debug_opts_flags[i]) +		    break; +		 +		if (i==MAX_GRAN_DEBUG_OPTION+1) { +		  fprintf(stderr, "Valid GranSim debug options are:\n"); +		  help_GranSim_debug_options(MAX_GRAN_DEBUG_MASK); +		  bad_option( rts_argv[arg] ); +		} else { // flag found; now set it +		  set_GranSim_debug_options(GRAN_DEBUG_MASK(i));  // 2^i +		} +	      } +	      break; +	       +#if 0 +	    case 'e':       /* event trace; also -bD1 */ +	      fprintf(stderr,"DEBUG: event_trace; printing event trace.\n"); +	      RtsFlags.GranFlags.Debug.event_trace = rtsTrue; +	      /* RtsFlags.GranFlags.event_trace=rtsTrue; */ +	      break; +	       +	    case 'E':       /* event statistics; also -bD2 */ +	      fprintf(stderr,"DEBUG: event_stats; printing event statistics.\n"); +	      RtsFlags.GranFlags.Debug.event_stats = rtsTrue; +	      /* RtsFlags.GranFlags.Debug |= 0x20; print event statistics   */ +	      break; +	       +	    case 'f':       /* thunkStealing; also -bD4 */ +	      fprintf(stderr,"DEBUG: thunkStealing; printing forwarding of FETCHNODES.\n"); +	      RtsFlags.GranFlags.Debug.thunkStealing = rtsTrue; +	      /* RtsFlags.GranFlags.Debug |= 0x2;  print fwd messages */ +	      break; + +	    case 'z':       /* blockOnFetch; also -bD8 */ +	      fprintf(stderr,"DEBUG: blockOnFetch; check for blocked on fetch.\n"); +	      RtsFlags.GranFlags.Debug.blockOnFetch = rtsTrue; +	      /* RtsFlags.GranFlags.Debug |= 0x4; debug non-reschedule-on-fetch */ +	      break; +	       +	    case 't':       /* blockOnFetch_sanity; also -bD16 */   +	      fprintf(stderr,"DEBUG: blockOnFetch_sanity; check for TSO asleep on fetch.\n"); +	      RtsFlags.GranFlags.Debug.blockOnFetch_sanity = rtsTrue; +	      /* RtsFlags.GranFlags.Debug |= 0x10; debug TSO asleep for fetch  */ +	      break; + +	    case 'S':       /* priSpark; also -bD32 */ +	      
fprintf(stderr,"DEBUG: priSpark; priority sparking.\n"); +	      RtsFlags.GranFlags.Debug.priSpark = rtsTrue; +	      break; + +	    case 's':       /* priSched; also -bD64 */ +	      fprintf(stderr,"DEBUG: priSched; priority scheduling.\n"); +	      RtsFlags.GranFlags.Debug.priSched = rtsTrue; +	      break; + +	    case 'F':       /* findWork; also -bD128 */ +	      fprintf(stderr,"DEBUG: findWork; searching spark-pools (local & remote), thread queues for work.\n"); +	      RtsFlags.GranFlags.Debug.findWork = rtsTrue; +	      break; +	       +	    case 'g':       /* globalBlock; also -bD256 */ +	      fprintf(stderr,"DEBUG: globalBlock; blocking on remote closures (FETCHMEs etc in GUM).\n"); +	      RtsFlags.GranFlags.Debug.globalBlock = rtsTrue; +	      break; +	       +	    case 'G':       /* pack; also -bD512 */ +	      fprintf(stderr,"DEBUG: pack; routines for (un-)packing graph structures.\n"); +	      RtsFlags.GranFlags.Debug.pack = rtsTrue; +	      break; +	       +	    case 'P':       /* packBuffer; also -bD1024 */ +	      fprintf(stderr,"DEBUG: packBuffer; routines handling pack buffer (GranSim internal!).\n"); +	      RtsFlags.GranFlags.Debug.packBuffer = rtsTrue; +	      break; +	       +	    case 'o':       /* sortedQ; also -bD2048 */ +	      fprintf(stderr,"DEBUG: sortedQ; check whether spark/thread queues are sorted.\n"); +	      RtsFlags.GranFlags.Debug.sortedQ = rtsTrue; +	      break; +	       +	    case 'r':       /* randomSteal; also -bD4096 */ +	      fprintf(stderr,"DEBUG: randomSteal; stealing sparks/threads from random PEs.\n"); +	      RtsFlags.GranFlags.Debug.randomSteal = rtsTrue; +	      break; +	       +	    case 'q':       /* checkSparkQ; also -bD8192 */ +	      fprintf(stderr,"DEBUG: checkSparkQ; check consistency of the spark queues.\n"); +	      RtsFlags.GranFlags.Debug.checkSparkQ = rtsTrue; +	      break; +	       +	    case ':':       /* checkLight; also -bD16384 */ +	      fprintf(stderr,"DEBUG: checkLight; check GranSim-Light 
setup.\n"); +	      RtsFlags.GranFlags.Debug.checkLight = rtsTrue; +	      break; +	       +	    case 'b':       /* bq; also -bD32768 */ +	      fprintf(stderr,"DEBUG: bq; check blocking queues\n"); +	      RtsFlags.GranFlags.Debug.bq = rtsTrue; +	      break; +	       +	    case 'd':       /* all options turned on */ +	      fprintf(stderr,"DEBUG: all options turned on.\n"); +	      set_GranSim_debug_options(MAX_GRAN_DEBUG_MASK); +	      /* RtsFlags.GranFlags.Debug |= 0x40; */ +	      break; + +/*  	    case '\0': */ +/*  	      RtsFlags.GranFlags.Debug = 1; */ +/*  	      break; */ +#endif + +	  } +	  break; +#  endif  /* GRAN_CHECK */ +      default: +	bad_option( rts_argv[arg] ); +	break; +      } +} + +/* +  Interpret n as a binary number masking GranSim debug options and set the  +  correxponding option. See gran_debug_opts_strs for explanations of the flags. +*/ +static void +set_GranSim_debug_options(nat n) { +  nat i; + +  for (i=0; i<=MAX_GRAN_DEBUG_OPTION; i++)  +    if ((n>>i)&1) { +      fprintf(stderr, gran_debug_opts_strs[i]); +      switch (i) { +        case 0: RtsFlags.GranFlags.Debug.event_trace   = rtsTrue;  break; +        case 1: RtsFlags.GranFlags.Debug.event_stats   = rtsTrue;  break; +        case 2: RtsFlags.GranFlags.Debug.bq            = rtsTrue;  break; +        case 3: RtsFlags.GranFlags.Debug.pack          = rtsTrue;  break; +        case 4: RtsFlags.GranFlags.Debug.checkSparkQ   = rtsTrue;  break; +        case 5: RtsFlags.GranFlags.Debug.thunkStealing = rtsTrue;  break; +        case 6: RtsFlags.GranFlags.Debug.randomSteal   = rtsTrue;  break; +        case 7: RtsFlags.GranFlags.Debug.findWork      = rtsTrue;  break; +        case 8: RtsFlags.GranFlags.Debug.unused        = rtsTrue;  break; +        case 9: RtsFlags.GranFlags.Debug.pri           = rtsTrue;  break; +        case 10: RtsFlags.GranFlags.Debug.checkLight   = rtsTrue;  break; +        case 11: RtsFlags.GranFlags.Debug.sortedQ      = rtsTrue;  break; +        case 12: 
RtsFlags.GranFlags.Debug.blockOnFetch = rtsTrue;  break; +        case 13: RtsFlags.GranFlags.Debug.packBuffer   = rtsTrue;  break; +        case 14: RtsFlags.GranFlags.Debug.blockOnFetch_sanity = rtsTrue;  break; +        default: barf("set_GranSim_debug_options: only %d debug options expected"); +      } /* switch */ +    } /* if */ +} + +/* +  Print one line explanation for each of the GranSim debug options specified +  in the bitmask n. +*/ +static void +help_GranSim_debug_options(nat n) { +  nat i; + +  for (i=0; i<=MAX_GRAN_DEBUG_OPTION; i++)  +    if ((n>>i)&1)  +      fprintf(stderr, gran_debug_opts_strs[i]); +} + +# elif defined(PAR) + +static void +process_par_option(int arg, int *rts_argc, char *rts_argv[], rtsBool *error) +{ +  if (rts_argv[arg][1] != 'q') /* All GUM options start with -q */ +    return; +   +  /* Communication and task creation cost parameters */ +  switch(rts_argv[arg][2]) { +  case 'e':  /* -qe<n>  ... allow <n> local sparks */ +    if (rts_argv[arg][3] != '\0') { /* otherwise, stick w/ the default */ +      RtsFlags.ParFlags.maxLocalSparks +	= strtol(rts_argv[arg]+3, (char **) NULL, 10); +       +      if (RtsFlags.ParFlags.maxLocalSparks <= 0) { +	belch("setupRtsFlags: bad value for -e\n"); +	*error = rtsTrue; +      } +    } +    IF_PAR_DEBUG(verbose, +		 belch("-qe<n>: max %d local sparks",  +		       RtsFlags.ParFlags.maxLocalSparks)); +    break; +   +  case 't': +    if (rts_argv[arg][3] != '\0') { +      RtsFlags.ParFlags.maxThreads +	= strtol(rts_argv[arg]+3, (char **) NULL, 10); +    } else { +      belch("setupRtsFlags: missing size for -qt\n"); +      *error = rtsTrue; +    } +    IF_PAR_DEBUG(verbose, +		 belch("-qt<n>: max %d threads",  +		       RtsFlags.ParFlags.maxThreads)); +    break; + +  case 'f': +    if (rts_argv[arg][3] != '\0') +      RtsFlags.ParFlags.maxFishes = decode(rts_argv[arg]+3); +    else +      RtsFlags.ParFlags.maxFishes = MAX_FISHES; +    break; +    IF_PAR_DEBUG(verbose, +		 belch("-qf<n>: max 
%d fishes sent out at one time",  +		       RtsFlags.ParFlags.maxFishes)); +    break; +   + +  case 'd': +    if (rts_argv[arg][3] != '\0') { +      RtsFlags.ParFlags.fishDelay +	= strtol(rts_argv[arg]+3, (char **) NULL, 10); +    } else { +      belch("setupRtsFlags: missing fish delay time for -qd\n"); +      *error = rtsTrue; +    } +    IF_PAR_DEBUG(verbose, +		 belch("-qd<n>: fish delay time %d",  +		       RtsFlags.ParFlags.fishDelay)); +    break; + +  case 'O': +    RtsFlags.ParFlags.outputDisabled = rtsTrue; +    IF_PAR_DEBUG(verbose, +		 belch("-qO: output disabled")); +    break; +   +  case 'P': /* -qP for writing a log file */ +    RtsFlags.ParFlags.ParStats.Full = rtsTrue; +    /* same encoding as in GranSim after -bP */	 +    switch(rts_argv[arg][3]) { +    case '\0': break; // nothing special, just an ordinary profile +      //case '0': RtsFlags.ParFlags.ParStats.Suppressed = rtsTrue; +      //  break; +    case 'b': RtsFlags.ParFlags.ParStats.Binary = rtsTrue; +      break; +    case 's': RtsFlags.ParFlags.ParStats.Sparks = rtsTrue; +      break; +      //case 'h': RtsFlags.parFlags.ParStats.Heap = rtsTrue; +      //  break; +    case 'n': RtsFlags.ParFlags.ParStats.NewLogfile = rtsTrue; +      break; +    case 'g': RtsFlags.ParFlags.ParStats.Global = rtsTrue; +      break; +    default: barf("Unknown option -qP%c", rts_argv[arg][2]); +    } +    IF_PAR_DEBUG(verbose, +		 belch("(-qP) writing to log-file (RtsFlags.ParFlags.ParStats.Full=%s)", +		       (RtsFlags.ParFlags.ParStats.Full ? "rtsTrue" : "rtsFalse"))); +    break; +   +  case 'Q': /* -qQ<n> ... 
set pack buffer size to <n> */ +    if (rts_argv[arg][3] != '\0') { +      RtsFlags.ParFlags.packBufferSize = decode(rts_argv[arg]+3); +    } else { +      belch("setupRtsFlags: missing size of PackBuffer (for -Q)\n"); +      error = rtsTrue; +    } +    IF_PAR_DEBUG(verbose, +		 belch("-qQ<n>: pack buffer size set to %d",  +		       RtsFlags.ParFlags.packBufferSize)); +    break; + +# if defined(DEBUG)   +  case 'w': +    if (rts_argv[arg][3] != '\0') { +      RtsFlags.ParFlags.wait +	= strtol(rts_argv[arg]+3, (char **) NULL, 10); +    } else { +      RtsFlags.ParFlags.wait = 1000; +    } +    IF_PAR_DEBUG(verbose, +		 belch("-qw<n>: length of wait loop after synchr before reduction: %d",  +		       RtsFlags.ParFlags.wait)); +    break; + +  case 'D':  /* -qD ... all the debugging options */ +    if (isdigit(rts_argv[arg][3])) {/* Set all debugging options in one */ +      /* hack warning: interpret the flags as a binary number */ +      nat n = decode(rts_argv[arg]+3); +      set_par_debug_options(n); +    } else { +      nat i; +      for (i=0; i<=MAX_PAR_DEBUG_OPTION; i++)  +	if (rts_argv[arg][3] == par_debug_opts_flags[i]) +	  break; +	 +      if (i==MAX_PAR_DEBUG_OPTION+1) { +	fprintf(stderr, "Valid GUM debug options are:\n"); +	help_par_debug_options(MAX_PAR_DEBUG_MASK); +	bad_option( rts_argv[arg] ); +      } else { // flag found; now set it +	set_par_debug_options(PAR_DEBUG_MASK(i));  // 2^i +      } +    } +    break; +# endif +  default: +    belch("Unknown option -q%c", rts_argv[arg][2]); +    break; +  } /* switch */ +} + +/* +  Interpret n as a binary number masking Par debug options and set the  +  correxponding option. See par_debug_opts_strs for explanations of the flags. 
+*/ +static void +set_par_debug_options(nat n) { +  nat i; + +  for (i=0; i<=MAX_PAR_DEBUG_OPTION; i++)  +    if ((n>>i)&1) { +      fprintf(stderr, par_debug_opts_strs[i]); +      switch (i) { +        case 0: RtsFlags.ParFlags.Debug.verbose       = rtsTrue;  break; +        case 1: RtsFlags.ParFlags.Debug.trace         = rtsTrue;  break; +        case 2: RtsFlags.ParFlags.Debug.schedule      = rtsTrue;  break; +        case 3: RtsFlags.ParFlags.Debug.free          = rtsTrue;  break; +        case 4: RtsFlags.ParFlags.Debug.resume        = rtsTrue;  break; +        case 5: RtsFlags.ParFlags.Debug.weight        = rtsTrue;  break; +        case 6: RtsFlags.ParFlags.Debug.fetch         = rtsTrue;  break; +        case 7: RtsFlags.ParFlags.Debug.ack           = rtsTrue;  break; +        case 8: RtsFlags.ParFlags.Debug.fish          = rtsTrue;  break; +        case 9: RtsFlags.ParFlags.Debug.forward       = rtsTrue;  break; +        case 10: RtsFlags.ParFlags.Debug.pack          = rtsTrue;  break; +        default: barf("set_par_debug_options: only %d debug options expected"); +      } /* switch */ +    } /* if */ +} + +/* +  Print one line explanation for each of the GranSim debug options specified +  in the bitmask n. 
+*/ +static void +help_par_debug_options(nat n) { +  nat i; + +  for (i=0; i<=MAX_PAR_DEBUG_OPTION; i++)  +    if ((n>>i)&1)  +      fprintf(stderr, par_debug_opts_strs[i]); +} + +#endif /* GRAN */ + +//@node Aux fcts,  , GranSim specific options +//@subsection Aux fcts +  static FILE *		/* return NULL on error */  open_stats_file (      I_ arg, diff --git a/ghc/rts/RtsFlags.h b/ghc/rts/RtsFlags.h index 238e2b6b1f..e3febb3dab 100644 --- a/ghc/rts/RtsFlags.h +++ b/ghc/rts/RtsFlags.h @@ -1,5 +1,5 @@  /* ----------------------------------------------------------------------------- - * $Id: RtsFlags.h,v 1.19 2000/01/12 15:15:17 simonmar Exp $ + * $Id: RtsFlags.h,v 1.20 2000/01/13 14:34:04 hwloidl Exp $   *   * (c) The GHC Team, 1998-1999   * @@ -59,6 +59,8 @@ struct DEBUG_FLAGS {    rtsBool stable      : 1; /* 256 */    rtsBool prof        : 1; /* 512 */ +  rtsBool gran        : 1; /* 1024 */ +  rtsBool par         : 1; /* 2048 */  };  #if defined(PROFILING) || defined(PAR) @@ -124,15 +126,46 @@ struct CONCURRENT_FLAGS {  };  #ifdef PAR +/* currently the same as GRAN_STATS_FLAGS */ +struct PAR_STATS_FLAGS { +  rtsBool Full;       /* Full .gr profile (rtsTrue) or only END events? */ +  // rtsBool Suppressed; /* No .gr profile at all */ +  rtsBool Binary;     /* Binary profile? (not yet implemented) */ +  rtsBool Sparks;     /* Info on sparks in profile? */ +  rtsBool Heap;       /* Info on heap allocs in profile? */  +  rtsBool NewLogfile; /* Use new log-file format? (not yet implemented) */ +  rtsBool Global;     /* Global statistics? 
(printed on shutdown; no log file) */ +}; + +struct PAR_DEBUG_FLAGS {   +  /* flags to control debugging output in various subsystems */ +  rtsBool verbose    : 1; /*    1 */ +  rtsBool trace      : 1; /*    2 */ +  rtsBool schedule   : 1; /*    4 */ +  rtsBool free       : 1; /*    8 */ +  rtsBool resume     : 1; /*   16 */ +  rtsBool weight     : 1; /*   32 */ +  rtsBool fetch      : 1; /*   64 */ +  rtsBool ack        : 1; /*  128 */ +  rtsBool fish       : 1; /*  256 */ +  rtsBool forward    : 1; /*  512 */ +  rtsBool pack       : 1; /* 1024 */ +}; + +#define MAX_PAR_DEBUG_OPTION     10 +#define PAR_DEBUG_MASK(n)        ((nat)(ldexp(1,n))) +#define MAX_PAR_DEBUG_MASK       ((nat)(ldexp(1,(MAX_PAR_DEBUG_OPTION+1))-1)) +  struct PAR_FLAGS { -  rtsBool parallelStats; 	/* Gather parallel statistics */ -  rtsBool granSimStats;	   /* Full .gr profile (rtsTrue) or only END events? */ -  rtsBool granSimStats_Binary; -   -  rtsBool outputDisabled;	/* Disable output for performance purposes */ -   -  unsigned int packBufferSize; -  unsigned int maxLocalSparks; +  struct PAR_STATS_FLAGS ParStats;  /* profile and stats output */ +  struct PAR_DEBUG_FLAGS Debug;         /* debugging options */ +  rtsBool  outputDisabled;	  /* Disable output for performance purposes */ +  nat      packBufferSize; +  nat	   maxLocalSparks;        /* spark pool size */ +  nat      maxThreads;            /* thread pool size */ +  nat      maxFishes;             /* max number of active fishes */ +  rtsTime  fishDelay;             /* delay before sending a new fish */ +  long   wait;  };  #endif /* PAR */ @@ -141,53 +174,88 @@ struct PAR_FLAGS {    nat            nNodes;         /* number of threads to run simultaneously */    unsigned int	 maxLocalSparks;  }; -#endif +#endif /* SMP */  #ifdef GRAN +struct GRAN_STATS_FLAGS { +  rtsBool Full;       /* Full .gr profile (rtsTrue) or only END events? */ +  rtsBool Suppressed; /* No .gr profile at all */ +  rtsBool Binary;     /* Binary profile? 
(not yet implemented) */ +  rtsBool Sparks;     /* Info on sparks in profile? */ +  rtsBool Heap;       /* Info on heap allocs in profile? */  +  rtsBool NewLogfile; /* Use new log-file format? (not yet implemented) */ +  rtsBool Global;     /* Global statistics? (printed on shutdown; no log file) */ +}; + +struct GRAN_COST_FLAGS { +  /* Communication Cost Variables -- set in main program */ +  nat latency;              /* Latency for single packet */ +  nat additional_latency;   /* Latency for additional packets */ +  nat fetchtime;             +  nat lunblocktime;         /* Time for local unblock */ +  nat gunblocktime;         /* Time for global unblock */ +  nat mpacktime;            /* Cost of creating a packet */      +  nat munpacktime;	    /* Cost of receiving a packet */     +  nat mtidytime;	    /* Cost of cleaning up after send */ +   +  nat threadcreatetime;     /* Thread creation costs */ +  nat threadqueuetime;      /* Cost of adding a thread to the running/runnable queue */ +  nat threaddescheduletime; /* Cost of descheduling a thread */ +  nat threadscheduletime;   /* Cost of scheduling a thread */ +  nat threadcontextswitchtime;  /* Cost of context switch  */ +   +  /* Instruction Costs */ +  nat arith_cost;        /* arithmetic instructions (+,i,< etc) */ +  nat branch_cost;       /* branch instructions */  +  nat load_cost;         /* load into register */ +  nat store_cost;        /* store into memory */ +  nat float_cost;        /* floating point operations */ +   +  nat heapalloc_cost;    /* heap allocation costs */ +   +  /* Overhead for granularity control mechanisms */ +  /* overhead per elem of spark queue */ +  nat pri_spark_overhead; +  /* overhead per elem of thread queue */ +  nat pri_sched_overhead; +}; + +struct GRAN_DEBUG_FLAGS {   +  /* flags to control debugging output in various subsystems */ +  rtsBool event_trace    : 1; /*    1 */ +  rtsBool event_stats    : 1; /*    2 */ +  rtsBool bq             : 1; /*    4 */ +  rtsBool 
pack           : 1; /*    8 */ +  rtsBool checkSparkQ    : 1; /*   16 */ +  rtsBool thunkStealing  : 1; /*   32 */ +  rtsBool randomSteal  	 : 1; /*   64 */ +  rtsBool findWork     	 : 1; /*  128 */ +  rtsBool unused     	 : 1; /*  256 */ +  rtsBool pri     	 : 1; /*  512 */ +  rtsBool checkLight   	 : 1; /* 1024 */ +  rtsBool sortedQ      	 : 1; /* 2048 */ +  rtsBool blockOnFetch   : 1; /* 4096 */ +  rtsBool packBuffer     : 1; /* 8192 */ +  rtsBool blockOnFetch_sanity : 1; /*  16384 */ +}; + +#define MAX_GRAN_DEBUG_OPTION     14 +#define GRAN_DEBUG_MASK(n)        ((nat)(ldexp(1,n))) +#define MAX_GRAN_DEBUG_MASK       ((nat)(ldexp(1,(MAX_GRAN_DEBUG_OPTION+1))-1)) +  struct GRAN_FLAGS { -    rtsBool granSimStats;  /* Full .gr profile (rtsTrue) or only END events? */ -    rtsBool granSimStats_suppressed; /* No .gr profile at all */ -    rtsBool granSimStats_Binary; -    rtsBool granSimStats_Sparks; -    rtsBool granSimStats_Heap; -    rtsBool labelling; -    unsigned int	    packBufferSize; -    unsigned int	    packBufferSize_internal; - -    int proc;                      /* number of processors */ -    int max_fishes;                /* max number of spark or thread steals */ -    TIME time_slice;              /* max time slice of one reduction thread */ - -    /* Communication Cost Variables -- set in main program */ -    unsigned int gran_latency;              /* Latency for single packet */ -    unsigned int gran_additional_latency;   /* Latency for additional packets */ -    unsigned int gran_fetchtime;             -    unsigned int gran_lunblocktime;         /* Time for local unblock */ -    unsigned int gran_gunblocktime;         /* Time for global unblock */ -    unsigned int gran_mpacktime;            /* Cost of creating a packet */      -    unsigned int gran_munpacktime;	  /* Cost of receiving a packet */     -    unsigned int gran_mtidytime;		  /* Cost of cleaning up after send */ - -    unsigned int gran_threadcreatetime;     /* Thread creation costs 
*/ -    unsigned int gran_threadqueuetime;      /* Cost of adding a thread to the running/runnable queue */ -    unsigned int gran_threaddescheduletime; /* Cost of descheduling a thread */ -    unsigned int gran_threadscheduletime;   /* Cost of scheduling a thread */ -    unsigned int gran_threadcontextswitchtime;  /* Cost of context switch  */ - -    /* Instruction Costs */ -    unsigned int gran_arith_cost;        /* arithmetic instructions (+,i,< etc) */ -    unsigned int gran_branch_cost;       /* branch instructions */  -    unsigned int gran_load_cost;         /* load into register */ -    unsigned int gran_store_cost;        /* store into memory */ -    unsigned int gran_float_cost;        /* floating point operations */ - -    unsigned int gran_heapalloc_cost;    /* heap allocation costs */ - -    /* Overhead for granularity control mechanisms */ -    /* overhead per elem of spark queue */ -    unsigned int gran_pri_spark_overhead; -    /* overhead per elem of thread queue */ -    unsigned int gran_pri_sched_overhead; +  struct GRAN_STATS_FLAGS GranSimStats;  /* profile and stats output */ +  struct GRAN_COST_FLAGS Costs;          /* cost metric for simulation */ +  struct GRAN_DEBUG_FLAGS Debug;         /* debugging options */ + +  // rtsBool labelling; +  nat  packBufferSize; +  nat  packBufferSize_internal; + +  PEs proc;                     /* number of processors */ +  rtsBool Fishing;              /* Simulate GUM style fishing mechanism? */ +  nat maxFishes;                /* max number of spark or thread steals */ +  rtsTime time_slice;           /* max time slice of one reduction thread */      /* GrAnSim-Light: This version puts no bound on the number of           processors but in exchange doesn't model communication costs @@ -198,30 +266,27 @@ struct GRAN_FLAGS {      rtsBool Light;      rtsBool DoFairSchedule ;        /* fair scheduling alg? default: unfair */ -    rtsBool DoReScheduleOnFetch ;   /* async. communication? 
*/ +    rtsBool DoAsyncFetch;           /* async. communication? */      rtsBool DoStealThreadsFirst;    /* prefer threads over sparks when stealing */ -    rtsBool SimplifiedFetch;        /* fast but inaccurate fetch modelling */ -    rtsBool DoAlwaysCreateThreads;  /* eager thread creation */ -    rtsBool DoGUMMFetching;         /* bulk fetching */ -    rtsBool DoThreadMigration;      /* allow to move threads */ -    int      FetchStrategy;          /* what to do when waiting for data */ -    rtsBool PreferSparksOfLocalNodes; /* prefer local over global sparks */ -    rtsBool DoPrioritySparking;     /* sparks sorted by priorities */ -    rtsBool DoPriorityScheduling;   /* threads sorted by priorities */ -    int      SparkPriority;          /* threshold for cut-off mechanism */ -    int      SparkPriority2; -    rtsBool RandomPriorities; -    rtsBool InversePriorities; -    rtsBool IgnorePriorities; -    int      ThunksToPack;           /* number of thunks in packet + 1 */  -    rtsBool RandomSteal;            /* steal spark/thread from random proc */ -    rtsBool NoForward;              /* no forwarding of fetch messages */ -    rtsBool PrintFetchMisses;       /* print number of fetch misses */ - -    unsigned int	    debug; -    rtsBool event_trace; -    rtsBool event_trace_all; -    +  rtsBool DoAlwaysCreateThreads;  /* eager thread creation */ +  rtsBool DoBulkFetching;         /* bulk fetching */ +  rtsBool DoThreadMigration;      /* allow to move threads */ +  nat     FetchStrategy;         /* what to do when waiting for data */ +  rtsBool PreferSparksOfLocalNodes; /* prefer local over global sparks */ +  rtsBool DoPrioritySparking;     /* sparks sorted by priorities */ +  rtsBool DoPriorityScheduling;   /* threads sorted by priorities */ +  nat     SparkPriority;         /* threshold for cut-off mechanism */ +  nat     SparkPriority2; +  rtsBool RandomPriorities; +  rtsBool InversePriorities; +  rtsBool IgnorePriorities; +  nat     ThunksToPack;      /* 
number of thunks in packet + 1 */  +  rtsBool RandomSteal;        /* steal spark/thread from random proc */ +  rtsBool NoForward;        /* no forwarding of fetch messages */ + +  // unsigned int	    debug; +  //  rtsBool event_trace; +  //  rtsBool event_trace_all;  };  #endif /* GRAN */ diff --git a/ghc/rts/RtsStartup.c b/ghc/rts/RtsStartup.c index 0996ba037c..a589b18f1a 100644 --- a/ghc/rts/RtsStartup.c +++ b/ghc/rts/RtsStartup.c @@ -1,5 +1,5 @@  /* ----------------------------------------------------------------------------- - * $Id: RtsStartup.c,v 1.25 1999/12/20 10:34:37 simonpj Exp $ + * $Id: RtsStartup.c,v 1.26 2000/01/13 14:34:04 hwloidl Exp $   *   * (c) The GHC Team, 1998-1999   * @@ -25,7 +25,12 @@  # include "ProfHeap.h"  #endif -#ifdef PAR +#if defined(GRAN) +#include "GranSimRts.h" +#include "ParallelRts.h" +#endif + +#if defined(PAR)  #include "ParInit.h"  #include "Parallel.h"  #include "LLC.h" @@ -37,6 +42,9 @@  struct RTS_FLAGS RtsFlags;  static int rts_has_started_up = 0; +#if defined(PAR) +static ullong startTime = 0; +#endif  void  startupHaskell(int argc, char *argv[]) @@ -51,10 +59,6 @@ startupHaskell(int argc, char *argv[])     else       rts_has_started_up=1; -#if defined(PAR) -    int nPEs = 0;		    /* Number of PEs */ -#endif -      /* The very first thing we do is grab the start time...just in case we're       * collecting timing statistics.       */ @@ -62,13 +66,15 @@ startupHaskell(int argc, char *argv[])  #ifdef PAR  /* - *The parallel system needs to be initialised and synchronised before - *the program is run.   + * The parallel system needs to be initialised and synchronised before + * the program is run.     
*/ +    fprintf(stderr, "startupHaskell: argv[0]=%s\n", argv[0]);      if (*argv[0] == '-') {     /* Look to see whether we're the Main Thread */  	IAmMainThread = rtsTrue;          argv++; argc--;			/* Strip off flag argument */ -/*	fprintf(stderr, "I am Main Thread\n"); */ +	// IF_PAR_DEBUG(verbose, +		     fprintf(stderr, "[%x] I am Main Thread\n", mytid);      }      /*        * Grab the number of PEs out of the argument vector, and @@ -78,7 +84,6 @@ startupHaskell(int argc, char *argv[])      argv[1] = argv[0];      argv++; argc--;      initEachPEHook();                  /* HWL: hook to be execed on each PE */ -    SynchroniseSystem();  #endif      /* Set the RTS flags to default values. */ @@ -92,13 +97,10 @@ startupHaskell(int argc, char *argv[])      prog_argc = argc;      prog_argv = argv; -#ifdef PAR -   /* Initialise the parallel system -- before initHeap! */ -    initParallelSystem(); -   /* And start GranSim profiling if required: omitted for now -    *if (Rtsflags.ParFlags.granSimStats) -    *init_gr_profiling(rts_argc, rts_argv, prog_argc, prog_argv); -    */ +#if defined(PAR) +    /* NB: this really must be done after processing the RTS flags */ +    fprintf(stderr, "Synchronising system (%d PEs)\n", nPEs); +    SynchroniseSystem();             // calls initParallelSystem etc  #endif	/* PAR */      /* initialise scheduler data structures (needs to be done before @@ -106,6 +108,16 @@ startupHaskell(int argc, char *argv[])       */      initScheduler(); +#if defined(GRAN) +    /* And start GranSim profiling if required: */ +    if (RtsFlags.GranFlags.GranSimStats.Full) +      init_gr_simulation(rts_argc, rts_argv, prog_argc, prog_argv); +#elif defined(PAR) +    /* And start GUM profiling if required: */ +    if (RtsFlags.ParFlags.ParStats.Full) +      init_gr_simulation(rts_argc, rts_argv, prog_argc, prog_argv); +#endif	/* PAR || GRAN */ +      /* initialize the storage manager */      initStorage(); @@ -179,12 +191,14 @@ shutdownHaskell(void)    /* 
start timing the shutdown */    stat_startExit(); +#if !defined(GRAN)    /* Finalize any remaining weak pointers */    finalizeWeakPointersNow(); +#endif  #if defined(GRAN) -  #error FixMe. -  if (!RTSflags.GranFlags.granSimStats_suppressed) +  /* end_gr_simulation prints global stats if requested -- HWL */ +  if (!RtsFlags.GranFlags.GranSimStats.Suppressed)      end_gr_simulation();  #endif @@ -220,8 +234,12 @@ shutdownHaskell(void)  #endif    rts_has_started_up=0; -} +#if defined(PAR) +  shutdownParallelSystem(0); +#endif + +}  /*    * called from STG-land to exit the program @@ -230,7 +248,7 @@ shutdownHaskell(void)  void    stg_exit(I_ n)  { -#ifdef PAR +#if 0 /* def PAR */    par_exit(n);  #else    exit(n); diff --git a/ghc/rts/RtsUtils.c b/ghc/rts/RtsUtils.c index 28fb2f73da..5e53b7db62 100644 --- a/ghc/rts/RtsUtils.c +++ b/ghc/rts/RtsUtils.c @@ -1,5 +1,5 @@  /* ----------------------------------------------------------------------------- - * $Id: RtsUtils.c,v 1.12 2000/01/13 12:40:16 simonmar Exp $ + * $Id: RtsUtils.c,v 1.13 2000/01/13 14:34:04 hwloidl Exp $   *   * (c) The GHC Team, 1998-1999   * @@ -8,6 +8,7 @@   * ---------------------------------------------------------------------------*/  #include "Rts.h" +#include "RtsTypes.h"  #include "RtsAPI.h"  #include "RtsFlags.h"  #include "Hooks.h" @@ -23,6 +24,10 @@  #include <fcntl.h>  #endif +#ifdef HAVE_GETTIMEOFDAY +#include <sys/time.h> +#endif +  #include <stdarg.h>  /* variable-argument error function. */ @@ -182,7 +187,7 @@ resetGenSymZh(void) /* it's your funeral */     Get the current time as a string.  Used in profiling reports.     
-------------------------------------------------------------------------- */ -#if defined(PROFILING) || defined(DEBUG) +#if defined(PROFILING) || defined(DEBUG) || defined(PAR) || defined(GRAN)  char *  time_str(void)  { @@ -219,6 +224,44 @@ resetNonBlockingFd(int fd)  #endif  } +#if 0 +static ullong startTime = 0; + +/* used in a parallel setup */ +ullong +msTime(void) +{ +# if defined(HAVE_GETCLOCK) && !defined(alpha_TARGET_ARCH) +    struct timespec tv; + +    if (getclock(TIMEOFDAY, &tv) != 0) { +	fflush(stdout); +	fprintf(stderr, "Clock failed\n"); +	stg_exit(EXIT_FAILURE); +    } +    return tv.tv_sec * LL(1000) + tv.tv_nsec / LL(1000000) - startTime; +# elif HAVE_GETTIMEOFDAY && !defined(alpha_TARGET_ARCH) +    struct timeval tv; +  +    if (gettimeofday(&tv, NULL) != 0) { +	fflush(stdout); +	fprintf(stderr, "Clock failed\n"); +	stg_exit(EXIT_FAILURE); +    } +    return tv.tv_sec * LL(1000) + tv.tv_usec / LL(1000) - startTime; +# else +    time_t t; +    if ((t = time(NULL)) == (time_t) -1) { +	fflush(stdout); +	fprintf(stderr, "Clock failed\n"); +	stg_exit(EXIT_FAILURE); +    } +    return t * LL(1000) - startTime; +# endif +} +#endif + +  /* -----------------------------------------------------------------------------     Print large numbers, with punctuation.     
-------------------------------------------------------------------------- */ diff --git a/ghc/rts/RtsUtils.h b/ghc/rts/RtsUtils.h index 8f5581c472..79557e847a 100644 --- a/ghc/rts/RtsUtils.h +++ b/ghc/rts/RtsUtils.h @@ -1,5 +1,5 @@  /* ----------------------------------------------------------------------------- - * $Id: RtsUtils.h,v 1.6 2000/01/12 15:15:17 simonmar Exp $ + * $Id: RtsUtils.h,v 1.7 2000/01/13 14:34:04 hwloidl Exp $   *   * (c) The GHC Team, 1998-1999   * @@ -30,7 +30,8 @@ extern nat stg_strlen(char *str);  /*Defined in Main.c, but made visible here*/  extern void stg_exit(I_ n) __attribute__((noreturn)); -char * time_str(void); - +char *time_str(void);  char *ullong_format_string(ullong, char *, rtsBool); +//ullong   msTime(void); + diff --git a/ghc/rts/Sanity.c b/ghc/rts/Sanity.c index 920530a764..c0a602a9f6 100644 --- a/ghc/rts/Sanity.c +++ b/ghc/rts/Sanity.c @@ -1,5 +1,5 @@  /* ----------------------------------------------------------------------------- - * $Id: Sanity.c,v 1.14 1999/05/21 14:37:12 sof Exp $ + * $Id: Sanity.c,v 1.15 2000/01/13 14:34:04 hwloidl Exp $   *   * (c) The GHC Team, 1998-1999   * @@ -14,17 +14,36 @@   *   * ---------------------------------------------------------------------------*/ +//@menu +//* Includes::			 +//* Macros::			 +//* Stack sanity::		 +//* Heap Sanity::		 +//* TSO Sanity::		 +//* Thread Queue Sanity::	 +//* Blackhole Sanity::		 +//@end menu + +//@node Includes, Macros +//@subsection Includes +  #include "Rts.h" -#ifdef DEBUG +#ifdef DEBUG                                                   /* whole file */  #include "RtsFlags.h"  #include "RtsUtils.h"  #include "BlockAlloc.h"  #include "Sanity.h" +//@node Macros, Stack sanity, Includes +//@subsection Macros +  #define LOOKS_LIKE_PTR(r) (LOOKS_LIKE_STATIC_CLOSURE(r) || ((HEAP_ALLOCED(r) && Bdescr((P_)r)->free != (void *)-1))) +//@node Stack sanity, Heap Sanity, Macros +//@subsection Stack sanity +  /* 
-----------------------------------------------------------------------------     Check stack sanity     -------------------------------------------------------------------------- */ @@ -42,6 +61,7 @@ static StgOffset checkLargeBitmap( StgPtr payload,  void checkClosureShallow( StgClosure* p ); +//@cindex checkSmallBitmap  static StgOffset   checkSmallBitmap( StgPtr payload, StgWord32 bitmap )  { @@ -56,7 +76,7 @@ checkSmallBitmap( StgPtr payload, StgWord32 bitmap )      return i;  } - +//@cindex checkLargeBitmap  static StgOffset   checkLargeBitmap( StgPtr payload, StgLargeBitmap* large_bitmap )  { @@ -75,6 +95,7 @@ checkLargeBitmap( StgPtr payload, StgLargeBitmap* large_bitmap )      return i;  } +//@cindex checkStackClosure  StgOffset   checkStackClosure( StgClosure* c )  {     @@ -91,17 +112,28 @@ checkStackClosure( StgClosure* c )      case RET_BCO: /* small bitmap (<= 32 entries) */      case RET_SMALL:      case RET_VEC_SMALL: +            return 1 + checkSmallBitmap((StgPtr)c + 1,info->layout.bitmap); +            case UPDATE_FRAME:      case CATCH_FRAME:      case STOP_FRAME:      case SEQ_FRAME: -	    return 1 + checkSmallBitmap((StgPtr)c + 1,info->layout.bitmap); +#if defined(GRAN) +            return 2 + +#else +            return 1 + +#endif +	               checkSmallBitmap((StgPtr)c + 1,info->layout.bitmap);      case RET_BIG: /* large bitmap (> 32 entries) */      case RET_VEC_BIG:  	    return 1 + checkLargeBitmap((StgPtr)c + 1,info->layout.large_bitmap);      case FUN:      case FUN_STATIC: /* probably a slow-entry point return address: */ -	    return 1; +#if 0 && defined(GRAN) +            return 2; +#else +            return 1; +#endif      default:         	    /* if none of the above, maybe it's a closure which looks a         	     * little like an infotable @@ -118,6 +150,7 @@ checkStackClosure( StgClosure* c )   * chunks.   
*/ +//@cindex checkClosureShallow  void   checkClosureShallow( StgClosure* p )  { @@ -133,6 +166,7 @@ checkClosureShallow( StgClosure* p )  }  /* check an individual stack object */ +//@cindex checkStackObject  StgOffset   checkStackObject( StgPtr sp )  { @@ -151,6 +185,7 @@ checkStackObject( StgPtr sp )  }  /* check sections of stack between update frames */ +//@cindex checkStackChunk  void   checkStackChunk( StgPtr sp, StgPtr stack_end )  { @@ -160,9 +195,10 @@ checkStackChunk( StgPtr sp, StgPtr stack_end )      while (p < stack_end) {  	p += checkStackObject( p );      } -    ASSERT( p == stack_end ); +    // ASSERT( p == stack_end ); -- HWL  } +//@cindex checkStackChunk  StgOffset   checkClosure( StgClosure* p )  { @@ -332,13 +368,17 @@ checkClosure( StgClosure* p )      case BLOCKED_FETCH:      case FETCH_ME:      case EVACUATED: -	    barf("checkClosure: unimplemented/strange closure type"); +	    barf("checkClosure: unimplemented/strange closure type %d", +		 info->type);      default: -	    barf("checkClosure"); +	    barf("checkClosure (closure type %d)", info->type);      }  #undef LOOKS_LIKE_PTR  } +//@node Heap Sanity, TSO Sanity, Stack sanity +//@subsection Heap Sanity +  /* -----------------------------------------------------------------------------     Check Heap Sanity @@ -348,6 +388,7 @@ checkClosure( StgClosure* p )     all the objects in the remainder of the chain.     
-------------------------------------------------------------------------- */ +//@cindex checkHeap  extern void   checkHeap(bdescr *bd, StgPtr start)  { @@ -377,6 +418,7 @@ checkHeap(bdescr *bd, StgPtr start)      }  } +//@cindex checkChain  extern void  checkChain(bdescr *bd)  { @@ -387,6 +429,7 @@ checkChain(bdescr *bd)  }  /* check stack - making sure that update frames are linked correctly */ +//@cindex checkStack  void   checkStack(StgPtr sp, StgPtr stack_end, StgUpdateFrame* su )  { @@ -415,6 +458,10 @@ checkStack(StgPtr sp, StgPtr stack_end, StgUpdateFrame* su )      ASSERT(stgCast(StgPtr,su) == stack_end);  } +//@node TSO Sanity, Thread Queue Sanity, Heap Sanity +//@subsection TSO Sanity + +//@cindex checkTSO  extern void  checkTSO(StgTSO *tso)  { @@ -437,6 +484,69 @@ checkTSO(StgTSO *tso)      checkStack(sp, stack_end, su);  } +#if defined(GRAN) +//@cindex checkTSOsSanity +extern void   +checkTSOsSanity(void) { +  nat i, tsos; +  StgTSO *tso; +   +  belch("Checking sanity of all runnable TSOs:"); +   +  for (i=0, tsos=0; i<RtsFlags.GranFlags.proc; i++) { +    for (tso=run_queue_hds[i]; tso!=END_TSO_QUEUE; tso=tso->link) { +      fprintf(stderr, "TSO %p on PE %d ...", tso, i); +      checkTSO(tso);  +      fprintf(stderr, "OK, "); +      tsos++; +    } +  } +   +  belch(" checked %d TSOs on %d PEs; ok\n", tsos, RtsFlags.GranFlags.proc); +} + +//@node Thread Queue Sanity, Blackhole Sanity, TSO Sanity +//@subsection Thread Queue Sanity + +// still GRAN only + +//@cindex checkThreadQSanity +extern rtsBool +checkThreadQSanity (PEs proc, rtsBool check_TSO_too)  +{ +  StgTSO *tso, *prev; + +  /* the NIL value for TSOs is END_TSO_QUEUE; thus, finding NULL is an error */ +  ASSERT(run_queue_hds[proc]!=NULL); +  ASSERT(run_queue_tls[proc]!=NULL); +  /* if either head or tail is NIL then the other one must be NIL, too */ +  ASSERT(run_queue_hds[proc]!=END_TSO_QUEUE || run_queue_tls[proc]==END_TSO_QUEUE); +  ASSERT(run_queue_tls[proc]!=END_TSO_QUEUE || 
run_queue_hds[proc]==END_TSO_QUEUE); +  for (tso=run_queue_hds[proc], prev=END_TSO_QUEUE;  +       tso!=END_TSO_QUEUE; +       prev=tso, tso=tso->link) { +    ASSERT((prev!=END_TSO_QUEUE || tso==run_queue_hds[proc]) && +	   (prev==END_TSO_QUEUE || prev->link==tso)); +    if (check_TSO_too) +      checkTSO(tso); +  } +  ASSERT(prev==run_queue_tls[proc]); +} + +//@cindex checkThreadQsSanity +extern rtsBool +checkThreadQsSanity (rtsBool check_TSO_too) +{ +  PEs p; +   +  for (p=0; p<RtsFlags.GranFlags.proc; p++) +    checkThreadQSanity(p, check_TSO_too); +} +#endif /* GRAN */ + +//@node Blackhole Sanity, Index, Thread Queue Sanity +//@subsection Blackhole Sanity +  /* -----------------------------------------------------------------------------     Check Blackhole Sanity @@ -448,7 +558,9 @@ checkTSO(StgTSO *tso)     the update frame list.     -------------------------------------------------------------------------- */ -rtsBool isBlackhole( StgTSO* tso, StgClosure* p ) +//@cindex isBlackhole +rtsBool  +isBlackhole( StgTSO* tso, StgClosure* p )  {    StgUpdateFrame* su = tso->su;    do { @@ -474,4 +586,26 @@ rtsBool isBlackhole( StgTSO* tso, StgClosure* p )    } while (1);  } +//@node Index,  , Blackhole Sanity +//@subsection Index + +//@index +//* checkChain::  @cindex\s-+checkChain +//* checkClosureShallow::  @cindex\s-+checkClosureShallow +//* checkHeap::  @cindex\s-+checkHeap +//* checkLargeBitmap::  @cindex\s-+checkLargeBitmap +//* checkSmallBitmap::  @cindex\s-+checkSmallBitmap +//* checkStack::  @cindex\s-+checkStack +//* checkStackChunk::  @cindex\s-+checkStackChunk +//* checkStackChunk::  @cindex\s-+checkStackChunk +//* checkStackClosure::  @cindex\s-+checkStackClosure +//* checkStackObject::  @cindex\s-+checkStackObject +//* checkTSO::  @cindex\s-+checkTSO +//* checkTSOsSanity::  @cindex\s-+checkTSOsSanity +//* checkThreadQSanity::  @cindex\s-+checkThreadQSanity +//* checkThreadQsSanity::  @cindex\s-+checkThreadQsSanity +//* isBlackhole::  
@cindex\s-+isBlackhole +//@end index +  #endif /* DEBUG */ + diff --git a/ghc/rts/Sanity.h b/ghc/rts/Sanity.h index 6ab9c846d9..1bd2157884 100644 --- a/ghc/rts/Sanity.h +++ b/ghc/rts/Sanity.h @@ -1,5 +1,5 @@  /* ----------------------------------------------------------------------------- - * $Id: Sanity.h,v 1.4 1999/02/05 16:02:52 simonm Exp $ + * $Id: Sanity.h,v 1.5 2000/01/13 14:34:04 hwloidl Exp $   *   * (c) The GHC Team, 1998-1999   * @@ -13,6 +13,11 @@ extern void checkHeap  ( bdescr *bd, StgPtr start );  extern void checkChain ( bdescr *bd );  extern void checkStack ( StgPtr sp, StgPtr stack_end, StgUpdateFrame* su );  extern void checkTSO   ( StgTSO* tso ); +#if defined(GRAN) +extern void checkTSOsSanity(void); +extern rtsBool checkThreadQSanity (PEs proc, rtsBool check_TSO_too); +extern rtsBool checkThreadQsSanity (rtsBool check_TSO_too); +#endif  extern StgOffset checkClosure( StgClosure* p ); diff --git a/ghc/rts/Schedule.c b/ghc/rts/Schedule.c index 1a96f87d0e..d87f7ab99a 100644 --- a/ghc/rts/Schedule.c +++ b/ghc/rts/Schedule.c @@ -1,11 +1,18 @@ -/* ----------------------------------------------------------------------------- - * $Id: Schedule.c,v 1.40 2000/01/13 10:37:31 simonmar Exp $ +/* --------------------------------------------------------------------------- + * $Id: Schedule.c,v 1.41 2000/01/13 14:34:05 hwloidl Exp $   *   * (c) The GHC Team, 1998-1999   *   * Scheduler   * - * ---------------------------------------------------------------------------*/ + * The main scheduling code in GranSim is quite different from that in std + * (concurrent) Haskell: while concurrent Haskell just iterates over the + * threads in the runnable queue, GranSim is event driven, i.e. it iterates + * over the events in the global event queue.  -- HWL + * --------------------------------------------------------------------------*/ + +//@node Main scheduling code, , , +//@section Main scheduling code  /* Version with scheduler monitor support for SMPs. 
@@ -27,6 +34,23 @@     SDM & KH, 10/99  */ +//@menu +//* Includes::			 +//* Variables and Data structures::   +//* Prototypes::		 +//* Main scheduling loop::	 +//* Suspend and Resume::	 +//* Run queue code::		 +//* Garbage Collextion Routines::   +//* Blocking Queue Routines::	 +//* Exception Handling Routines::   +//* Debugging Routines::	 +//* Index::			 +//@end menu + +//@node Includes, Variables and Data structures, Main scheduling code, Main scheduling code +//@subsection Includes +  #include "Rts.h"  #include "SchedAPI.h"  #include "RtsUtils.h" @@ -48,9 +72,21 @@  #include "Sanity.h"  #include "Stats.h"  #include "Sparks.h" +#if defined(GRAN) || defined(PAR) +# include "GranSimRts.h" +# include "GranSim.h" +# include "ParallelRts.h" +# include "Parallel.h" +# include "ParallelDebug.h" +# include "FetchMe.h" +# include "HLC.h" +#endif  #include <stdarg.h> +//@node Variables and Data structures, Prototypes, Includes, Main scheduling code +//@subsection Variables and Data structures +  /* Main threads:   *   * These are the threads which clients have requested that we run.   @@ -65,6 +101,7 @@   *   * Main threads information is kept in a linked list:   */ +//@cindex StgMainThread  typedef struct StgMainThread_ {    StgTSO *         tso;    SchedulerStatus  stat; @@ -83,6 +120,47 @@ static StgMainThread *main_threads;  /* Thread queues.   * Locks required: sched_mutex.   */ + +#if DEBUG +char *whatNext_strs[] = { +  "ThreadEnterGHC", +  "ThreadRunGHC", +  "ThreadEnterHugs", +  "ThreadKilled", +  "ThreadComplete" +}; + +char *threadReturnCode_strs[] = { +  "HeapOverflow",			/* might also be StackOverflow */ +  "StackOverflow", +  "ThreadYielding", +  "ThreadBlocked", +  "ThreadFinished" +}; +#endif + +#if defined(GRAN) + +StgTSO* ActiveTSO = NULL; /* for assigning system costs; GranSim-Light only */ +// rtsTime TimeOfNextEvent, EndOfTimeSlice;            now in GranSim.c + +/*  +   In GranSim we have a runable and a blocked queue for each processor. 
+   In order to minimise code changes new arrays run_queue_hds/tls +   are created. run_queue_hd is then a short cut (macro) for +   run_queue_hds[CurrentProc] (see GranSim.h). +   -- HWL +*/ +StgTSO *run_queue_hds[MAX_PROC], *run_queue_tls[MAX_PROC]; +StgTSO *blocked_queue_hds[MAX_PROC], *blocked_queue_tls[MAX_PROC]; +StgTSO *ccalling_threadss[MAX_PROC]; + +#else /* !GRAN */ + +//@cindex run_queue_hd +//@cindex run_queue_tl +//@cindex blocked_queue_hd +//@cindex blocked_queue_tl  StgTSO *run_queue_hd, *run_queue_tl;  StgTSO *blocked_queue_hd, *blocked_queue_tl; @@ -93,6 +171,7 @@ static StgTSO *suspended_ccalling_threads;  static void GetRoots(void);  static StgTSO *threadStackOverflow(StgTSO *tso); +#endif  /* KH: The following two flags are shared memory locations.  There is no need         to lock them, since they are only unset at the end of a scheduler @@ -100,14 +179,17 @@ static StgTSO *threadStackOverflow(StgTSO *tso);  */  /* flag set by signal handler to precipitate a context switch */ +//@cindex context_switch  nat context_switch;  /* if this flag is set as well, give up execution */ +//@cindex interrupted  rtsBool interrupted;  /* Next thread ID to allocate.   * Locks required: sched_mutex   */ +//@cindex next_thread_id  StgThreadID next_thread_id = 1;  /* @@ -132,10 +214,19 @@ StgThreadID next_thread_id = 1;   * Locks required: sched_mutex.   
*/  #ifdef SMP -Capability *free_capabilities;	/* Available capabilities for running threads */ -nat n_free_capabilities;        /* total number of available capabilities */ +//@cindex free_capabilities +//@cindex n_free_capabilities +Capability *free_capabilities; /* Available capabilities for running threads */ +nat n_free_capabilities;       /* total number of available capabilities */ +#else +//@cindex MainRegTable +Capability MainRegTable;       /* for non-SMP, we have one global capability */ +#endif + +#if defined(GRAN) +StgTSO      *CurrentTSOs[MAX_PROC];  #else -Capability MainRegTable;	/* for non-SMP, we have one global capability */ +StgTSO      *CurrentTSO;  #endif  rtsBool ready_to_gc; @@ -143,6 +234,7 @@ rtsBool ready_to_gc;  /* All our current task ids, saved in case we need to kill them later.   */  #ifdef SMP +//@cindex task_ids  task_info *task_ids;  #endif @@ -157,6 +249,10 @@ static void sched_belch(char *s, ...);  #endif  #ifdef SMP +//@cindex sched_mutex +//@cindex term_mutex +//@cindex thread_ready_cond +//@cindex gc_pending_cond  pthread_mutex_t sched_mutex       = PTHREAD_MUTEX_INITIALIZER;  pthread_mutex_t term_mutex        = PTHREAD_MUTEX_INITIALIZER;  pthread_cond_t  thread_ready_cond = PTHREAD_COND_INITIALIZER; @@ -165,7 +261,35 @@ pthread_cond_t  gc_pending_cond   = PTHREAD_COND_INITIALIZER;  nat await_death;  #endif -/* ----------------------------------------------------------------------------- +#if defined(PAR) +StgTSO *LastTSO; +rtsTime TimeOfLastYield; +#endif + +/* + * The thread state for the main thread. 
+// ToDo: check whether not needed any more +StgTSO   *MainTSO; + */ + + +//@node Prototypes, Main scheduling loop, Variables and Data structures, Main scheduling code +//@subsection Prototypes + +#if 0 && defined(GRAN) +// ToDo: replace these with macros +static /* inline */ void    add_to_run_queue(StgTSO* tso);  +static /* inline */ void    push_on_run_queue(StgTSO* tso);  +static /* inline */ StgTSO *take_off_run_queue(StgTSO *tso); + +/* Thread management */ +void initScheduler(void); +#endif + +//@node Main scheduling loop, Suspend and Resume, Prototypes, Main scheduling code +//@subsection Main scheduling loop + +/* ---------------------------------------------------------------------------     Main scheduling loop.     We use round-robin scheduling, each thread returning to the @@ -184,18 +308,35 @@ nat await_death;        * waiting for work, or        * waiting for a GC to complete. -   -------------------------------------------------------------------------- */ - +   ------------------------------------------------------------------------ */ +//@cindex schedule  static void  schedule( void )  {    StgTSO *t;    Capability *cap;    StgThreadReturnCode ret; +#if defined(GRAN) +  rtsEvent *event; +#elif defined(PAR) +  rtsSpark spark; +  StgTSO *tso; +  GlobalTaskId pe; +#endif    ACQUIRE_LOCK(&sched_mutex); +#if defined(GRAN) +# error ToDo: implement GranSim scheduler +#elif defined(PAR) +  while (!GlobalStopPending) {          /* GlobalStopPending set in par_exit */ + +    if (PendingFetches != END_BF_QUEUE) { +        processFetches(); +    } +#else    while (1) { +#endif      /* If we're interrupted (the user pressed ^C, or some other       * termination condition occurred), kill all the currently running @@ -267,7 +408,7 @@ schedule( void )       * number of threads in the run queue equal to the number of       * free capabilities.       
*/ -#if defined(SMP) || defined(PAR) +#if defined(SMP)      {        nat n = n_free_capabilities;        StgTSO *tso = run_queue_hd; @@ -284,11 +425,12 @@ schedule( void )  	if (spark == NULL) {  	  break; /* no more sparks in the pool */  	} else { +	  // I'd prefer this to be done in activateSpark -- HWL  	  StgTSO *tso;  	  tso = createThread_(RtsFlags.GcFlags.initialStkSize, rtsTrue);  	  pushClosure(tso,spark);  	  PUSH_ON_RUN_QUEUE(tso); -#ifdef ToDo +#ifdef PAR  	  advisory_thread_count++;  #endif @@ -304,7 +446,7 @@ schedule( void )  	  pthread_cond_signal(&thread_ready_cond);        }      } -#endif /* SMP || PAR */ +#endif /* SMP */      /* Check whether any waiting threads need to be woken up.  If the       * run queue is empty, and there are no other tasks running, we @@ -375,10 +517,114 @@ schedule( void )        IF_DEBUG(scheduler, sched_belch("work now available"));      }  #endif + +#if defined(GRAN) +# error ToDo: implement GranSim scheduler +#elif defined(PAR) +    // ToDo: phps merge with spark activation above +    /* check whether we have local work and send requests if we have none */ +    if (run_queue_hd == END_TSO_QUEUE) {  /* no runnable threads */ +      /* :-[  no local threads => look out for local sparks */ +      if (advisory_thread_count < RtsFlags.ParFlags.maxThreads && +	  (pending_sparks_hd[REQUIRED_POOL] < pending_sparks_tl[REQUIRED_POOL] || +	   pending_sparks_hd[ADVISORY_POOL] < pending_sparks_tl[ADVISORY_POOL])) { +	/*  +	 * ToDo: add GC code check that we really have enough heap afterwards!! +	 * Old comment: +	 * If we're here (no runnable threads) and we have pending +	 * sparks, we must have a space problem.  Get enough space +	 * to turn one of those pending sparks into a +	 * thread...  
+	 */ +	 +	spark = findSpark();                /* get a spark */ +	if (spark != (rtsSpark) NULL) { +	  tso = activateSpark(spark);       /* turn the spark into a thread */ +	  IF_PAR_DEBUG(verbose, +		       belch("== [%x] schedule: Created TSO %p (%d); %d threads active", +			     mytid, tso, tso->id, advisory_thread_count)); + +	  if (tso==END_TSO_QUEUE) { // failed to activate spark -> back to loop +	    belch("^^ failed to activate spark"); +	    goto next_thread; +	  }                         // otherwise fall through & pick-up new tso +	} else { +	  IF_PAR_DEBUG(verbose, +		       belch("^^ no local sparks (spark pool contains only NFs: %d)",  +			     spark_queue_len(ADVISORY_POOL))); +	  goto next_thread; +	} +      } else   +      /* =8-[  no local sparks => look for work on other PEs */ +      { +	/* +	 * We really have absolutely no work.  Send out a fish +	 * (there may be some out there already), and wait for +	 * something to arrive.  We clearly can't run any threads +	 * until a SCHEDULE or RESUME arrives, and so that's what +	 * we're hoping to see.  (Of course, we still have to +	 * respond to other types of messages.) 
+	 */ +	if (//!fishing &&   +	    outstandingFishes < RtsFlags.ParFlags.maxFishes ) { // && +	  // (last_fish_arrived_at+FISH_DELAY < CURRENT_TIME)) { +	  /* fishing set in sendFish, processFish; +	     avoid flooding system with fishes via delay */ +	  pe = choosePE(); +	  sendFish(pe, mytid, NEW_FISH_AGE, NEW_FISH_HISTORY,  +		   NEW_FISH_HUNGER); +	} +	 +	processMessages(); +	goto next_thread; +	// ReSchedule(0); +      } +    } else if (PacketsWaiting()) {  /* Look for incoming messages */ +      processMessages(); +    } + +    /* Now we are sure that we have some work available */ +    ASSERT(run_queue_hd != END_TSO_QUEUE); +    /* Take a thread from the run queue, if we have work */ +    t = take_off_run_queue(END_TSO_QUEUE); + +    /* ToDo: write something to the log-file +    if (RTSflags.ParFlags.granSimStats && !sameThread) +        DumpGranEvent(GR_SCHEDULE, RunnableThreadsHd); +    */ + +    CurrentTSO = t; + +    IF_DEBUG(scheduler, belch("--^^ %d sparks on [%#x] (hd=%x; tl=%x; lim=%x)",  +			      spark_queue_len(ADVISORY_POOL), CURRENT_PROC, +			      pending_sparks_hd[ADVISORY_POOL],  +			      pending_sparks_tl[ADVISORY_POOL],  +			      pending_sparks_lim[ADVISORY_POOL])); + +    IF_DEBUG(scheduler, belch("--== %d threads on [%#x] (hd=%x; tl=%x)",  +			      run_queue_len(), CURRENT_PROC, +			      run_queue_hd, run_queue_tl)); + +    if (t != LastTSO) { +      /*  +	 we are running a different TSO, so write a schedule event to log file +	 NB: If we use fair scheduling we also have to write  a deschedule  +	     event for LastTSO; with unfair scheduling we know that the +	     previous tso has blocked whenever we switch to another tso, so +	     we don't need it in GUM for now +      */ +      DumpRawGranEvent(CURRENT_PROC, CURRENT_PROC, +		       GR_SCHEDULE, t, (StgClosure *)NULL, 0, 0); +       +    } + +#else /* !GRAN && !PAR */      /* grab a thread from the run queue       */      t = POP_RUN_QUEUE(); + +#endif      /* grab a capability     
  */ @@ -403,6 +649,7 @@ schedule( void )      IF_DEBUG(scheduler,sched_belch("running thread %d", t->id)); +    /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */      /* Run the current thread        */      switch (cap->rCurrentTSO->whatNext) { @@ -433,6 +680,7 @@ schedule( void )      default:        barf("schedule: invalid whatNext field");      } +    /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */      /* Costs for the scheduler are assigned to CCS_SYSTEM */  #ifdef PROFILING @@ -487,6 +735,14 @@ schedule( void )        break;      case ThreadYielding: +#if defined(GRAN) +      IF_DEBUG(gran,  +	       DumpGranEvent(GR_DESCHEDULE, t)); +      globalGranStats.tot_yields++; +#elif defined(PAR) +      IF_DEBUG(par,  +	       DumpGranEvent(GR_DESCHEDULE, t)); +#endif        /* put the thread back on the run queue.  Then, if we're ready to         * GC, check whether this is the last task to stop.  If so, wake         * up the GC thread.  getThread will block during a GC until the @@ -507,6 +763,13 @@ schedule( void )        break;      case ThreadBlocked: +#if defined(GRAN) +# error ToDo: implement GranSim scheduler +#elif defined(PAR) +      IF_DEBUG(par,  +	       DumpGranEvent(GR_DESCHEDULE, t));  +#else +#endif        /* don't need to do anything.  
Either the thread is blocked on         * I/O, in which case we'll have called addToBlockedQueue         * previously, or it's blocked on an MVar or Blackhole, in which @@ -527,6 +790,13 @@ schedule( void )         */        IF_DEBUG(scheduler,belch("thread %ld finished", t->id));        t->whatNext = ThreadComplete; +#if defined(GRAN) +      // ToDo: endThread(t, CurrentProc); // clean-up the thread +#elif defined(PAR) +      advisory_thread_count--; +      if (RtsFlags.ParFlags.ParStats.Full)  +	DumpEndEvent(CURRENT_PROC, t, rtsFalse /* not mandatory */); +#endif        break;      default: @@ -540,10 +810,11 @@ schedule( void )  #endif  #ifdef SMP -    if (ready_to_gc && n_free_capabilities == RtsFlags.ParFlags.nNodes) { +    if (ready_to_gc && n_free_capabilities == RtsFlags.ParFlags.nNodes)   #else -    if (ready_to_gc) { +    if (ready_to_gc)   #endif +      {        /* everybody back, start the GC.         * Could do it in this thread, or signal a condition var         * to do it in another thread.  Either way, we need to @@ -558,10 +829,26 @@ schedule( void )        pthread_cond_broadcast(&gc_pending_cond);  #endif      } +#if defined(GRAN) +  next_thread: +    IF_GRAN_DEBUG(unused, +		  print_eventq(EventHd)); + +    event = get_next_event(); + +#elif defined(PAR) +  next_thread: +    /* ToDo: wait for next message to arrive rather than busy wait */ + +#else /* GRAN */ +  /* not any more +  next_thread: +    t = take_off_run_queue(END_TSO_QUEUE); +  */ +#endif /* GRAN */    } /* end of while(1) */  } -  /* A hack for Hugs concurrency support.  Needs sanitisation (?) 
*/  void deleteAllThreads ( void )  { @@ -577,8 +864,12 @@ void deleteAllThreads ( void )    blocked_queue_hd = blocked_queue_tl = END_TSO_QUEUE;  } +/* startThread and  insertThread are now in GranSim.c -- HWL */ -/* ----------------------------------------------------------------------------- +//@node Suspend and Resume, Run queue code, Main scheduling loop, Main scheduling code +//@subsection Suspend and Resume + +/* ---------------------------------------------------------------------------   * Suspending & resuming Haskell threads.   *    * When making a "safe" call to C (aka _ccall_GC), the task gives back @@ -591,7 +882,7 @@ void deleteAllThreads ( void )   * duration of the call, on the susepended_ccalling_threads queue.  We   * give out a token to the task, which it can use to resume the thread   * on return from the C function. - * -------------------------------------------------------------------------- */ + * ------------------------------------------------------------------------- */  StgInt  suspendThread( Capability *cap ) @@ -660,17 +951,18 @@ resumeThread( StgInt tok )    return cap;  } -/* ----------------------------------------------------------------------------- + +/* ---------------------------------------------------------------------------   * Static functions - * -------------------------------------------------------------------------- */ + * ------------------------------------------------------------------------ */  static void unblockThread(StgTSO *tso); -/* ----------------------------------------------------------------------------- +/* ---------------------------------------------------------------------------   * Comparing Thread ids.   *   * This is used from STG land in the implementation of the   * instances of Eq/Ord for ThreadIds. 
- * -------------------------------------------------------------------------- */ + * ------------------------------------------------------------------------ */  int cmp_thread(const StgTSO *tso1, const StgTSO *tso2)   {  @@ -682,7 +974,7 @@ int cmp_thread(const StgTSO *tso1, const StgTSO *tso2)    return 0;  } -/* ----------------------------------------------------------------------------- +/* ---------------------------------------------------------------------------     Create a new thread.     The new thread starts with the given stack size.  Before the @@ -692,19 +984,50 @@ int cmp_thread(const StgTSO *tso1, const StgTSO *tso2)     createGenThread() and createIOThread() (in SchedAPI.h) are     convenient packaged versions of this function. -   -------------------------------------------------------------------------- */ +   ------------------------------------------------------------------------ */ +//@cindex createThread +#if defined(GRAN) +/* currently pri (priority) is only used in a GRAN setup -- HWL */ +StgTSO * +createThread(nat stack_size, StgInt pri) +{ +  return createThread_(stack_size, rtsFalse, pri); +} +static StgTSO * +createThread_(nat size, rtsBool have_lock, StgInt pri) +{ +#else  StgTSO * -createThread(nat size) +createThread(nat stack_size)  { -  return createThread_(size, rtsFalse); +  return createThread_(stack_size, rtsFalse);  }  static StgTSO *  createThread_(nat size, rtsBool have_lock)  { -  StgTSO *tso; -  nat stack_size; +#endif +    StgTSO *tso; +    nat stack_size; + +    /* First check whether we should create a thread at all */ +#if defined(PAR) +  /* check that no more than RtsFlags.ParFlags.maxThreads threads are created */ +  if (advisory_thread_count >= RtsFlags.ParFlags.maxThreads) { +    threadsIgnored++; +    belch("{createThread}Daq ghuH: refusing to create another thread; no more than %d threads allowed (currently %d)", +	  RtsFlags.ParFlags.maxThreads, advisory_thread_count); +    return END_TSO_QUEUE; +  } +  
threadsCreated++; +#endif + +#if defined(GRAN) +  ASSERT(!RtsFlags.GranFlags.Light || CurrentProc==0); +#endif + +  // ToDo: check whether size = stack_size - TSO_STRUCT_SIZEW    /* catch ridiculously small stack sizes */    if (size < MIN_STACK_WORDS + TSO_STRUCT_SIZEW) { @@ -716,9 +1039,13 @@ createThread_(nat size, rtsBool have_lock)    stack_size = size - TSO_STRUCT_SIZEW; +  // Hmm, this CCS_MAIN is not protected by a PROFILING cpp var;    SET_HDR(tso, &TSO_info, CCS_MAIN); -  tso->whatNext = ThreadEnterGHC; -   +#if defined(GRAN) +  SET_GRAN_HDR(tso, ThisPE); +#endif +  tso->whatNext     = ThreadEnterGHC; +    /* tso->id needs to be unique.  For now we use a heavyweight mutex to    	 protect the increment operation on next_thread_id.    	 In future, we could use an atomic increment instead. @@ -746,13 +1073,69 @@ createThread_(nat size, rtsBool have_lock)    SET_HDR((StgClosure*)tso->sp,(StgInfoTable *)&stg_stop_thread_info,CCS_MAIN);    tso->su = (StgUpdateFrame*)tso->sp; +  IF_DEBUG(scheduler,belch("---- Initialised TSO %ld (%p), stack size = %lx words",  +			   tso->id, tso, tso->stack_size)); + +  // ToDo: check this +#if defined(GRAN) +  tso->link = END_TSO_QUEUE; +  /* uses more flexible routine in GranSim */ +  insertThread(tso, CurrentProc); +#else +  add_to_run_queue(tso); +#endif + +#if defined(GRAN) +  tso->gran.pri = pri; +  tso->gran.magic = TSO_MAGIC; // debugging only +  tso->gran.sparkname   = 0; +  tso->gran.startedat   = CURRENT_TIME;  +  tso->gran.exported    = 0; +  tso->gran.basicblocks = 0; +  tso->gran.allocs      = 0; +  tso->gran.exectime    = 0; +  tso->gran.fetchtime   = 0; +  tso->gran.fetchcount  = 0; +  tso->gran.blocktime   = 0; +  tso->gran.blockcount  = 0; +  tso->gran.blockedat   = 0; +  tso->gran.globalsparks = 0; +  tso->gran.localsparks  = 0; +  if (RtsFlags.GranFlags.Light) +    tso->gran.clock  = Now; /* local clock */ +  else +    tso->gran.clock  = 0; + +  IF_DEBUG(gran,printTSO(tso)); +#elif defined(PAR) +  
tso->par.sparkname   = 0; +  tso->par.startedat   = CURRENT_TIME;  +  tso->par.exported    = 0; +  tso->par.basicblocks = 0; +  tso->par.allocs      = 0; +  tso->par.exectime    = 0; +  tso->par.fetchtime   = 0; +  tso->par.fetchcount  = 0; +  tso->par.blocktime   = 0; +  tso->par.blockcount  = 0; +  tso->par.blockedat   = 0; +  tso->par.globalsparks = 0; +  tso->par.localsparks  = 0; +#endif + +#if defined(GRAN) +  globalGranStats.tot_threads_created++; +  globalGranStats.threads_created_on_PE[CurrentProc]++; +  globalGranStats.tot_sq_len += spark_queue_len(CurrentProc); +  globalGranStats.tot_sq_probes++; +#endif  +    IF_DEBUG(scheduler,sched_belch("created thread %ld, stack size = %lx words",   				 tso->id, tso->stack_size));    return tso;  } - -/* ----------------------------------------------------------------------------- +/* ---------------------------------------------------------------------------   * scheduleThread()   *   * scheduleThread puts a thread on the head of the runnable queue. @@ -760,7 +1143,7 @@ createThread_(nat size, rtsBool have_lock)   * The caller of scheduleThread must create the thread using e.g.   * createThread and push an appropriate closure   * on this thread's stack before the scheduler is invoked. - * -------------------------------------------------------------------------- */ + * ------------------------------------------------------------------------ */  void  scheduleThread(StgTSO *tso) @@ -779,14 +1162,13 @@ scheduleThread(StgTSO *tso)    RELEASE_LOCK(&sched_mutex);  } - -/* ----------------------------------------------------------------------------- +/* ---------------------------------------------------------------------------   * startTasks()   *   * Start up Posix threads to run each of the scheduler tasks.   * I believe the task ids are not needed in the system as defined. 
-  * KH @ 25/10/99 - * -------------------------------------------------------------------------- */ + *  KH @ 25/10/99 + * ------------------------------------------------------------------------ */  #ifdef SMP  static void * @@ -797,7 +1179,7 @@ taskStart( void *arg STG_UNUSED )  }  #endif -/* ----------------------------------------------------------------------------- +/* ---------------------------------------------------------------------------   * initScheduler()   *   * Initialise the scheduler.  This resets all the queues - if the @@ -805,7 +1187,7 @@ taskStart( void *arg STG_UNUSED )   * next pass.   *   * This now calls startTasks(), so should only be called once!  KH @ 25/10/99 - * -------------------------------------------------------------------------- */ + * ------------------------------------------------------------------------ */  #ifdef SMP  static void @@ -819,12 +1201,26 @@ term_handler(int sig STG_UNUSED)  }  #endif -void initScheduler(void) +//@cindex initScheduler +void  +initScheduler(void)  { +#if defined(GRAN) +  nat i; + +  for (i=0; i<=MAX_PROC; i++) { +    run_queue_hds[i]      = END_TSO_QUEUE; +    run_queue_tls[i]      = END_TSO_QUEUE; +    blocked_queue_hds[i]  = END_TSO_QUEUE; +    blocked_queue_tls[i]  = END_TSO_QUEUE; +    ccalling_threadss[i]  = END_TSO_QUEUE; +  } +#else    run_queue_hd      = END_TSO_QUEUE;    run_queue_tl      = END_TSO_QUEUE;    blocked_queue_hd  = END_TSO_QUEUE;    blocked_queue_tl  = END_TSO_QUEUE; +#endif     suspended_ccalling_threads  = END_TSO_QUEUE; @@ -1009,42 +1405,127 @@ waitThread(StgTSO *tso, /*out*/StgClosure **ret)    return stat;  } -   -/* ----------------------------------------------------------------------------- -   Debugging: why is a thread blocked -   -------------------------------------------------------------------------- */ -#ifdef DEBUG -void printThreadBlockage(StgTSO *tso) +//@node Run queue code, Garbage Collextion Routines, Suspend and Resume, Main scheduling code 
+//@subsection Run queue code  + +#if 0 +/*  +   NB: In GranSim we have many run queues; run_queue_hd is actually a macro +       unfolding to run_queue_hds[CurrentProc], thus CurrentProc is an +       implicit global variable that has to be correct when calling these +       fcts -- HWL  +*/ + +/* Put the new thread on the head of the runnable queue. + * The caller of createThread better push an appropriate closure + * on this thread's stack before the scheduler is invoked. + */ +static /* inline */ void +add_to_run_queue(tso) +StgTSO* tso;   { -  switch (tso->why_blocked) { -  case BlockedOnRead: -    fprintf(stderr,"blocked on read from fd %d", tso->block_info.fd); -    break; -  case BlockedOnWrite: -    fprintf(stderr,"blocked on write to fd %d", tso->block_info.fd); -    break; -  case BlockedOnDelay: -    fprintf(stderr,"blocked on delay of %d ms", tso->block_info.delay); -    break; -  case BlockedOnMVar: -    fprintf(stderr,"blocked on an MVar"); -    break; -  case BlockedOnException: -    fprintf(stderr,"blocked on delivering an exception to thread %d", -	    tso->block_info.tso->id); -    break; -  case BlockedOnBlackHole: -    fprintf(stderr,"blocked on a black hole"); -    break; -  case NotBlocked: -    fprintf(stderr,"not blocked"); -    break; +  ASSERT(tso!=run_queue_hd && tso!=run_queue_tl); +  tso->link = run_queue_hd; +  run_queue_hd = tso; +  if (run_queue_tl == END_TSO_QUEUE) { +    run_queue_tl = tso;    }  } -#endif -/* ----------------------------------------------------------------------------- +/* Put the new thread at the end of the runnable queue. 
*/ +static /* inline */ void +push_on_run_queue(tso) +StgTSO* tso;  +{ +  ASSERT(get_itbl((StgClosure *)tso)->type == TSO); +  ASSERT(run_queue_hd!=NULL && run_queue_tl!=NULL); +  ASSERT(tso!=run_queue_hd && tso!=run_queue_tl); +  if (run_queue_hd == END_TSO_QUEUE) { +    run_queue_hd = tso; +  } else { +    run_queue_tl->link = tso; +  } +  run_queue_tl = tso; +} + +/*  +   Should be inlined because it's used very often in schedule.  The tso +   argument is actually only needed in GranSim, where we want to have the +   possibility to schedule *any* TSO on the run queue, irrespective of the +   actual ordering. Therefore, if tso is not the nil TSO then we traverse +   the run queue and dequeue the tso, adjusting the links in the queue.  +*/ +//@cindex take_off_run_queue +static /* inline */ StgTSO* +take_off_run_queue(StgTSO *tso) { +  StgTSO *t, *prev; + +  /*  +     qetlaHbogh Qu' ngaSbogh ghomDaQ {tso} yIteq! + +     if tso is specified, unlink that tso from the run_queue (doesn't have +     to be at the beginning of the queue); GranSim only  +  */ +  if (tso!=END_TSO_QUEUE) { +    /* find tso in queue */ +    for (t=run_queue_hd, prev=END_TSO_QUEUE;  +	 t!=END_TSO_QUEUE && t!=tso; +	 prev=t, t=t->link)  +      /* nothing */ ; +    ASSERT(t==tso); +    /* now actually dequeue the tso */ +    if (prev!=END_TSO_QUEUE) { +      ASSERT(run_queue_hd!=t); +      prev->link = t->link; +    } else { +      /* t is at beginning of thread queue */ +      ASSERT(run_queue_hd==t); +      run_queue_hd = t->link; +    } +    /* t is at end of thread queue */ +    if (t->link==END_TSO_QUEUE) { +      ASSERT(t==run_queue_tl); +      run_queue_tl = prev; +    } else { +      ASSERT(run_queue_tl!=t); +    } +    t->link = END_TSO_QUEUE; +  } else { +    /* take tso from the beginning of the queue; std concurrent code */ +    t = run_queue_hd; +    if (t != END_TSO_QUEUE) { +      run_queue_hd = t->link; +      t->link = END_TSO_QUEUE; +      if (run_queue_hd == END_TSO_QUEUE) { +	
run_queue_tl = END_TSO_QUEUE; +      } +    } +  } +  return t; +} + +#endif /* 0 */ + +nat +run_queue_len(void) +{ +  nat i; +  StgTSO *tso; + +  for (i=0, tso=run_queue_hd;  +       tso != END_TSO_QUEUE; +       i++, tso=tso->link) +    /* nothing */ + +  return i; +} + + +//@node Garbage Collextion Routines, Blocking Queue Routines, Run queue code, Main scheduling code +//@subsection Garbage Collextion Routines + +/* ---------------------------------------------------------------------------     Where are the roots that we know about?          - all the threads on the runnable queue @@ -1052,7 +1533,7 @@ void printThreadBlockage(StgTSO *tso)  	- all the thread currently executing a _ccall_GC          - all the "main threads" -   -------------------------------------------------------------------------- */ +   ------------------------------------------------------------------------ */  /* This has to be protected either by the scheduler monitor, or by the  	garbage collection monitor (probably the latter). 
@@ -1062,12 +1543,36 @@ void printThreadBlockage(StgTSO *tso)  static void GetRoots(void)  {    StgMainThread *m; +  nat i; + +#if defined(GRAN) +  for (i=0; i<=RtsFlags.GranFlags.proc; i++) { +    if ((run_queue_hds[i] != END_TSO_QUEUE) && ((run_queue_hds[i] != NULL))) +      run_queue_hds[i]    = (StgTSO *)MarkRoot((StgClosure *)run_queue_hds[i]); +    if ((run_queue_tls[i] != END_TSO_QUEUE) && ((run_queue_tls[i] != NULL))) +      run_queue_tls[i]    = (StgTSO *)MarkRoot((StgClosure *)run_queue_tls[i]); +     +    if ((blocked_queue_hds[i] != END_TSO_QUEUE) && ((blocked_queue_hds[i] != NULL))) +      blocked_queue_hds[i] = (StgTSO *)MarkRoot((StgClosure *)blocked_queue_hds[i]); +    if ((blocked_queue_tls[i] != END_TSO_QUEUE) && ((blocked_queue_tls[i] != NULL))) +      blocked_queue_tls[i] = (StgTSO *)MarkRoot((StgClosure *)blocked_queue_tls[i]); +    if ((ccalling_threadss[i] != END_TSO_QUEUE) && ((ccalling_threadss[i] != NULL))) +      ccalling_threadss[i] = (StgTSO *)MarkRoot((StgClosure *)ccalling_threadss[i]); +  } +  markEventQueue(); +#elif defined(PAR) +  run_queue_hd      = (StgTSO *)MarkRoot((StgClosure *)run_queue_hd); +  run_queue_tl      = (StgTSO *)MarkRoot((StgClosure *)run_queue_tl); +  blocked_queue_hd  = (StgTSO *)MarkRoot((StgClosure *)blocked_queue_hd); +  blocked_queue_tl  = (StgTSO *)MarkRoot((StgClosure *)blocked_queue_tl); +#else    run_queue_hd      = (StgTSO *)MarkRoot((StgClosure *)run_queue_hd);    run_queue_tl      = (StgTSO *)MarkRoot((StgClosure *)run_queue_tl);    blocked_queue_hd  = (StgTSO *)MarkRoot((StgClosure *)blocked_queue_hd);    blocked_queue_tl  = (StgTSO *)MarkRoot((StgClosure *)blocked_queue_tl); +#endif     for (m = main_threads; m != NULL; m = m->link) {      m->tso = (StgTSO *)MarkRoot((StgClosure *)m->tso); @@ -1205,10 +1710,93 @@ threadStackOverflow(StgTSO *tso)    return dest;  } -/* ----------------------------------------------------------------------------- +//@node Blocking Queue Routines, Exception Handling 
Routines, Garbage Collextion Routines, Main scheduling code +//@subsection Blocking Queue Routines + +/* ---------------------------------------------------------------------------     Wake up a queue that was blocked on some resource. -   -------------------------------------------------------------------------- */ +   ------------------------------------------------------------------------ */ + +// ToDo: check push_on_run_queue vs. PUSH_ON_RUN_QUEUE + +#if defined(GRAN) +# error FixME +#elif defined(PAR) +static inline void +unblockCount ( StgBlockingQueueElement *bqe, StgClosure *node ) +{ +  /* write RESUME events to log file and +     update blocked and fetch time (depending on type of the orig closure) */ +  if (RtsFlags.ParFlags.ParStats.Full) { +    DumpRawGranEvent(CURRENT_PROC, CURRENT_PROC,  +		     GR_RESUMEQ, ((StgTSO *)bqe), ((StgTSO *)bqe)->block_info.closure, +		     0, 0 /* spark_queue_len(ADVISORY_POOL) */); + +    switch (get_itbl(node)->type) { +	case FETCH_ME_BQ: +	  ((StgTSO *)bqe)->par.fetchtime += CURRENT_TIME-((StgTSO *)bqe)->par.blockedat; +	  break; +	case RBH: +	case FETCH_ME: +	case BLACKHOLE_BQ: +	  ((StgTSO *)bqe)->par.blocktime += CURRENT_TIME-((StgTSO *)bqe)->par.blockedat; +	  break; +	default: +	  barf("{unblockOneLocked}Daq Qagh: unexpected closure in blocking queue"); +	} +      } +} +#endif + +#if defined(GRAN) +# error FixME +#elif defined(PAR) +static StgBlockingQueueElement * +unblockOneLocked(StgBlockingQueueElement *bqe, StgClosure *node) +{ +    StgBlockingQueueElement *next; + +    switch (get_itbl(bqe)->type) { +    case TSO: +      ASSERT(((StgTSO *)bqe)->why_blocked != NotBlocked); +      /* if it's a TSO just push it onto the run_queue */ +      next = bqe->link; +      // ((StgTSO *)bqe)->link = END_TSO_QUEUE; // debugging? 
+      PUSH_ON_RUN_QUEUE((StgTSO *)bqe);  +      THREAD_RUNNABLE(); +      unblockCount(bqe, node); +      /* reset blocking status after dumping event */ +      ((StgTSO *)bqe)->why_blocked = NotBlocked; +      break; + +    case BLOCKED_FETCH: +      /* if it's a BLOCKED_FETCH put it on the PendingFetches list */ +      next = bqe->link; +      bqe->link = PendingFetches; +      PendingFetches = bqe; +      break; +# if defined(DEBUG) +      /* can ignore this case in a non-debugging setup;  +	 see comments on RBHSave closures above */ +    case CONSTR: +      /* check that the closure is an RBHSave closure */ +      ASSERT(get_itbl((StgClosure *)bqe) == &RBH_Save_0_info || +	     get_itbl((StgClosure *)bqe) == &RBH_Save_1_info || +	     get_itbl((StgClosure *)bqe) == &RBH_Save_2_info); +      break; + +    default: +      barf("{unblockOneLocked}Daq Qagh: Unexpected IP (%#lx; %s) in blocking queue at %#lx\n", +	   get_itbl((StgClosure *)bqe), info_type((StgClosure *)bqe),  +	   (StgClosure *)bqe); +# endif +    } +  // IF_DEBUG(scheduler,sched_belch("waking up thread %ld", tso->id)); +  return next; +} + +#else /* !GRAN && !PAR */  static StgTSO *  unblockOneLocked(StgTSO *tso)  { @@ -1223,7 +1811,20 @@ unblockOneLocked(StgTSO *tso)    IF_DEBUG(scheduler,sched_belch("waking up thread %ld", tso->id));    return next;  } +#endif +#if defined(GRAN) +# error FixME +#elif defined(PAR) +inline StgTSO * +unblockOne(StgTSO *tso, StgClosure *node) +{ +  ACQUIRE_LOCK(&sched_mutex); +  tso = unblockOneLocked(tso, node); +  RELEASE_LOCK(&sched_mutex); +  return tso; +} +#else  inline StgTSO *  unblockOne(StgTSO *tso)  { @@ -1232,7 +1833,35 @@ unblockOne(StgTSO *tso)    RELEASE_LOCK(&sched_mutex);    return tso;  } +#endif +#if defined(GRAN) +# error FixME +#elif defined(PAR) +void  +awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node) +{ +  StgBlockingQueueElement *bqe, *next; + +  ACQUIRE_LOCK(&sched_mutex); + +  IF_PAR_DEBUG(verbose,  +	       belch("## AwBQ 
for node %p on [%x]: ", +		     node, mytid)); + +  ASSERT(get_itbl(q)->type == TSO ||            +  	 get_itbl(q)->type == BLOCKED_FETCH ||  +  	 get_itbl(q)->type == CONSTR);  + +  bqe = q; +  while (get_itbl(bqe)->type==TSO ||  +	 get_itbl(bqe)->type==BLOCKED_FETCH) { +    bqe = unblockOneLocked(bqe, node); +  } +  RELEASE_LOCK(&sched_mutex); +} + +#else   /* !GRAN && !PAR */  void  awakenBlockedQueue(StgTSO *tso)  { @@ -1242,11 +1871,275 @@ awakenBlockedQueue(StgTSO *tso)    }    RELEASE_LOCK(&sched_mutex);  } +#endif -/* ----------------------------------------------------------------------------- +#if 0 +// ngoq ngo' + +#if defined(GRAN) +/*  +   Awakening a blocking queue in GranSim means checking for each of the +   TSOs in the queue whether they are local or not, issuing a ResumeThread +   or an UnblockThread event, respectively. The basic iteration over the +   blocking queue is the same as in the standard setup.   +*/ +void +awaken_blocked_queue(StgBlockingQueueElement *q, StgClosure *node) +{ +  StgBlockingQueueElement *bqe, *next; +  StgTSO *tso; +  PEs node_loc, tso_loc; +  rtsTime bq_processing_time = 0; +  nat len = 0, len_local = 0; + +  IF_GRAN_DEBUG(bq,  +		belch("## AwBQ for node %p on PE %d @ %ld by TSO %d (%p): ", \ +		      node, CurrentProc, CurrentTime[CurrentProc],  +		      CurrentTSO->id, CurrentTSO)); + +  node_loc = where_is(node); + +  ASSERT(get_itbl(q)->type == TSO ||   // q is either a TSO or an RBHSave +	 get_itbl(q)->type == CONSTR); // closure (type constructor) +  ASSERT(is_unique(node)); + +  /* FAKE FETCH: magically copy the node to the tso's proc; +     no Fetch necessary because in reality the node should not have been  +     moved to the other PE in the first place +  */ +  if (CurrentProc!=node_loc) { +    IF_GRAN_DEBUG(bq,  +		  belch("## node %p is on PE %d but CurrentProc is %d (TSO %d); assuming fake fetch and adjusting bitmask (old: %#x)", +			node, node_loc, CurrentProc, CurrentTSO->id,  +			// CurrentTSO, 
where_is(CurrentTSO), +			node->header.gran.procs)); +    node->header.gran.procs = (node->header.gran.procs) | PE_NUMBER(CurrentProc); +    IF_GRAN_DEBUG(bq,  +		  belch("## new bitmask of node %p is %#x", +			node, node->header.gran.procs)); +    if (RtsFlags.GranFlags.GranSimStats.Global) { +      globalGranStats.tot_fake_fetches++; +    } +  } + +  next = q; +  // ToDo: check: ASSERT(CurrentProc==node_loc); +  while (get_itbl(next)->type==TSO) { // q != END_TSO_QUEUE) { +    bqe = next; +    next = bqe->link; +    /*  +       bqe points to the current element in the queue +       next points to the next element in the queue +    */ +    tso = (StgTSO *)bqe;  // wastes an assignment to get the type right +    tso_loc = where_is(tso); +    if (IS_LOCAL_TO(PROCS(node),tso_loc)) { // TSO is local +      /* !fake_fetch => TSO is on CurrentProc is same as IS_LOCAL_TO */ +      ASSERT(CurrentProc!=node_loc || tso_loc==CurrentProc); +      bq_processing_time += RtsFlags.GranFlags.Costs.lunblocktime; +      // insertThread(tso, node_loc); +      new_event(tso_loc, tso_loc, +		CurrentTime[CurrentProc]+bq_processing_time, +		ResumeThread, +		tso, node, (rtsSpark*)NULL); +      tso->link = END_TSO_QUEUE; // overwrite link just to be sure  +      len_local++; +      len++; +    } else { // TSO is remote (actually should be FMBQ) +      bq_processing_time += RtsFlags.GranFlags.Costs.mpacktime; +      bq_processing_time += RtsFlags.GranFlags.Costs.gunblocktime; +      new_event(tso_loc, CurrentProc,  +		CurrentTime[CurrentProc]+bq_processing_time+ +		RtsFlags.GranFlags.Costs.latency, +		UnblockThread, +		tso, node, (rtsSpark*)NULL); +      tso->link = END_TSO_QUEUE; // overwrite link just to be sure  +      bq_processing_time += RtsFlags.GranFlags.Costs.mtidytime; +      len++; +    }       +    /* the thread-queue-overhead is accounted for in either Resume or UnblockThread */ +    IF_GRAN_DEBUG(bq, +		  fprintf(stderr," %s TSO %d (%p) [PE %d] (blocked_on=%p) (next=%p) ,", +		
	  (node_loc==tso_loc ? "Local" : "Global"),  +			  tso->id, tso, CurrentProc, tso->block_info.closure, tso->link)) +    tso->block_info.closure = NULL; +    IF_DEBUG(scheduler,belch("-- Waking up thread %ld (%p)",  +			     tso->id, tso)); +  } + +  /* if this is the BQ of an RBH, we have to put back the info ripped out of +     the closure to make room for the anchor of the BQ */ +  if (next!=END_BQ_QUEUE) { +    ASSERT(get_itbl(node)->type == RBH && get_itbl(next)->type == CONSTR); +    /* +    ASSERT((info_ptr==&RBH_Save_0_info) || +	   (info_ptr==&RBH_Save_1_info) || +	   (info_ptr==&RBH_Save_2_info)); +    */ +    /* cf. convertToRBH in RBH.c for writing the RBHSave closure */ +    ((StgRBH *)node)->blocking_queue = ((StgRBHSave *)next)->payload[0]; +    ((StgRBH *)node)->mut_link       = ((StgRBHSave *)next)->payload[1]; + +    IF_GRAN_DEBUG(bq, +		  belch("## Filled in RBH_Save for %p (%s) at end of AwBQ", +			node, info_type(node))); +  } + +  /* statistics gathering */ +  if (RtsFlags.GranFlags.GranSimStats.Global) { +    globalGranStats.tot_bq_processing_time += bq_processing_time; +    globalGranStats.tot_bq_len += len;      // total length of all bqs awakened +    globalGranStats.tot_bq_len_local += len_local;  // same for local TSOs only +    globalGranStats.tot_awbq++;             // total no. of bqs awakened +  } +  IF_GRAN_DEBUG(bq, +		fprintf(stderr,"## BQ Stats of %p: [%d entries, %d local] %s\n", +			node, len, len_local, (next!=END_TSO_QUEUE) ? "RBH" : "")); +} + +#elif defined(PAR) + +/* +  Awakening a blocking queue in GUM has to check whether an entry in the +  queue is a normal TSO or a BLOCKED_FETCH. The later indicates that a TSO is +  waiting for the result of this computation on another PE. Thus, when +  finding a BLOCKED_FETCH we have to send off a message to that PE.  
+  Actually, we defer sending off a message, by just putting the BLOCKED_FETCH +  onto the PendingFetches queue, which will be later traversed by +  processFetches, sending off a RESUME message for each BLOCKED_FETCH. + +  NB: There is no check for an RBHSave closure (type CONSTR) in the code  +      below. The reason is, if we awaken the BQ of an RBH closure (RBHSaves  +      only exist at the end of such BQs) we know that the closure has been +      unpacked successfully on the other PE, and we can discard the info +      contained in the RBHSave closure. The current closure will be turned  +      into a FetchMe closure anyway. +*/ +void  +awaken_blocked_queue(StgBlockingQueueElement *q, StgClosure *node) +{ +  StgBlockingQueueElement *bqe, *next; + +  IF_PAR_DEBUG(verbose,  +	       belch("## AwBQ for node %p on [%x]: ", +		     node, mytid)); + +  ASSERT(get_itbl(q)->type == TSO ||            +  	 get_itbl(q)->type == BLOCKED_FETCH ||  +  	 get_itbl(q)->type == CONSTR);  + +  next = q; +  while (get_itbl(next)->type==TSO ||  +	 get_itbl(next)->type==BLOCKED_FETCH) { +    bqe = next; +    switch (get_itbl(bqe)->type) { +    case TSO: +      /* if it's a TSO just push it onto the run_queue */ +      next = bqe->link; +#if defined(DEBUG) +      ((StgTSO *)bqe)->link = END_TSO_QUEUE; // debugging only +#endif +      push_on_run_queue((StgTSO *)bqe); // HWL: was: PUSH_ON_RUN_QUEUE(tso); + +      /* write RESUME events to log file and +	 update blocked and fetch time (depending on type of the orig closure) */ +      if (RtsFlags.ParFlags.ParStats.Full) { +	DumpRawGranEvent(CURRENT_PROC, CURRENT_PROC,  +			 GR_RESUMEQ, ((StgTSO *)bqe), ((StgTSO *)bqe)->block_info.closure, +			 0, spark_queue_len(ADVISORY_POOL)); + +	switch (get_itbl(node)->type) { +	case FETCH_ME_BQ: +	  ((StgTSO *)bqe)->par.fetchtime += CURRENT_TIME-((StgTSO *)bqe)->par.blockedat; +	  break; +	case RBH: +	case FETCH_ME: +	case BLACKHOLE_BQ: +	  ((StgTSO *)bqe)->par.blocktime += CURRENT_TIME-((StgTSO 
*)bqe)->par.blockedat; +	  break; +	default: +	  barf("{awaken_blocked_queue}Daq Qagh: unexpected closure %p (%s) with blocking queue", +	       node, info_type(node)); +	} +      } +      /* reset block_info.closure field after dumping event */ +      ((StgTSO *)bqe)->block_info.closure = NULL; + +      /* rest of this branch is debugging only */ +      IF_PAR_DEBUG(verbose, +		   fprintf(stderr," TSO %d (%p) [PE %lx] (block_info.closure=%p) (next=%p) ,", +			   ((StgTSO *)bqe)->id, (StgTSO *)bqe, +			   mytid, ((StgTSO *)bqe)->block_info.closure, ((StgTSO *)bqe)->link)); + +      IF_DEBUG(scheduler, +	       if (!RtsFlags.ParFlags.Debug.verbose) +	         belch("-- Waking up thread %ld (%p)",  +		       ((StgTSO *)bqe)->id, (StgTSO *)bqe)); +      break; + +    case BLOCKED_FETCH: +      /* if it's a BLOCKED_FETCH put it on the PendingFetches list */ +      next = bqe->link; +      bqe->link = PendingFetches; +      PendingFetches = bqe; +      // bqe.tso->block_info.closure = NULL; + +      /* rest of this branch is debugging only */ +      IF_PAR_DEBUG(verbose, +		   fprintf(stderr," BLOCKED_FETCH (%p) on node %p [PE %lx] (next=%p) ,", +			   ((StgBlockedFetch *)bqe),  +			   ((StgBlockedFetch *)bqe)->node,  +			   mytid, ((StgBlockedFetch *)bqe)->link)); +      break; + +# if defined(DEBUG) +      /* can ignore this case in a non-debugging setup;  +	 see comments on RBHSave closures above */ +    case CONSTR: +      /* check that the closure is an RBHSave closure */ +      ASSERT(get_itbl((StgClosure *)bqe) == &RBH_Save_0_info || +	     get_itbl((StgClosure *)bqe) == &RBH_Save_1_info || +	     get_itbl((StgClosure *)bqe) == &RBH_Save_2_info); +      break; + +    default: +      barf("{awaken_blocked_queue}Daq Qagh: Unexpected IP (%#lx; %s) in blocking queue at %#lx\n", +	   get_itbl((StgClosure *)bqe), info_type((StgClosure *)bqe),  +	   (StgClosure *)bqe); +# endif +    } +  } +} + +#else /* !GRAN && !PAR */ + +void  +awaken_blocked_queue(StgTSO *q) { 
awakenBlockedQueue(q); } + +/* +{ +  StgTSO *tso; + +  while (q != END_TSO_QUEUE) { +    ASSERT(get_itbl(q)->type == TSO); +    tso = q; +    q = tso->link; +    push_on_run_queue(tso); // HWL: was: PUSH_ON_RUN_QUEUE(tso); +    //tso->block_info.closure = NULL; +    IF_DEBUG(scheduler, belch("-- Waking up thread %ld (%p)", tso->id, tso)); +  } +} +*/ +#endif /* GRAN */ +#endif /* 0 */ + +//@node Exception Handling Routines, Debugging Routines, Blocking Queue Routines, Main scheduling code +//@subsection Exception Handling Routines + +/* ---------------------------------------------------------------------------     Interrupt execution     - usually called inside a signal handler so it mustn't do anything fancy.    -   -------------------------------------------------------------------------- */ +   ------------------------------------------------------------------------ */  void  interruptStgRts(void) @@ -1260,6 +2153,7 @@ interruptStgRts(void)     This is for use when we raise an exception in another thread, which     may be blocked. +   This has nothing to do with the UnblockThread event in GranSim. -- HWL     -------------------------------------------------------------------------- */  static void @@ -1593,11 +2487,202 @@ raiseAsync(StgTSO *tso, StgClosure *exception)    barf("raiseAsync");  } +//@node Debugging Routines, Index, Exception Handling Routines, Main scheduling code +//@subsection Debugging Routines +  /* ----------------------------------------------------------------------------- -   Debuggery... 
+   Debugging: why is a thread blocked     -------------------------------------------------------------------------- */  #ifdef DEBUG + +void printThreadBlockage(StgTSO *tso) +{ +  switch (tso->why_blocked) { +  case BlockedOnRead: +    fprintf(stderr,"blocked on read from fd %d", tso->block_info.fd); +    break; +  case BlockedOnWrite: +    fprintf(stderr,"blocked on write to fd %d", tso->block_info.fd); +    break; +  case BlockedOnDelay: +    fprintf(stderr,"blocked on delay of %d ms", tso->block_info.delay); +    break; +  case BlockedOnMVar: +    fprintf(stderr,"blocked on an MVar"); +    break; +  case BlockedOnException: +    fprintf(stderr,"blocked on delivering an exception to thread %d", +	    tso->block_info.tso->id); +    break; +  case BlockedOnBlackHole: +    fprintf(stderr,"blocked on a black hole"); +    break; +  case NotBlocked: +    fprintf(stderr,"not blocked"); +    break; +#if defined(PAR) +  case BlockedOnGA: +    fprintf(stderr,"blocked on global address"); +    break; +#endif +  } +} + +/*  +   Print a whole blocking queue attached to node (debugging only). +*/ +//@cindex print_bq +# if defined(PAR) +void  +print_bq (StgClosure *node) +{ +  StgBlockingQueueElement *bqe; +  StgTSO *tso; +  rtsBool end; + +  fprintf(stderr,"## BQ of closure %p (%s): ", +	  node, info_type(node)); + +  /* should cover all closures that may have a blocking queue */ +  ASSERT(get_itbl(node)->type == BLACKHOLE_BQ || +	 get_itbl(node)->type == FETCH_ME_BQ || +	 get_itbl(node)->type == RBH); +     +  ASSERT(node!=(StgClosure*)NULL);         // sanity check +  /*  +     NB: In a parallel setup a BQ of an RBH must end with an RBH_Save closure; +  */ +  for (bqe = ((StgBlockingQueue*)node)->blocking_queue, end = (bqe==END_BQ_QUEUE); +       !end; // iterate until bqe points to a CONSTR +       end = (get_itbl(bqe)->type == CONSTR) || (bqe->link==END_BQ_QUEUE), bqe = end ? 
END_BQ_QUEUE : bqe->link) { +    ASSERT(bqe != END_BQ_QUEUE);             // sanity check +    ASSERT(bqe != (StgTSO*)NULL);            // sanity check +    /* types of closures that may appear in a blocking queue */ +    ASSERT(get_itbl(bqe)->type == TSO ||            +	   get_itbl(bqe)->type == BLOCKED_FETCH ||  +	   get_itbl(bqe)->type == CONSTR);  +    /* only BQs of an RBH end with an RBH_Save closure */ +    ASSERT(get_itbl(bqe)->type != CONSTR || get_itbl(node)->type == RBH); + +    switch (get_itbl(bqe)->type) { +    case TSO: +      fprintf(stderr," TSO %d (%x),", +	      ((StgTSO *)bqe)->id, ((StgTSO *)bqe)); +      break; +    case BLOCKED_FETCH: +      fprintf(stderr," BF (node=%p, ga=((%x, %d, %x)),", +	      ((StgBlockedFetch *)bqe)->node,  +	      ((StgBlockedFetch *)bqe)->ga.payload.gc.gtid, +	      ((StgBlockedFetch *)bqe)->ga.payload.gc.slot, +	      ((StgBlockedFetch *)bqe)->ga.weight); +      break; +    case CONSTR: +      fprintf(stderr," %s (IP %p),", +	      (get_itbl(bqe) == &RBH_Save_0_info ? "RBH_Save_0" : +	       get_itbl(bqe) == &RBH_Save_1_info ? "RBH_Save_1" : +	       get_itbl(bqe) == &RBH_Save_2_info ? 
"RBH_Save_2" : +	       "RBH_Save_?"), get_itbl(bqe)); +      break; +    default: +      barf("Unexpected closure type %s in blocking queue of %p (%s)", +	   info_type(bqe), node, info_type(node)); +      break; +    } +  } /* for */ +  fputc('\n', stderr); +} +# elif defined(GRAN) +void  +print_bq (StgClosure *node) +{ +  StgBlockingQueueElement *bqe; +  StgTSO *tso; +  PEs node_loc, tso_loc; +  rtsBool end; + +  /* should cover all closures that may have a blocking queue */ +  ASSERT(get_itbl(node)->type == BLACKHOLE_BQ || +	 get_itbl(node)->type == FETCH_ME_BQ || +	 get_itbl(node)->type == RBH); +     +  ASSERT(node!=(StgClosure*)NULL);         // sanity check +  node_loc = where_is(node); + +  fprintf(stderr,"## BQ of closure %p (%s) on [PE %d]: ", +	  node, info_type(node), node_loc); + +  /*  +     NB: In a parallel setup a BQ of an RBH must end with an RBH_Save closure; +  */ +  for (bqe = ((StgBlockingQueue*)node)->blocking_queue, end = (bqe==END_BQ_QUEUE); +       !end; // iterate until bqe points to a CONSTR +       end = (get_itbl(bqe)->type == CONSTR) || (bqe->link==END_BQ_QUEUE), bqe = end ? END_BQ_QUEUE : bqe->link) { +    ASSERT(bqe != END_BQ_QUEUE);             // sanity check +    ASSERT(bqe != (StgTSO*)NULL);            // sanity check +    /* types of closures that may appear in a blocking queue */ +    ASSERT(get_itbl(bqe)->type == TSO ||            +	   get_itbl(bqe)->type == CONSTR);  +    /* only BQs of an RBH end with an RBH_Save closure */ +    ASSERT(get_itbl(bqe)->type != CONSTR || get_itbl(node)->type == RBH); + +    tso_loc = where_is((StgClosure *)bqe); +    switch (get_itbl(bqe)->type) { +    case TSO: +      fprintf(stderr," TSO %d (%x) on [PE %d],", +	      ((StgTSO *)bqe)->id, ((StgTSO *)bqe), tso_loc); +      break; +    case CONSTR: +      fprintf(stderr," %s (IP %p),", +	      (get_itbl(bqe) == &RBH_Save_0_info ? "RBH_Save_0" : +	       get_itbl(bqe) == &RBH_Save_1_info ? 
"RBH_Save_1" : +	       get_itbl(bqe) == &RBH_Save_2_info ? "RBH_Save_2" : +	       "RBH_Save_?"), get_itbl(bqe)); +      break; +    default: +      barf("Unexpected closure type %s in blocking queue of %p (%s)", +	   info_type(bqe), node, info_type(node)); +      break; +    } +  } /* for */ +  fputc('\n', stderr); +} +#else +/*  +   Nice and easy: only TSOs on the blocking queue +*/ +void  +print_bq (StgClosure *node) +{ +  StgTSO *tso; + +  ASSERT(node!=(StgClosure*)NULL);         // sanity check +  for (tso = ((StgBlockingQueue*)node)->blocking_queue; +       tso != END_TSO_QUEUE;  +       tso=tso->link) { +    ASSERT(tso!=(StgTSO*)NULL && tso!=END_TSO_QUEUE);   // sanity check +    ASSERT(get_itbl(tso)->type == TSO);  // guess what, sanity check +    fprintf(stderr," TSO %d (%x),", tso->id, tso); +  } +  fputc('\n', stderr); +} +# endif + +/* A debugging function used all over the place in GranSim and GUM. +   Dummy function in other setups. +*/ +# if !defined(GRAN) && !defined(PAR) +char * +info_type(StgClosure *closure){  +  return "petaQ"; +} + +char * +info_type_by_ip(StgInfoTable *ip){  +  return "petaQ"; +} +#endif +  static void  sched_belch(char *s, ...)  { @@ -1611,4 +2696,33 @@ sched_belch(char *s, ...)    
vfprintf(stderr, s, ap);    fprintf(stderr, "\n");  } -#endif + +#endif /* DEBUG */ + +//@node Index,  , Debugging Routines, Main scheduling code +//@subsection Index + +//@index +//* MainRegTable::  @cindex\s-+MainRegTable +//* StgMainThread::  @cindex\s-+StgMainThread +//* awaken_blocked_queue::  @cindex\s-+awaken_blocked_queue +//* blocked_queue_hd::  @cindex\s-+blocked_queue_hd +//* blocked_queue_tl::  @cindex\s-+blocked_queue_tl +//* context_switch::  @cindex\s-+context_switch +//* createThread::  @cindex\s-+createThread +//* free_capabilities::  @cindex\s-+free_capabilities +//* gc_pending_cond::  @cindex\s-+gc_pending_cond +//* initScheduler::  @cindex\s-+initScheduler +//* interrupted::  @cindex\s-+interrupted +//* n_free_capabilities::  @cindex\s-+n_free_capabilities +//* next_thread_id::  @cindex\s-+next_thread_id +//* print_bq::  @cindex\s-+print_bq +//* run_queue_hd::  @cindex\s-+run_queue_hd +//* run_queue_tl::  @cindex\s-+run_queue_tl +//* sched_mutex::  @cindex\s-+sched_mutex +//* schedule::  @cindex\s-+schedule +//* take_off_run_queue::  @cindex\s-+take_off_run_queue +//* task_ids::  @cindex\s-+task_ids +//* term_mutex::  @cindex\s-+term_mutex +//* thread_ready_cond::  @cindex\s-+thread_ready_cond +//@end index diff --git a/ghc/rts/Schedule.h b/ghc/rts/Schedule.h index f559efcb67..1c93099c57 100644 --- a/ghc/rts/Schedule.h +++ b/ghc/rts/Schedule.h @@ -1,13 +1,26 @@  /* ----------------------------------------------------------------------------- - * $Id: Schedule.h,v 1.12 2000/01/12 15:15:18 simonmar Exp $ + * $Id: Schedule.h,v 1.13 2000/01/13 14:34:05 hwloidl Exp $   *   * (c) The GHC Team 1998-1999   *   * Prototypes for functions in Schedule.c    * (RTS internal scheduler interface)   * - * ---------------------------------------------------------------------------*/ + * -------------------------------------------------------------------------*/ +//@menu +//* Scheduler Functions::	 +//* Scheduler Vars and Data Types::   +//* Some convenient 
macros::	 +//* Index::			 +//@end menu + +//@node Scheduler Functions, Scheduler Vars and Data Types +//@subsection Scheduler Functions + +//@cindex initScheduler +//@cindex exitScheduler +//@cindex startTasks  /* initScheduler(), exitScheduler(), startTasks()   *    * Called from STG :  no @@ -19,6 +32,7 @@ void exitScheduler( void );  void startTasks( void );  #endif +//@cindex awakenBlockedQueue  /* awakenBlockedQueue()   *   * Takes a pointer to the beginning of a blocked TSO queue, and @@ -27,8 +41,15 @@ void startTasks( void );   * Called from STG :  yes   * Locks assumed   :  none   */ +#if defined(GRAN) +# error FixME +#elif defined(PAR) +void awakenBlockedQueue(StgBlockingQueueElement *q, StgClosure *node); +#else  void awakenBlockedQueue(StgTSO *tso); +#endif +//@cindex unblockOne  /* unblockOne()   *   * Takes a pointer to the beginning of a blocked TSO queue, and @@ -37,8 +58,15 @@ void awakenBlockedQueue(StgTSO *tso);   * Called from STG : yes   * Locks assumed   : none   */ +#if defined(GRAN) +# error FixME +#elif defined(PAR) +StgTSO *unblockOne(StgTSO *tso, StgClosure *node); +#else  StgTSO *unblockOne(StgTSO *tso); +#endif +//@cindex raiseAsync  /* raiseAsync()   *   * Raises an exception asynchronously in the specified thread. @@ -48,6 +76,7 @@ StgTSO *unblockOne(StgTSO *tso);   */  void raiseAsync(StgTSO *tso, StgClosure *exception); +//@cindex awaitEvent  /* awaitEvent()   *   * Raises an exception asynchronously in the specified thread. 
@@ -57,6 +86,33 @@ void raiseAsync(StgTSO *tso, StgClosure *exception);   */  void awaitEvent(rtsBool wait);  /* In Select.c */ +// ToDo: check whether all fcts below are used in the SMP version, too +//@cindex awaken_blocked_queue +#if defined(GRAN) +void    awaken_blocked_queue(StgBlockingQueueElement *q, StgClosure *node); +void    unlink_from_bq(StgTSO* tso, StgClosure* node); +void    initThread(StgTSO *tso, nat stack_size, StgInt pri); +#elif defined(PAR) +nat     run_queue_len(void); +void    awaken_blocked_queue(StgBlockingQueueElement *q, StgClosure *node); +void    initThread(StgTSO *tso, nat stack_size); +#else +char   *info_type(StgClosure *closure);    // dummy +char   *info_type_by_ip(StgInfoTable *ip); // dummy +void    awaken_blocked_queue(StgTSO *q); +void    initThread(StgTSO *tso, nat stack_size); +#endif + +// debugging only +#ifdef DEBUG +extern void printThreadBlockage(StgTSO *tso); +#endif +void    print_bq (StgClosure *node); + +//@node Scheduler Vars and Data Types, Some convenient macros, Scheduler Functions +//@subsection Scheduler Vars and Data Types + +//@cindex context_switch  /* Context switch flag.   
* Locks required  : sched_mutex   */ @@ -65,6 +121,7 @@ extern rtsBool interrupted;  extern  nat ticks_since_select; +//@cindex Capability  /* Capability type   */  typedef StgRegTable Capability; @@ -85,16 +142,16 @@ extern Capability MainRegTable;  extern  StgTSO *run_queue_hd, *run_queue_tl;  extern  StgTSO *blocked_queue_hd, *blocked_queue_tl; -#ifdef DEBUG -extern void printThreadBlockage(StgTSO *tso); -#endif -  #ifdef SMP +//@cindex sched_mutex +//@cindex thread_ready_cond +//@cindex gc_pending_cond  extern pthread_mutex_t sched_mutex;  extern pthread_cond_t  thread_ready_cond;  extern pthread_cond_t  gc_pending_cond;  #endif +//@cindex task_info  #ifdef SMP  typedef struct {    pthread_t id; @@ -108,9 +165,19 @@ typedef struct {  extern task_info *task_ids;  #endif +#if !defined(GRAN) +extern  StgTSO *run_queue_hd, *run_queue_tl; +extern  StgTSO *blocked_queue_hd, *blocked_queue_tl; +#endif +  /* Needed by Hugs.   */  void interruptStgRts ( void ); +// ?? needed -- HWL +void raiseAsync(StgTSO *tso, StgClosure *exception); + +//@node Some convenient macros, Index, Scheduler Vars and Data Types +//@subsection Some convenient macros  /* -----------------------------------------------------------------------------   * Some convenient macros... @@ -119,6 +186,7 @@ void interruptStgRts ( void );  #define END_TSO_QUEUE  ((StgTSO *)(void*)&END_TSO_QUEUE_closure)  #define END_CAF_LIST   ((StgCAF *)(void*)&END_TSO_QUEUE_closure) +//@cindex APPEND_TO_RUN_QUEUE  /* Add a thread to the end of the run queue.   * NOTE: tso->link should be END_TSO_QUEUE before calling this macro.   */ @@ -131,6 +199,7 @@ void interruptStgRts ( void );      }						\      run_queue_tl = tso; +//@cindex PUSH_ON_RUN_QUEUE  /* Push a thread on the beginning of the run queue.  Used for   * newly awakened threads, so they get run as soon as possible.   
*/ @@ -140,7 +209,8 @@ void interruptStgRts ( void );      if (run_queue_tl == END_TSO_QUEUE) {	\        run_queue_tl = tso;			\      } -     + +//@cindex POP_RUN_QUEUE      /* Pop the first thread off the runnable queue.   */  #define POP_RUN_QUEUE()				\ @@ -155,6 +225,7 @@ void interruptStgRts ( void );      t;						\    }) +//@cindex APPEND_TO_BLOCKED_QUEUE  /* Add a thread to the end of the blocked queue.   */  #define APPEND_TO_BLOCKED_QUEUE(tso)		\ @@ -166,6 +237,7 @@ void interruptStgRts ( void );      }						\      blocked_queue_tl = tso; +//@cindex THREAD_RUNNABLE  /* Signal that a runnable thread has become available, in   * case there are any waiting tasks to execute it.   */ @@ -179,3 +251,27 @@ void interruptStgRts ( void );  #define THREAD_RUNNABLE()  /* nothing */  #endif +//@node Index,  , Some convenient macros +//@subsection Index + +//@index +//* APPEND_TO_BLOCKED_QUEUE::  @cindex\s-+APPEND_TO_BLOCKED_QUEUE +//* APPEND_TO_RUN_QUEUE::  @cindex\s-+APPEND_TO_RUN_QUEUE +//* Capability::  @cindex\s-+Capability +//* POP_RUN_QUEUE    ::  @cindex\s-+POP_RUN_QUEUE     +//* PUSH_ON_RUN_QUEUE::  @cindex\s-+PUSH_ON_RUN_QUEUE +//* THREAD_RUNNABLE::  @cindex\s-+THREAD_RUNNABLE +//* awaitEvent::  @cindex\s-+awaitEvent +//* awakenBlockedQueue::  @cindex\s-+awakenBlockedQueue +//* awaken_blocked_queue::  @cindex\s-+awaken_blocked_queue +//* context_switch::  @cindex\s-+context_switch +//* exitScheduler::  @cindex\s-+exitScheduler +//* gc_pending_cond::  @cindex\s-+gc_pending_cond +//* initScheduler::  @cindex\s-+initScheduler +//* raiseAsync::  @cindex\s-+raiseAsync +//* sched_mutex::  @cindex\s-+sched_mutex +//* startTasks::  @cindex\s-+startTasks +//* task_info::  @cindex\s-+task_info +//* thread_ready_cond::  @cindex\s-+thread_ready_cond +//* unblockOne::  @cindex\s-+unblockOne +//@end index diff --git a/ghc/rts/StgMiscClosures.hc b/ghc/rts/StgMiscClosures.hc index 8c436d8b04..4809be7348 100644 --- a/ghc/rts/StgMiscClosures.hc +++ b/ghc/rts/StgMiscClosures.hc 
@@ -1,5 +1,5 @@  /* ----------------------------------------------------------------------------- - * $Id: StgMiscClosures.hc,v 1.30 1999/11/30 11:43:26 simonmar Exp $ + * $Id: StgMiscClosures.hc,v 1.31 2000/01/13 14:34:05 hwloidl Exp $   *   * (c) The GHC Team, 1998-1999   * @@ -9,12 +9,17 @@  #include "Rts.h"  #include "RtsUtils.h" +#include "RtsFlags.h"  #include "StgMiscClosures.h"  #include "HeapStackCheck.h"   /* for stg_gen_yield */  #include "Storage.h"  #include "StoragePriv.h"  #include "ProfRts.h"  #include "SMP.h" +#if defined(GRAN) || defined(PAR) +# include "GranSimRts.h"      /* for DumpRawGranEvent */ +# include "StgRun.h"	/* for StgReturn and register saving */ +#endif  #ifdef HAVE_STDIO_H  #include <stdio.h> @@ -25,6 +30,20 @@   */  #define DUMP_ERRMSG(msg) STGCALL2(fprintf,stderr,msg) +/* +  Template for the entry code of non-enterable closures. +*/ + +#define NON_ENTERABLE_ENTRY_CODE(type)					\ +STGFUN(type##_entry)							\ +{									\ +  FB_									\ +    DUMP_ERRMSG(#type " object entered!\n");                            \ +    STGCALL1(raiseError, errorHandler);					\ +    stg_exit(EXIT_FAILURE); /* not executed */				\ +  FE_									\ +} +  /* -----------------------------------------------------------------------------     Entry code for an indirection. @@ -185,6 +204,11 @@ INFO_TABLE(BLACKHOLE_info, BLACKHOLE_entry,0,2,BLACKHOLE,,EF_,0,0);  STGFUN(BLACKHOLE_entry)  {    FB_ +#if defined(GRAN) +    /* Before overwriting TSO_LINK */ +    STGCALL3(GranSimBlock,CurrentTSO,CurrentProc,(StgClosure *)R1.p /*Node*/); +#endif +  #ifdef SMP      CMPXCHG(R1.cl->header.info, &BLACKHOLE_info, &WHITEHOLE_info);  #endif @@ -192,15 +216,43 @@ STGFUN(BLACKHOLE_entry)      TICK_ENT_BH();      /* Put ourselves on the blocking queue for this black hole */ -    CurrentTSO->link = (StgTSO *)&END_TSO_QUEUE_closure; +#if defined(GRAN) || defined(PAR) +    /* in fact, only difference is the type of the end-of-queue marker! 
*/ +    CurrentTSO->link = END_BQ_QUEUE; +    ((StgBlockingQueue *)R1.p)->blocking_queue = (StgBlockingQueueElement *)CurrentTSO; +#else +    CurrentTSO->link = END_TSO_QUEUE;      ((StgBlockingQueue *)R1.p)->blocking_queue = CurrentTSO; +#endif +    /* jot down why and on what closure we are blocked */      CurrentTSO->why_blocked = BlockedOnBlackHole;      CurrentTSO->block_info.closure = R1.cl; +    /* closure is mutable since something has just been added to its BQ */      recordMutable((StgMutClosure *)R1.cl);      /* Change the BLACKHOLE into a BLACKHOLE_BQ */      ((StgBlockingQueue *)R1.p)->header.info = &BLACKHOLE_BQ_info; + +#if defined(PAR) +    /* Save the Thread State here, before calling RTS routines below! */ +    SAVE_THREAD_STATE(1); + +    /* if collecting stats update the execution time etc */ +    if (RtsFlags.ParFlags.ParStats.Full) { +      /* Note that CURRENT_TIME may perform an unsafe call */ +      //rtsTime now = CURRENT_TIME; /* Now */ +      CurrentTSO->par.exectime += CURRENT_TIME - CurrentTSO->par.blockedat; +      CurrentTSO->par.blockcount++; +      CurrentTSO->par.blockedat = CURRENT_TIME; +      DumpRawGranEvent(CURRENT_PROC, thisPE, +		       GR_BLOCK, CurrentTSO, (StgClosure *)R1.p, 0); +    } + +    THREAD_RETURN(1);  /* back to the scheduler */   +#else      /* stg_gen_block is too heavyweight, use a specialised one */      BLOCK_NP(1); +#endif +    FE_  } @@ -208,6 +260,11 @@ INFO_TABLE(BLACKHOLE_BQ_info, BLACKHOLE_BQ_entry,1,1,BLACKHOLE_BQ,,EF_,0,0);  STGFUN(BLACKHOLE_BQ_entry)  {    FB_ +#if defined(GRAN) +    /* Before overwriting TSO_LINK */ +    STGCALL3(GranSimBlock,CurrentTSO,CurrentProc,(StgClosure *)R1.p /*Node*/); +#endif +  #ifdef SMP      CMPXCHG(R1.cl->header.info, &BLACKHOLE_BQ_info, &WHITEHOLE_info);  #endif @@ -215,42 +272,156 @@ STGFUN(BLACKHOLE_BQ_entry)      TICK_ENT_BH();      /* Put ourselves on the blocking queue for this black hole */ -    CurrentTSO->why_blocked = BlockedOnBlackHole; -    
CurrentTSO->block_info.closure = R1.cl;      CurrentTSO->link = ((StgBlockingQueue *)R1.p)->blocking_queue;      ((StgBlockingQueue *)R1.p)->blocking_queue = CurrentTSO; +    /* jot down why and on what closure we are blocked */ +    CurrentTSO->why_blocked = BlockedOnBlackHole; +    CurrentTSO->block_info.closure = R1.cl;  #ifdef SMP      ((StgBlockingQueue *)R1.p)->header.info = &BLACKHOLE_BQ_info;  #endif +#if defined(PAR) +    /* Save the Thread State here, before calling RTS routines below! */ +    SAVE_THREAD_STATE(1); + +    /* if collecting stats update the execution time etc */ +    if (RtsFlags.ParFlags.ParStats.Full) { +      /* Note that CURRENT_TIME may perform an unsafe call */ +      //rtsTime now = CURRENT_TIME; /* Now */ +      CurrentTSO->par.exectime += CURRENT_TIME - CurrentTSO->par.blockedat; +      CurrentTSO->par.blockcount++; +      CurrentTSO->par.blockedat = CURRENT_TIME; +      DumpRawGranEvent(CURRENT_PROC, thisPE, +		       GR_BLOCK, CurrentTSO, (StgClosure *)R1.p, 0); +    } + +    THREAD_RETURN(1);  /* back to the scheduler */   +#else      /* stg_gen_block is too heavyweight, use a specialised one */      BLOCK_NP(1); +#endif +  FE_ +} + +/* +   Revertible black holes are needed in the parallel world, to handle +   negative acknowledgements of messages containing updatable closures. +   The idea is that when the original message is transmitted, the closure +   is turned into a revertible black hole...an object which acts like a +   black hole when local threads try to enter it, but which can be reverted +   back to the original closure if necessary. + +   It's actually a lot like a blocking queue (BQ) entry, because revertible +   black holes are initially set up with an empty blocking queue. 
+*/ + +#if defined(PAR) || defined(GRAN) + +INFO_TABLE(RBH_info, RBH_entry,1,1,RBH,,EF_,0,0); +STGFUN(RBH_entry) +{ +  FB_ +# if defined(GRAN) +    /* mainly statistics gathering for GranSim simulation */ +    STGCALL3(GranSimBlock,CurrentTSO,CurrentProc,(StgClosure *)R1.p /*Node*/); +# endif + +    /* exactly the same as a BLACKHOLE_BQ_entry -- HWL */ +    /* Put ourselves on the blocking queue for this black hole */ +    CurrentTSO->link = ((StgBlockingQueue *)R1.p)->blocking_queue; +    ((StgBlockingQueue *)R1.p)->blocking_queue = CurrentTSO; +    /* jot down why and on what closure we are blocked */ +    CurrentTSO->why_blocked = BlockedOnBlackHole; +    CurrentTSO->block_info.closure = R1.cl; + +#if defined(PAR) +    /* Save the Thread State here, before calling RTS routines below! */ +    SAVE_THREAD_STATE(1); + +    /* if collecting stats update the execution time etc */ +    if (RtsFlags.ParFlags.ParStats.Full) { +      /* Note that CURRENT_TIME may perform an unsafe call */ +      //rtsTime now = CURRENT_TIME; /* Now */ +      CurrentTSO->par.exectime += CURRENT_TIME - CurrentTSO->par.blockedat; +      CurrentTSO->par.blockcount++; +      CurrentTSO->par.blockedat = CURRENT_TIME; +      DumpRawGranEvent(CURRENT_PROC, thisPE, +		       GR_BLOCK, CurrentTSO, (StgClosure *)R1.p, 0); +    } + +    THREAD_RETURN(1);  /* back to the scheduler */   +#else +    /* saves thread state and leaves thread in ThreadEnterGHC state; */ +    /* stg_gen_block is too heavyweight, use a specialised one */ +    BLOCK_NP(1);  +#endif +    FE_  } +INFO_TABLE(RBH_Save_0_info, RBH_Save_0_entry,0,2,CONSTR,,EF_,0,0); +NON_ENTERABLE_ENTRY_CODE(RBH_Save_0); + +INFO_TABLE(RBH_Save_1_info, RBH_Save_1_entry,1,1,CONSTR,,EF_,0,0); +NON_ENTERABLE_ENTRY_CODE(RBH_Save_1); + +INFO_TABLE(RBH_Save_2_info, RBH_Save_2_entry,2,0,CONSTR,,EF_,0,0); +NON_ENTERABLE_ENTRY_CODE(RBH_Save_2); +#endif /* defined(PAR) || defined(GRAN) */ +  /* identical to BLACKHOLEs except for the infotag */  
INFO_TABLE(CAF_BLACKHOLE_info, CAF_BLACKHOLE_entry,0,2,CAF_BLACKHOLE,,EF_,0,0);  STGFUN(CAF_BLACKHOLE_entry)  {    FB_ +#if defined(GRAN) +    /* mainly statistics gathering for GranSim simulation */ +    STGCALL3(GranSimBlock,CurrentTSO,CurrentProc,(StgClosure *)R1.p /*Node*/); +#endif +  #ifdef SMP      CMPXCHG(R1.cl->header.info, &CAF_BLACKHOLE_info, &WHITEHOLE_info); +#endif      TICK_ENT_BH();      /* Put ourselves on the blocking queue for this black hole */ -    CurrentTSO->link = (StgTSO *)&END_TSO_QUEUE_closure; +#if defined(GRAN) || defined(PAR) +    /* in fact, only difference is the type of the end-of-queue marker! */ +    CurrentTSO->link = END_BQ_QUEUE; +    ((StgBlockingQueue *)R1.p)->blocking_queue = (StgBlockingQueueElement *)CurrentTSO; +#else +    CurrentTSO->link = END_TSO_QUEUE;      ((StgBlockingQueue *)R1.p)->blocking_queue = CurrentTSO; +#endif +    /* jot down why and on what closure we are blocked */      CurrentTSO->why_blocked = BlockedOnBlackHole;      CurrentTSO->block_info.closure = R1.cl; +    /* closure is mutable since something has just been added to its BQ */      recordMutable((StgMutClosure *)R1.cl);      /* Change the CAF_BLACKHOLE into a BLACKHOLE_BQ */      ((StgBlockingQueue *)R1.p)->header.info = &BLACKHOLE_BQ_info; -    /* stg_gen_block is too heavyweight, use a specialised one */ -    BLOCK_NP(1); +#if defined(PAR) +    /* Save the Thread State here, before calling RTS routines below! 
*/ +    SAVE_THREAD_STATE(1); + +    /* if collecting stats update the execution time etc */ +    if (RtsFlags.ParFlags.ParStats.Full) { +      /* Note that CURRENT_TIME may perform an unsafe call */ +      //rtsTime now = CURRENT_TIME; /* Now */ +      CurrentTSO->par.exectime += CURRENT_TIME - CurrentTSO->par.blockedat; +      CurrentTSO->par.blockcount++; +      CurrentTSO->par.blockedat = CURRENT_TIME; +      DumpRawGranEvent(CURRENT_PROC, thisPE, +		       GR_BLOCK, CurrentTSO, (StgClosure *)R1.p, 0); +    } + +    THREAD_RETURN(1);  /* back to the scheduler */    #else -    JMP_(BLACKHOLE_entry); +    /* stg_gen_block is too heavyweight, use a specialised one */ +    BLOCK_NP(1);  #endif    FE_ @@ -301,17 +472,9 @@ EF_(BCO_entry) {  /* -----------------------------------------------------------------------------     Some static info tables for things that don't get entered, and     therefore don't need entry code (i.e. boxed but unpointed objects) +   NON_ENTERABLE_ENTRY_CODE now defined at the beginning of the file     -------------------------------------------------------------------------- */ -#define NON_ENTERABLE_ENTRY_CODE(type)					\ -STGFUN(type##_entry)							\ -{									\ -  FB_									\ -    DUMP_ERRMSG(#type " object entered!\n");                            \ -    STGCALL1(shutdownHaskellAndExit, EXIT_FAILURE);			\ -  FE_									\ -} -  INFO_TABLE(TSO_info, TSO_entry, 0,0,TSO,,EF_,0,0);  NON_ENTERABLE_ENTRY_CODE(TSO); diff --git a/ghc/rts/Storage.h b/ghc/rts/Storage.h index c996edf858..17076bf852 100644 --- a/ghc/rts/Storage.h +++ b/ghc/rts/Storage.h @@ -1,5 +1,5 @@  /* ----------------------------------------------------------------------------- - * $Id: Storage.h,v 1.13 1999/11/11 11:49:26 simonmar Exp $ + * $Id: Storage.h,v 1.14 2000/01/13 14:34:05 hwloidl Exp $   *   * (c) The GHC Team, 1998-1999   * @@ -178,5 +178,10 @@ updateWithPermIndirection(const StgInfoTable *info, StgClosure *p1, StgClosure *  extern StgCAF* enteredCAFs; +#if 
defined(DEBUG) +void printMutOnceList(generation *gen); +void printMutableList(generation *gen); +#endif DEBUG +  #endif STORAGE_H diff --git a/ghc/rts/Updates.hc b/ghc/rts/Updates.hc index 53f24763aa..38e69e826d 100644 --- a/ghc/rts/Updates.hc +++ b/ghc/rts/Updates.hc @@ -1,5 +1,5 @@  /* ----------------------------------------------------------------------------- - * $Id: Updates.hc,v 1.24 1999/12/01 14:34:39 simonmar Exp $ + * $Id: Updates.hc,v 1.25 2000/01/13 14:34:05 hwloidl Exp $   *   * (c) The GHC Team, 1998-1999   * @@ -13,6 +13,9 @@  #include "HeapStackCheck.h"  #include "Storage.h"  #include "ProfRts.h" +#if defined(GRAN) || defined(PAR) +# include "FetchMe.h" +#endif  /*    The update frame return address must be *polymorphic*, that means @@ -245,11 +248,6 @@ EXTFUN(stg_update_PAP)       */      Fun = R1.cl; -#if defined(GRAN_COUNT) -#error Fixme. -      ++nPAPs; -#endif -      /* Just copy the whole block of stack between the stack pointer       * and the update frame pointer.       */ diff --git a/ghc/rts/parallel/0Hash.c b/ghc/rts/parallel/0Hash.c new file mode 100644 index 0000000000..56e6646495 --- /dev/null +++ b/ghc/rts/parallel/0Hash.c @@ -0,0 +1,321 @@ +/*----------------------------------------------------------------------------- + * $Id: 0Hash.c,v 1.2 2000/01/13 14:34:06 hwloidl Exp $ + * + * (c) The AQUA Project, Glasgow University, 1995-1998 + * (c) The GHC Team, 1999 + * + * Dynamically expanding linear hash tables, as described in + * Per-\AAke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988, + * pp. 446 -- 457. 
+ * -------------------------------------------------------------------------- */ + +/*  +   Replaced with ghc/rts/Hash.c in the new RTS +*/ + +#if 0 + +#include "Rts.h" +#include "Hash.h" +#include "RtsUtils.h" + +#define HSEGSIZE    1024    /* Size of a single hash table segment */ +			    /* Also the minimum size of a hash table */ +#define HDIRSIZE    1024    /* Size of the segment directory */ +			    /* Maximum hash table size is HSEGSIZE * HDIRSIZE */ +#define HLOAD	    5	    /* Maximum average load of a single hash bucket */ + +#define HCHUNK	    (1024 * sizeof(W_) / sizeof(HashList)) +			    /* Number of HashList cells to allocate in one go */ + + +/* Linked list of (key, data) pairs for separate chaining */ +struct hashlist { +    StgWord key; +    void *data; +    struct hashlist *next;  /* Next cell in bucket chain (same hash value) */ +}; + +typedef struct hashlist HashList; + +struct hashtable { +    int split;		    /* Next bucket to split when expanding */ +    int max;		    /* Max bucket of smaller table */ +    int mask1;		    /* Mask for doing the mod of h_1 (smaller table) */ +    int mask2;		    /* Mask for doing the mod of h_2 (larger table) */ +    int kcount;		    /* Number of keys */ +    int bcount;		    /* Number of buckets */ +    HashList **dir[HDIRSIZE];	/* Directory of segments */ +}; + +/* ----------------------------------------------------------------------------- + * Hash first using the smaller table.  If the bucket is less than the + * next bucket to be split, re-hash using the larger table. 
+ * -------------------------------------------------------------------------- */ + +static int +hash(HashTable *table, W_ key) +{ +    int bucket; + +    /* Strip the boring zero bits */ +    key /= sizeof(StgWord); + +    /* Mod the size of the hash table (a power of 2) */ +    bucket = key & table->mask1; + +    if (bucket < table->split) { +	/* Mod the size of the expanded hash table (also a power of 2) */ +	bucket = key & table->mask2; +    } +    return bucket; +} + +/* ----------------------------------------------------------------------------- + * Allocate a new segment of the dynamically growing hash table. + * -------------------------------------------------------------------------- */ + +static void +allocSegment(HashTable *table, int segment) +{ +    table->dir[segment] = stgMallocBytes(HSEGSIZE * sizeof(HashList *),  +					 "allocSegment"); +} + + +/* ----------------------------------------------------------------------------- + * Expand the larger hash table by one bucket, and split one bucket + * from the smaller table into two parts.  Only the bucket referenced + * by @table->split@ is affected by the expansion. + * -------------------------------------------------------------------------- */ + +static void +expand(HashTable *table) +{ +    int oldsegment; +    int oldindex; +    int newbucket; +    int newsegment; +    int newindex; +    HashList *hl; +    HashList *next; +    HashList *old, *new; + +    if (table->split + table->max >= HDIRSIZE * HSEGSIZE) +	/* Wow!  That's big.  Too big, so don't expand. 
*/ +	return; + +    /* Calculate indices of bucket to split */ +    oldsegment = table->split / HSEGSIZE; +    oldindex = table->split % HSEGSIZE; + +    newbucket = table->max + table->split; + +    /* And the indices of the new bucket */ +    newsegment = newbucket / HSEGSIZE; +    newindex = newbucket % HSEGSIZE; + +    if (newindex == 0) +	allocSegment(table, newsegment); + +    if (++table->split == table->max) { +	table->split = 0; +	table->max *= 2; +	table->mask1 = table->mask2; +	table->mask2 = table->mask2 << 1 | 1; +    } +    table->bcount++; + +    /* Split the bucket, paying no attention to the original order */ + +    old = new = NULL; +    for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) { +	next = hl->next; +	if (hash(table, hl->key) == newbucket) { +	    hl->next = new; +	    new = hl; +	} else { +	    hl->next = old; +	    old = hl; +	} +    } +    table->dir[oldsegment][oldindex] = old; +    table->dir[newsegment][newindex] = new; + +    return; +} + +void * +lookupHashTable(HashTable *table, StgWord key) +{ +    int bucket; +    int segment; +    int index; +    HashList *hl; + +    bucket = hash(table, key); +    segment = bucket / HSEGSIZE; +    index = bucket % HSEGSIZE; + +    for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) +	if (hl->key == key) +	    return hl->data; + +    /* It's not there */ +    return NULL; +} + +/* ----------------------------------------------------------------------------- + * We allocate the hashlist cells in large chunks to cut down on malloc + * overhead.  Although we keep a free list of hashlist cells, we make + * no effort to actually return the space to the malloc arena. 
+ * -------------------------------------------------------------------------- */ + +static HashList *freeList = NULL; + +static HashList * +allocHashList(void) +{ +    HashList *hl, *p; + +    if ((hl = freeList) != NULL) { +	freeList = hl->next; +    } else { +        hl = stgMallocBytes(HCHUNK * sizeof(HashList), "allocHashList"); + +	freeList = hl + 1; +	for (p = freeList; p < hl + HCHUNK - 1; p++) +	    p->next = p + 1; +	p->next = NULL; +    } +    return hl; +} + +static void +freeHashList(HashList *hl) +{ +    hl->next = freeList; +    freeList = hl; +} + +void +insertHashTable(HashTable *table, StgWord key, void *data) +{ +    int bucket; +    int segment; +    int index; +    HashList *hl; + +    /* We want no duplicates */ +    ASSERT(lookupHashTable(table, key) == NULL); +     +    /* When the average load gets too high, we expand the table */ +    if (++table->kcount >= HLOAD * table->bcount) +	expand(table); + +    bucket = hash(table, key); +    segment = bucket / HSEGSIZE; +    index = bucket % HSEGSIZE; + +    hl = allocHashList(); + +    hl->key = key; +    hl->data = data; +    hl->next = table->dir[segment][index]; +    table->dir[segment][index] = hl; + +} + +void * +removeHashTable(HashTable *table, StgWord key, void *data) +{ +    int bucket; +    int segment; +    int index; +    HashList *hl; +    HashList *prev = NULL; + +    bucket = hash(table, key); +    segment = bucket / HSEGSIZE; +    index = bucket % HSEGSIZE; + +    for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) { +	if (hl->key == key && (data == NULL || hl->data == data)) { +	    if (prev == NULL) +		table->dir[segment][index] = hl->next; +	    else +		prev->next = hl->next; +	    table->kcount--; +	    return hl->data; +	} +	prev = hl; +    } + +    /* It's not there */ +    ASSERT(data == NULL); +    return NULL; +} + +/* ----------------------------------------------------------------------------- + * When we free a hash table, we are also good enough to free 
the + * data part of each (key, data) pair, as long as our caller can tell + * us how to do it. + * -------------------------------------------------------------------------- */ + +void +freeHashTable(HashTable *table, void (*freeDataFun)(void *) ) +{ +    long segment; +    long index; +    HashList *hl; +    HashList *next; + +    /* The last bucket with something in it is table->max + table->split - 1 */ +    segment = (table->max + table->split - 1) / HSEGSIZE; +    index = (table->max + table->split - 1) % HSEGSIZE; + +    while (segment >= 0) { +	while (index >= 0) { +	    for (hl = table->dir[segment][index]; hl != NULL; hl = next) { +		next = hl->next; +		if (freeDataFun != NULL) +		    (*freeDataFun)(hl->data); +		freeHashList(hl); +	    } +	    index--; +	} +	free(table->dir[segment]); +	segment--; +	index = HSEGSIZE - 1; +    } +    free(table); +} + +/* ----------------------------------------------------------------------------- + * When we initialize a hash table, we set up the first segment as well, + * initializing all of the first segment's hash buckets to NULL. + * -------------------------------------------------------------------------- */ + +HashTable * +allocHashTable(void) +{ +    HashTable *table; +    HashList **hb; + +    table = stgMallocBytes(sizeof(HashTable),"allocHashTable"); + +    allocSegment(table, 0); + +    for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++) +	*hb = NULL; + +    table->split = 0; +    table->max = HSEGSIZE; +    table->mask1 = HSEGSIZE - 1; +    table->mask2 = 2 * HSEGSIZE - 1; +    table->kcount = 0; +    table->bcount = HSEGSIZE; + +    return table; +} +#endif diff --git a/ghc/rts/parallel/0Parallel.h b/ghc/rts/parallel/0Parallel.h new file mode 100644 index 0000000000..d52bf00fc2 --- /dev/null +++ b/ghc/rts/parallel/0Parallel.h @@ -0,0 +1,414 @@ +/* +  Time-stamp: <Mon Oct 04 1999 14:50:28 Stardate: [-30]3692.88 hwloidl> +  +  Definitions for parallel machines. 
+ +This section contains definitions applicable only to programs compiled +to run on a parallel machine, i.e. on GUM. Some of these definitions +are also used when simulating parallel execution, i.e. on GranSim. +  */ + +/* +  ToDo: Check the PAR specfic part of this file  +        Move stuff into Closures.h and ClosureMacros.h  +	Clean-up GRAN specific code +  -- HWL +  */ + +#ifndef PARALLEL_H +#define PARALLEL_H + +#if defined(PAR) || defined(GRAN)        /* whole file */ + +#include "Rts.h" +#include "GranSim.h" +//#include "ClosureTypes.h" + +//@menu +//* Basic definitions::		 +//* Externs and types::		 +//* Dummy defs::		 +//* Par specific fixed headers::   +//* Parallel only heap objects::   +//* Packing definitions::	 +//* End of File::		 +//@end menu +//*/ + +//@node Basic definitions, Externs and types +//@section Basic definitions + +/* SET_PAR_HDR and SET_STATIC_PAR_HDR now live in ClosureMacros.h */ + +/* Needed for dumping routines */ +#if defined(PAR) +# define TIME                      ullong +# define CURRENT_TIME              msTime() +# define TIME_ON_PROC(p)           msTime() +# define CURRENT_PROC              thisPE +# define BINARY_STATS              RtsFlags.ParFlags.granSimStats_Binary +#elif defined(GRAN) +# define TIME                      rtsTime +# define CURRENT_TIME              CurrentTime[CurrentProc] +# define TIME_ON_PROC(p)           CurrentTime[p] +# define CURRENT_PROC              CurrentProc +# define BINARY_STATS              RtsFlags.GranFlags.granSimStats_Binary +#endif + +#if defined(PAR) +#  define MAX_PES	256		/* Maximum number of processors */ +	/* MAX_PES is enforced by SysMan, which does not +	   allow more than this many "processors". +	   This is important because PackGA [GlobAddr.lc] +	   **assumes** that a PE# can fit in 8+ bits. 
+	*/ +#endif + +//@node Externs and types, Dummy defs, Basic definitions +//@section Externs and types + +#if defined(PAR) +/* GUM: one spark queue on each PE, and each PE sees only its own spark queue */ +extern rtsSparkQ pending_sparks_hd; +extern rtsSparkQ pending_sparks_tl; +#elif defined(GRAN) +/* GranSim: a globally visible array of spark queues */ +extern rtsSparkQ pending_sparks_hds[]; +extern rtsSparkQ pending_sparks_tls[]; +#endif +extern unsigned int /* nat */ spark_queue_len(PEs proc); + +extern StgInt SparksAvail;     /* How many sparks are available */ + +/* prototypes of spark routines */ +/* ToDo: check whether all have to be visible -- HWL */ +#if defined(GRAN) +rtsSpark *newSpark(StgClosure *node, StgInt name, StgInt gran_info, StgInt size_info, StgInt par_info, StgInt local); +void disposeSpark(rtsSpark *spark); +void disposeSparkQ(rtsSparkQ spark); +void add_to_spark_queue(rtsSpark *spark); +void delete_from_spark_queue (rtsSpark *spark); +#endif + +#define STATS_FILENAME_MAXLEN	128 + +/* Where to write the log file */ +//extern FILE *gr_file; +extern char gr_filename[STATS_FILENAME_MAXLEN]; + +#if defined(GRAN) +int init_gr_simulation(char *rts_argv[], int rts_argc, char *prog_argv[], int prog_argc); +void end_gr_simulation(void); +#endif  + +#if defined(PAR) +extern I_ do_sp_profile; + +extern P_ PendingFetches; +extern GLOBAL_TASK_ID *PEs; + +extern rtsBool IAmMainThread, GlobalStopPending; +extern rtsBool fishing; +extern GLOBAL_TASK_ID SysManTask; +extern int seed;			/*pseudo-random-number generator seed:*/ +					/*Initialised in ParInit*/ +extern I_ threadId;                     /*Number of Threads that have existed on a PE*/ +extern GLOBAL_TASK_ID mytid; + +extern int  nPEs; + +extern rtsBool InGlobalGC;  	/* Are we in the midst of performing global GC */ + +extern HashTable *pGAtoGALAtable; +extern HashTable *LAtoGALAtable; +extern GALA *freeIndirections; +extern GALA *liveIndirections; +extern GALA *freeGALAList; +extern GALA 
*liveRemoteGAs; +extern int thisPE; + +void RunParallelSystem (StgPtr program_closure); +void initParallelSystem(); +void SynchroniseSystem(); + +void registerTask (GLOBAL_TASK_ID gtid); +globalAddr *LAGAlookup (P_ addr); +P_ GALAlookup (globalAddr *ga); +globalAddr *MakeGlobal (P_ addr, rtsBool preferred); +globalAddr *setRemoteGA (P_ addr, globalAddr *ga, rtsBool preferred); +void splitWeight (globalAddr *to, globalAddr *from); +globalAddr *addWeight (globalAddr *ga); +void initGAtables(); +W_ taskIDtoPE (GLOBAL_TASK_ID gtid); +void RebuildLAGAtable(); + +void *lookupHashTable (HashTable *table, StgWord key); +void insertHashTable (HashTable *table, StgWord key, void *data); +void freeHashTable (HashTable *table, void (*freeDataFun) ((void *data))); +HashTable *allocHashTable(); +void *removeHashTable (HashTable *table, StgWord key, void *data); +#endif /* PAR */ + +/* Interface for dumping routines (i.e. writing to log file) */ +void DumpGranEvent(GranEventType name, StgTSO *tso); +void DumpRawGranEvent(PEs proc, PEs p, GranEventType name,  + 	              StgTSO *tso, StgClosure *node, StgInt sparkname, StgInt len); +//void DumpEndEvent(PEs proc, StgTSO *tso, rtsBool mandatory_thread); + +//@node Dummy defs, Par specific fixed headers, Externs and types +//@section Dummy defs + +/* +Get this out of the way.  These are all null definitions. 
+*/ + + +//#  define GA_HDR_SIZE			0  +//#  define GA(closure)	        	/*nothing */  +   +//#  define SET_GA(closure,ga)		/* nothing */  +//#  define SET_STATIC_GA(closure)	/* nothing */  +//#  define SET_GRAN_HDR(closure,pe)      /* nothing */  +//#  define SET_STATIC_PROCS(closure)	/* nothing */  +   +//#  define SET_TASK_ACTIVITY(act)	/* nothing */  + +#if defined(GRAN) + +#  define GA_HDR_SIZE			1 + +#  define PROCS_HDR_POSN		PAR_HDR_POSN +#  define PROCS_HDR_SIZE		1 + +/* Accessing components of the field */ +#  define PROCS(closure)	        ((closure)->header.gran.procs) +/* SET_PROCS is now SET_GRAN_HEADER in ClosureMacros.h. */ +#endif + + +//@node Par specific fixed headers, Parallel only heap objects, Dummy defs +//@section Par specific fixed headers + +/* +Definitions relating to the entire parallel-only fixed-header field. + +On GUM, the global addresses for each local closure are stored in a separate +hash table, rather then with the closure in the heap.  We call @getGA@ to +look up the global address associated with a local closure (0 is returned +for local closures that have no global address), and @setGA@ to store a new +global address for a local closure which did not previously have one. +*/ + +#if defined(PAR)  + +#  define GA_HDR_SIZE			0 +   +#  define GA(closure)		        getGA(closure) +   +#  define SET_GA(closure, ga)             setGA(closure,ga) +#  define SET_STATIC_GA(closure) +#  define SET_GRAN_HDR(closure,pe) +#  define SET_STATIC_PROCS(closure) +   +#  define MAX_GA_WEIGHT			0	/* Treat as 2^n */ +   +W_ PackGA ((W_, int)); +   /* There was a PACK_GA macro here; but we turned it into the PackGA +      routine [GlobAddr.lc] (because it needs to do quite a bit of +      paranoia checking.  Phil & Will (95/08) +   */ + +/* At the moment, there is no activity profiling for GUM.  This may change. 
*/ +#  define SET_TASK_ACTIVITY(act)        /* nothing */ +#endif + +//@node Parallel only heap objects, Packing definitions, Par specific fixed headers +//@section Parallel only heap objects + +// NB: The following definitons are BOTH for GUM and GrAnSim -- HWL + +/*   All in Closures.h and CLosureMacros.h */ + +//@node Packing definitions, End of File, Parallel only heap objects +//@section Packing definitions + +//@menu +//* GUM::			 +//* GranSim::			 +//@end menu +//*/ + +//@node GUM, GranSim, Packing definitions, Packing definitions +//@subsection GUM + +#if defined(PAR)  +/* +Symbolic constants for the packing code. + +This constant defines how many words of data we can pack into a single +packet in the parallel (GUM) system. +*/ + +//@menu +//* Externs::			 +//* Prototypes::		 +//* Macros::			 +//@end menu +//*/ + +//@node Externs, Prototypes, GUM, GUM +//@subsubsection Externs + +extern W_      *PackBuffer;      /* size: can be set via option */ +extern long *buffer;             /* HWL_ */ +extern W_ *freeBuffer;           /* HWL_ */ +extern W_ *packBuffer;           /* HWL_ */ + +extern void    InitPackBuffer(STG_NO_ARGS); +extern void    InitMoreBuffers(STG_NO_ARGS); +extern void    InitPendingGABuffer(W_ size);  +extern void    AllocClosureQueue(W_ size); + +//@node Prototypes, Macros, Externs, GUM +//@subsubsection Prototypes + +void	InitPackBuffer(); +P_      PackTSO (P_ tso, W_ *size); +P_      PackStkO (P_ stko, W_ *size); +P_	AllocateHeap (W_ size);          /* Doesn't belong */ + +void    InitClosureQueue (); +P_      DeQueueClosure(); +void    QueueClosure (P_ closure); +rtsBool QueueEmpty(); +void    PrintPacket (P_ buffer); + +P_      get_closure_info (P_ closure, W_ *size, W_ *ptrs, W_ *nonptrs, W_ *vhs, char *type); + +rtsBool isOffset (globalAddr *ga), +	isFixed (globalAddr *ga); + +void    doGlobalGC(); + +P_      PackNearbyGraph (P_ closure,W_ *size); +P_      UnpackGraph (W_ *buffer, globalAddr **gamap, W_ *nGAs); + + +//@node Macros,  , 
Prototypes, GUM +//@subsubsection Macros + +#    define PACK_HEAP_REQUIRED  \ +      ((RtsFlags.ParFlags.packBufferSize - PACK_HDR_SIZE) / (PACK_GA_SIZE + _FHS) * (SPEC_HS + 2)) + +#  define MAX_GAS 	(RtsFlags.ParFlags.packBufferSize / PACK_GA_SIZE) + + +#  define PACK_GA_SIZE	3	/* Size of a packed GA in words */ +			        /* Size of a packed fetch-me in words */ +#  define PACK_FETCHME_SIZE (PACK_GA_SIZE + FIXED_HS) + +#  define PACK_HDR_SIZE	1	/* Words of header in a packet */ + +#  define PACK_PLC_SIZE	2	/* Size of a packed PLC in words */ + +#endif /* PAR */ + +//@node GranSim,  , GUM, Packing definitions +//@subsection GranSim + +#if defined(GRAN) +/* ToDo: Check which of the PAR routines are needed in GranSim -- HWL */ + +//@menu +//* Types::			 +//* Prototypes::		 +//* Macros::			 +//@end menu +//*/ + +//@node Types, Prototypes, GranSim, GranSim +//@subsubsection Types + +typedef struct rtsPackBuffer_ { +  StgInt /* nat */           size; +  StgInt /* nat */           unpacked_size; +  StgTSO       *tso; +  StgClosure  **buffer;   +} rtsPackBuffer; + +//@node Prototypes, Macros, Types, GranSim +//@subsubsection Prototypes + + +/* main packing functions */ +/* +rtsPackBuffer *PackNearbyGraph(StgClosure* closure, StgTSO* tso, nat *packbuffersize); +rtsPackBuffer *PackOneNode(StgClosure* closure, StgTSO* tso, nat *packbuffersize); +void PrintPacket(rtsPackBuffer *buffer); +StgClosure *UnpackGraph(rtsPackBuffer* buffer); +*/ +/* important auxiliary functions */ + +//StgInfoTable *get_closure_info(StgClosure* node, nat *size, nat *ptrs, nat *nonptrs, nat *vhs, char *info_hdr_ty); +int IS_BLACK_HOLE(StgClosure* node); +StgClosure *IS_INDIRECTION(StgClosure* node); +int IS_THUNK(StgClosure* closure); +char *display_info_type(StgClosure* closure, char *str); + +/*  +OLD CODE -- HWL +void  InitPackBuffer(void); +P_    AllocateHeap (W_ size); +P_    PackNearbyGraph (P_ closure, P_ tso, W_ *packbuffersize); +P_    PackOneNode (P_ closure, P_ tso, W_ *packbuffersize); 
+P_    UnpackGraph (P_ buffer); + +void    InitClosureQueue (void); +P_      DeQueueClosure(void); +void    QueueClosure (P_ closure); +// rtsBool QueueEmpty(); +void    PrintPacket (P_ buffer); +*/ + +// StgInfoTable *get_closure_info(StgClosure* node, unsigned int /* nat */ *size, unsigned int /* nat */ *ptrs, unsigned int /* nat */ *nonptrs, unsigned int /* nat */ *vhs, char *info_hdr_ty); +// int /* rtsBool */ IS_BLACK_HOLE(StgClosure* node)          ; + +//@node Macros,  , Prototypes, GranSim +//@subsubsection Macros + +/* These are needed in the packing code to get the size of the packet +   right. The closures itself are never built in GrAnSim. */ +#  define FETCHME_VHS				IND_VHS +#  define FETCHME_HS				IND_HS +   +#  define FETCHME_GA_LOCN                       FETCHME_HS +   +#  define FETCHME_CLOSURE_SIZE(closure)		IND_CLOSURE_SIZE(closure) +#  define FETCHME_CLOSURE_NoPTRS(closure)		0L +#  define FETCHME_CLOSURE_NoNONPTRS(closure)	(IND_CLOSURE_SIZE(closure)-IND_VHS) +   +#  define MAX_GAS 	(RtsFlags.GranFlags.packBufferSize / PACK_GA_SIZE) +#  define PACK_GA_SIZE	3	/* Size of a packed GA in words */ +			        /* Size of a packed fetch-me in words */ +#  define PACK_FETCHME_SIZE (PACK_GA_SIZE + FIXED_HS) +#  define PACK_HDR_SIZE	4	/* Words of header in a packet */ + +#    define PACK_HEAP_REQUIRED  \ +      (RtsFlags.GranFlags.packBufferSize * sizeofW(StgClosure*) + \ +      2 * sizeofW(StgInt) + sizeofW(StgTSO*)) + +#    define PACK_FLAG_LOCN           0   +#    define PACK_TSO_LOCN            1 +#    define PACK_UNPACKED_SIZE_LOCN  2 +#    define PACK_SIZE_LOCN           3 +#    define MAGIC_PACK_FLAG          0xfabc + +#endif   /* GRAN */ + +//@node End of File,  , Packing definitions +//@section End of File + +#endif /* defined(PAR) || defined(GRAN)         whole file */ +#endif /* Parallel_H */ + + diff --git a/ghc/rts/parallel/0Unpack.c b/ghc/rts/parallel/0Unpack.c new file mode 100644 index 0000000000..fc4a8e50c3 --- /dev/null +++ 
b/ghc/rts/parallel/0Unpack.c @@ -0,0 +1,440 @@ +/* +  Time-stamp: <Wed Jan 12 2000 13:29:08 Stardate: [-30]4193.85 hwloidl> + +  Unpacking closures which have been exported to remote processors + +  This module defines routines for unpacking closures in the parallel +  runtime system (GUM). + +  In the case of GrAnSim, this module defines routines for *simulating* the +  unpacking of closures as it is done in the parallel runtime system. +*/ + +/*  +   Code in this file has been merged with Pack.c  +*/ + +#if 0 + +//@node Unpacking closures, , , +//@section Unpacking closures + +//@menu +//* Includes::			 +//* Prototypes::		 +//* GUM code::			 +//* GranSim Code::		 +//* Index::			 +//@end menu +//*/ + +//@node Includes, Prototypes, Unpacking closures, Unpacking closures +//@subsection Includes + +#include "Rts.h" +#include "RtsFlags.h" +#include "GranSimRts.h" +#include "ParallelRts.h" +#include "ParallelDebug.h" +#include "FetchMe.h" +#include "Storage.h" + +//@node Prototypes, GUM code, Includes, Unpacking closures +//@subsection Prototypes + +void     InitPacking(void); +# if defined(PAR) +void            InitPackBuffer(void); +# endif +/* Interface for ADT of closure queues */ +void    	  AllocClosureQueue(nat size); +void    	  InitClosureQueue(void); +rtsBool 	  QueueEmpty(void); +void    	  QueueClosure(StgClosure *closure); +StgClosure *DeQueueClosure(void); + +StgPtr AllocateHeap(nat size); + +//@node GUM code, GranSim Code, Prototypes, Unpacking closures +//@subsection GUM code + +#if defined(PAR)  + +//@node Local Definitions,  , GUM code, GUM code +//@subsubsection Local Definitions + +//@cindex PendingGABuffer +static globalAddr *PendingGABuffer;   +/* is initialised in main; */ + +//@cindex InitPendingGABuffer +void +InitPendingGABuffer(size) +nat size;  +{ +  PendingGABuffer = (globalAddr *)  +                      stgMallocBytes((size-PACK_HDR_SIZE)*2*sizeof(globalAddr), +				     "InitPendingGABuffer"); +} + +/* +  @CommonUp@ commons up two 
closures which we have discovered to be +  variants of the same object.  One is made an indirection to the other.  */ + +//@cindex CommonUp +void +CommonUp(StgClosure *src, StgClosure *dst) +{ +  StgBlockingQueueElement *bqe; + +  ASSERT(src != dst); +  switch (get_itbl(src)->type) { +  case BLACKHOLE_BQ: +    bqe = ((StgBlockingQueue *)src)->blocking_queue; +    break; + +  case FETCH_ME_BQ: +    bqe = ((StgFetchMeBlockingQueue *)src)->blocking_queue; +    break; +     +  case RBH: +    bqe = ((StgRBH *)src)->blocking_queue; +    break; +     +  case BLACKHOLE: +  case FETCH_ME: +    bqe = END_BQ_QUEUE; +    break; + +  default: +    /* Don't common up anything else */ +    return; +  } +  /* We do not use UPD_IND because that would awaken the bq, too */ +  // UPD_IND(src, dst); +  updateWithIndirection(get_itbl(src), src, dst); +  //ASSERT(!IS_BIG_MOTHER(INFO_PTR(dst))); +  if (bqe != END_BQ_QUEUE) +    awaken_blocked_queue(bqe, src); +} + +/* +  @UnpackGraph@ unpacks the graph contained in a message buffer.  It +  returns a pointer to the new graph.  The @gamap@ parameter is set to +  point to an array of (oldGA,newGA) pairs which were created as a result +  of unpacking the buffer; @nGAs@ is set to the number of GA pairs which +  were created. + +  The format of graph in the pack buffer is as defined in @Pack.lc@.  
*/ + +//@cindex UnpackGraph +StgClosure * +UnpackGraph(packBuffer, gamap, nGAs) +rtsPackBuffer *packBuffer; +globalAddr **gamap; +nat *nGAs; +{ +  nat size, ptrs, nonptrs, vhs; +  StgWord **buffer, **bufptr, **slotptr; +  globalAddr ga, *gaga; +  StgClosure *closure, *existing, +             *graphroot, *graph, *parent; +  StgInfoTable *ip, *oldip; +  nat bufsize, i, +      pptr = 0, pptrs = 0, pvhs; +  char str[80]; + +  InitPackBuffer();                  /* in case it isn't already init'd */ +  graphroot = (StgClosure *)NULL; + +  gaga = PendingGABuffer; + +  InitClosureQueue(); + +  /* Unpack the header */ +  bufsize = packBuffer->size; +  buffer = packBuffer->buffer; +  bufptr = buffer; + +  /* allocate heap */ +  if (bufsize > 0) { +    graph = allocate(bufsize); +    ASSERT(graph != NULL); +  } + +  parent = (StgClosure *)NULL; + +  do { +    /* This is where we will ultimately save the closure's address */ +    slotptr = bufptr; + +    /* First, unpack the next GA or PLC */ +    ga.weight = (rtsWeight) *bufptr++; + +    if (ga.weight > 0) { +      ga.payload.gc.gtid = (GlobalTaskId) *bufptr++; +      ga.payload.gc.slot = (int) *bufptr++; +    } else +      ga.payload.plc = (StgPtr) *bufptr++; +     +    /* Now unpack the closure body, if there is one */ +    if (isFixed(&ga)) { +      /* No more to unpack; just set closure to local address */ +      IF_PAR_DEBUG(pack, +		   belch("Unpacked PLC at %x", ga.payload.plc));  +      closure = ga.payload.plc; +    } else if (isOffset(&ga)) { +      /* No more to unpack; just set closure to cached address */ +      ASSERT(parent != (StgClosure *)NULL); +      closure = (StgClosure *) buffer[ga.payload.gc.slot]; +    } else { +      /* Now we have to build something. */ + +      ASSERT(bufsize > 0); + +      /* +       * Close your eyes.  You don't want to see where we're looking. 
You +       * can't get closure info until you've unpacked the variable header, +       * but you don't know how big it is until you've got closure info. +       * So...we trust that the closure in the buffer is organized the +       * same way as they will be in the heap...at least up through the +       * end of the variable header. +       */ +      ip = get_closure_info(bufptr, &size, &ptrs, &nonptrs, &vhs, str); +	   +      /*  +	 Remember, the generic closure layout is as follows: +	 +-------------------------------------------------+ +	 | FIXED HEADER | VARIABLE HEADER | PTRS | NON-PRS | +	 +-------------------------------------------------+ +      */ +      /* Fill in the fixed header */ +      for (i = 0; i < FIXED_HS; i++) +	((StgPtr)graph)[i] = *bufptr++; + +      if (ip->type == FETCH_ME) +	size = ptrs = nonptrs = vhs = 0; + +      /* Fill in the packed variable header */ +      for (i = 0; i < vhs; i++) +	((StgPtr)graph)[FIXED_HS + i] = *bufptr++; + +      /* Pointers will be filled in later */ + +      /* Fill in the packed non-pointers */ +      for (i = 0; i < nonptrs; i++) +	((StgPtr)graph)[FIXED_HS + i + vhs + ptrs] = *bufptr++; +                 +      /* Indirections are never packed */ +      // ASSERT(INFO_PTR(graph) != (W_) Ind_info_TO_USE); + +      /* Add to queue for processing */ +      QueueClosure(graph); +	 +      /* +       * Common up the new closure with any existing closure having the same +       * GA +       */ + +      if ((existing = GALAlookup(&ga)) == NULL) { +	globalAddr *newGA; +	/* Just keep the new object */ +	IF_PAR_DEBUG(pack, +		     belch("Unpacking new (%x, %d, %x)\n",  +			   ga.payload.gc.gtid, ga.payload.gc.slot, ga.weight)); + +	closure = graph; +	newGA = setRemoteGA(graph, &ga, rtsTrue); +	if (ip->type == FETCH_ME) +	  // FETCHME_GA(closure) = newGA; +	  ((StgFetchMe *)closure)->ga = newGA; +      } else { +	/* Two closures, one global name.  
Someone loses */ +	oldip = get_itbl(existing); + +	if ((oldip->type == FETCH_ME || IS_BLACK_HOLE(existing)) && +	    ip->type != FETCH_ME) { + +	  /* What we had wasn't worth keeping */ +	  closure = graph; +	  CommonUp(existing, graph); +	} else { + +	  /* +	   * Either we already had something worthwhile by this name or +	   * the new thing is just another FetchMe.  However, the thing we +	   * just unpacked has to be left as-is, or the child unpacking +	   * code will fail.  Remember that the way pointer words are +	   * filled in depends on the info pointers of the parents being +	   * the same as when they were packed. +	   */ +	  IF_PAR_DEBUG(pack, +		       belch("Unpacking old (%x, %d, %x), keeping %#lx",  +			     ga.payload.gc.gtid, ga.payload.gc.slot, ga.weight, +			     existing)); + +	  closure = existing; +	} +	/* Pool the total weight in the stored ga */ +	(void) addWeight(&ga); +      } + +      /* Sort out the global address mapping */ +      if ((ip_THUNK(ip) && !ip_UNPOINTED(ip)) ||  +	  (ip_MUTABLE(ip) && ip->type != FETCH_ME)) { +	/* Make up new GAs for single-copy closures */ +	globalAddr *newGA = makeGlobal(closure, rtsTrue); +	 +	ASSERT(closure == graph); + +	/* Create an old GA to new GA mapping */ +	*gaga++ = ga; +	splitWeight(gaga, newGA); +	ASSERT(gaga->weight == 1L << (BITS_IN(unsigned) - 1)); +	gaga++; +      } +      graph += FIXED_HS + (size < MIN_UPD_SIZE ? MIN_UPD_SIZE : size); +    } + +    /* +     * Set parent pointer to point to chosen closure.  If we're at the top of +     * the graph (our parent is NULL), then we want to arrange to return the +     * chosen closure to our caller (possibly in place of the allocated graph +     * root.) 
+     */ +    if (parent == NULL) +      graphroot = closure; +    else +      ((StgPtr)parent)[FIXED_HS + pvhs + pptr] = (StgWord) closure; + +    /* Save closure pointer for resolving offsets */ +    *slotptr = (StgWord) closure; + +    /* Locate next parent pointer */ +    pptr++; +    while (pptr + 1 > pptrs) { +      parent = DeQueueClosure(); + +      if (parent == NULL) +	break; +      else { +	(void) get_closure_info(parent, &size, &pptrs, &nonptrs, +					&pvhs, str); +	pptr = 0; +      } +    } +  } while (parent != NULL); + +  ASSERT(bufsize == 0 || graph - 1 <= SAVE_Hp); + +  *gamap = PendingGABuffer; +  *nGAs = (gaga - PendingGABuffer) / 2; + +  /* ToDo: are we *certain* graphroot has been set??? WDP 95/07 */ +  ASSERT(graphroot!=NULL); +  return (graphroot); +} +#endif  /* PAR */ + +//@node GranSim Code, Index, GUM code, Unpacking closures +//@subsection GranSim Code + +/* +   For GrAnSim: In general no actual unpacking should be necessary. We just +   have to walk over the graph and set the bitmasks appropriately. 
-- HWL */ + +//@node Unpacking,  , GranSim Code, GranSim Code +//@subsubsection Unpacking + +#if defined(GRAN) +void +CommonUp(StgClosure *src, StgClosure *dst) +{ +  barf("CommonUp: should never be entered in a GranSim setup"); +} + +/* This code fakes the unpacking of a somewhat virtual buffer */ +StgClosure* +UnpackGraph(buffer) +rtsPackBuffer* buffer; +{ +  nat size, ptrs, nonptrs, vhs, +      bufptr = 0; +  StgClosure *closure, *graphroot, *graph; +  StgInfoTable *ip; +  StgWord bufsize, unpackedsize, +          pptr = 0, pptrs = 0, pvhs; +  StgTSO* tso; +  char str[240], str1[80]; +  int i; + +  bufptr = 0; +  graphroot = buffer->buffer[0]; + +  tso = buffer->tso; + +  /* Unpack the header */ +  unpackedsize = buffer->unpacked_size; +  bufsize = buffer->size; + +  IF_GRAN_DEBUG(pack, +		belch("<<< Unpacking <<%d>> (buffer @ %p):\n    (root @ %p, PE %d,size=%d), demanded by TSO %d (%p)[PE %d]", +		      buffer->id, buffer, graphroot, where_is(graphroot),  +		      bufsize, tso->id, tso,  +		      where_is((StgClosure *)tso))); + +  do { +    closure = buffer->buffer[bufptr++]; /* that's all we need for GrAnSim -- HWL */ +       +    /* Actually only ip is needed; rest is useful for TESTING -- HWL */ +    ip = get_closure_info(closure,  +			  &size, &ptrs, &nonptrs, &vhs, str); +       +    IF_GRAN_DEBUG(pack, +		  sprintf(str, "**    (%p): Changing bitmask[%s]: 0x%x ", +			  closure, (closure_HNF(closure) ? 
"NF" : "__"), +			  PROCS(closure))); + +    if (ip->type == RBH) { +      closure->header.gran.procs = PE_NUMBER(CurrentProc);    /* Move node */ +       +      IF_GRAN_DEBUG(pack, +		    strcat(str, " (converting RBH) "));  + +      convertFromRBH(closure);   /* In GUM that's done by convertToFetchMe */ +    } else if (IS_BLACK_HOLE(closure)) { +      closure->header.gran.procs |= PE_NUMBER(CurrentProc); /* Copy node */ +    } else if ( closure->header.gran.procs & PE_NUMBER(CurrentProc) == 0 ) { +      if (closure_HNF(closure)) +	closure->header.gran.procs |= PE_NUMBER(CurrentProc); /* Copy node */ +      else +	closure->header.gran.procs = PE_NUMBER(CurrentProc);  /* Move node */ +    } + +    IF_GRAN_DEBUG(pack, +		  sprintf(str1, "0x%x",   PROCS(closure)); strcat(str, str1)); +    IF_GRAN_DEBUG(pack, belch(str)); +     +  } while (bufptr<buffer->size) ;   /*  (parent != NULL);  */ + +  /* In GrAnSim we allocate pack buffers dynamically! -- HWL */ +  free(buffer->buffer); +  free(buffer); + +  IF_GRAN_DEBUG(pack, +		belch("PrintGraph of %p is:", graphroot); PrintGraph(graphroot,0)); + +  return (graphroot); +} +#endif  /* GRAN */ +#endif + +//@node Index,  , GranSim Code, Unpacking closures +//@subsection Index + +//@index +//* CommonUp::  @cindex\s-+CommonUp +//* InitPendingGABuffer::  @cindex\s-+InitPendingGABuffer +//* PendingGABuffer::  @cindex\s-+PendingGABuffer +//* UnpackGraph::  @cindex\s-+UnpackGraph +//@end index diff --git a/ghc/rts/parallel/FetchMe.h b/ghc/rts/parallel/FetchMe.h new file mode 100644 index 0000000000..ebbb8dd6f2 --- /dev/null +++ b/ghc/rts/parallel/FetchMe.h @@ -0,0 +1,22 @@ +/* ----------------------------------------------------------------------------- + * $Id: FetchMe.h,v 1.2 2000/01/13 14:34:06 hwloidl Exp $ + * + * Closure types for the parallel system. 
+ * + * ---------------------------------------------------------------------------*/ + +EI_(FETCH_ME_info); +EF_(FETCH_ME_entry); + +EI_(FETCH_ME_BQ_info); +EF_(FETCH_ME_BQ_entry); + +EI_(BLOCKED_FETCH_info); +EF_(BLOCKED_FETCH_entry); + +EI_(RBH_Save_0_info); +EF_(RBH_Save_0_entry); +EI_(RBH_Save_1_info); +EF_(RBH_Save_1_entry); +EI_(RBH_Save_2_info); +EF_(RBH_Save_2_entry); diff --git a/ghc/rts/parallel/FetchMe.hc b/ghc/rts/parallel/FetchMe.hc new file mode 100644 index 0000000000..01f1f14b99 --- /dev/null +++ b/ghc/rts/parallel/FetchMe.hc @@ -0,0 +1,214 @@ +/* ---------------------------------------------------------------------------- + Time-stamp: <Wed Jan 12 2000 13:39:33 Stardate: [-30]4193.88 hwloidl> + $Id: FetchMe.hc,v 1.2 2000/01/13 14:34:06 hwloidl Exp $ + + Entry code for a FETCH_ME closure + + This module defines routines for handling remote pointers (@FetchMe@s) + in GUM.  It is threaded (@.hc@) because @FetchMe_entry@ will be + called during evaluation. + + * --------------------------------------------------------------------------*/ +  +#ifdef PAR /* all of it */ + +//@menu +//* Includes::			 +//* Info tables::		 +//* Index::			 +//@end menu + +//@node Includes, Info tables +//@subsection Includes + +#include "Rts.h" +#include "RtsFlags.h" +#include "RtsUtils.h" +#include "Storage.h" +#include "GranSim.h" +#include "GranSimRts.h" +#include "Parallel.h" +#include "ParallelRts.h" +#include "FetchMe.h" +#include "HLC.h" +#include "StgRun.h"	/* for StgReturn and register saving */ + +/* -------------------------------------------------------------------------- +   FETCH_ME closures. + +   A FETCH_ME closure represents data that currently resides on +   another PE.  We issue a fetch message, and wait for the data to be +   retrieved. 
+ +   About the difference between std and PAR in returning to the RTS: +   in PAR we call RTS functions from within the entry code (see also +   BLACKHOLE_entry and friends in StgMiscClosures.hc); therefore, we +   have to save the thread state before calling these functions ---  +   this is done via SAVE_THREAD_STATE; we then just load the return +   code into R1 before jumping into the RTS --- this is done via +   THREAD_RETURN; so, in short we have something like +     SAVE_THREAD_STATE + THREAD_RETURN = BLOCK_NP +    +   ------------------------------------------------------------------------ */ + +//@node Info tables, Index, Includes +//@subsection Info tables + +//@cindex FETCH_ME_info +INFO_TABLE(FETCH_ME_info, FETCH_ME_entry, 0,2, FETCH_ME, const, EF_,0,0); +//@cindex FETCH_ME_entry +STGFUN(FETCH_ME_entry) +{ +  extern globalAddr *rga_GLOBAL; +  extern globalAddr *lga_GLOBAL; +  extern globalAddr fmbqga_GLOBAL; +  extern StgClosure *p_GLOBAL; +  /*  +  globalAddr *rga; +  globalAddr *lga; +  globalAddr fmbqga; +  StgClosure *p; +  */ + +  rga_GLOBAL = ((StgFetchMe *)R1.p)->ga; +  ASSERT(rga->payload.gc.gtid != mytid); + +  /* Turn the FETCH_ME into a FETCH_ME_BQ, and place the current thread +   * on the blocking queue. +   */ +  // R1.cl->header.info = FETCH_ME_BQ_info; +  SET_INFO((StgClosure *)R1.cl, &FETCH_ME_BQ_info); + +  CurrentTSO->link = END_BQ_QUEUE; +  ((StgFetchMeBlockingQueue *)R1.cl)->blocking_queue = (StgBlockingQueueElement *)CurrentTSO; + +  /* record onto which closure the current thread is blcoking */ +  CurrentTSO->block_info.closure = R1.cl; +  //recordMutable((StgMutClosure *)R1.cl); +  p_GLOBAL = R1.cl; + +  /* Save the Thread State here, before calling RTS routines below! */ +  //BLOCK_NP_NO_JUMP(1); +  SAVE_THREAD_STATE(1); + +  /* unknown junk... needed? 
--SDM  yes, want to see what's happening -- HWL */ +  if (RtsFlags.ParFlags.ParStats.Full) { +    /* Note that CURRENT_TIME may perform an unsafe call */ +    //rtsTime now = CURRENT_TIME; /* Now */ +    CurrentTSO->par.exectime += CURRENT_TIME - CurrentTSO->par.blockedat; +    CurrentTSO->par.fetchcount++; +    /* TSO_QUEUE(CurrentTSO) = Q_FETCHING; */ +    CurrentTSO->par.blockedat = CURRENT_TIME; +    /* we are about to send off a FETCH message, so dump a FETCH event */ +    DumpRawGranEvent(CURRENT_PROC, taskIDtoPE(rga_GLOBAL->payload.gc.gtid), +		     GR_FETCH, CurrentTSO, (StgClosure *)R1.p, 0); +  } + +  /* Phil T. claims that this was a workaround for a hard-to-find +   * bug, hence I'm leaving it out for now --SDM  +   */ +  /* Assign a brand-new global address to the newly created FMBQ */ +  lga_GLOBAL = makeGlobal(p_GLOBAL, rtsFalse); +  splitWeight(&fmbqga_GLOBAL, lga_GLOBAL); +  ASSERT(fmbqga_GLOBAL.weight == 1L << (BITS_IN(unsigned) - 1)); + +  /* I *hope* it's ok to call this from STG land. --SDM */ +  STGCALL3(sendFetch, rga_GLOBAL, &fmbqga_GLOBAL, 0/*load*/); + +  // sendFetch now called from processTheRealFetch, to make SDM happy +  //theGlobalFromGA.payload.gc.gtid = rga->payload.gc.gtid; +  //theGlobalFromGA.payload.gc.slot = rga->payload.gc.slot; +  //theGlobalFromGA.weight = rga->weight; +  //theGlobalToGA.payload.gc.gtid = fmbqga.payload.gc.gtid; +  //theGlobalToGA.payload.gc.slot = fmbqga.payload.gc.slot; +  //theGlobalToGA.weight = fmbqga.weight; + +  // STGCALL6(fprintf,stderr,"%% Fetching %p from remote PE ((%x,%d,%x))\n",R1.p,rga->payload.gc.gtid, rga->payload.gc.slot, rga->weight); + +  THREAD_RETURN(1); /* back to the scheduler */   +  // was: BLOCK_NP(1);  +  FE_ +} + +/* --------------------------------------------------------------------------- +   FETCH_ME_BQ +    +   On the first entry of a FETCH_ME closure, we turn the closure into +   a FETCH_ME_BQ, which behaves just like a BLACKHOLE_BQ.  
Any thread +   entering the FETCH_ME_BQ will be placed in the blocking queue. +   When the data arrives from the remote PE, all waiting threads are +   woken up and the FETCH_ME_BQ is overwritten with the fetched data. + +   FETCH_ME_BQ_entry is a copy of BLACKHOLE_BQ_entry -- HWL +   ------------------------------------------------------------------------ */ + +INFO_TABLE(FETCH_ME_BQ_info, FETCH_ME_BQ_entry,0,2,FETCH_ME_BQ,const,EF_,0,0); +//@cindex FETCH_ME_BQ_info +STGFUN(FETCH_ME_BQ_entry) +{ +  FB_ +    TICK_ENT_BH(); + +    /* Put ourselves on the blocking queue for this black hole */ +    CurrentTSO->block_info.closure = R1.cl; +    CurrentTSO->link = ((StgBlockingQueue *)R1.p)->blocking_queue; +    ((StgBlockingQueue *)R1.p)->blocking_queue = CurrentTSO; + +#if defined(PAR) +    /* Save the Thread State here, before calling RTS routines below! */ +    SAVE_THREAD_STATE(1); + +    if (RtsFlags.ParFlags.ParStats.Full) { +      /* Note that CURRENT_TIME may perform an unsafe call */ +      //rtsTime now = CURRENT_TIME; /* Now */ +      CurrentTSO->par.exectime += CURRENT_TIME - CurrentTSO->par.blockedat; +      CurrentTSO->par.blockcount++; +      CurrentTSO->par.blockedat = CURRENT_TIME; +      DumpRawGranEvent(CURRENT_PROC, thisPE, +		       GR_BLOCK, CurrentTSO, (StgClosure *)R1.p, 0); +    } + +    THREAD_RETURN(1);  /* back to the scheduler */   +#else +    /* stg_gen_block is too heavyweight, use a specialised one */ +    BLOCK_NP(1); +#endif +  FE_ +} + +/* --------------------------------------------------------------------------- +   BLOCKED_FETCH_BQ +    +   A BLOCKED_FETCH closure only ever exists in the blocking queue of a +   globally visible closure i.e. one with a GA. A BLOCKED_FETCH closure +   indicates that a TSO on another PE is waiting for the result of this +   computation. Thus, when updating the closure, the result has to be sent +   to that PE. 
The relevant routines handling that are awaken_blocked_queue +   and blockFetch (for putting BLOCKED_FETCH closure into a BQ). +*/ + +//@cindex BLOCKED_FETCH_info +INFO_TABLE(BLOCKED_FETCH_info, BLOCKED_FETCH_entry,0,2,BLOCKED_FETCH,const,EF_,0,0); +//@cindex BLOCKED_FETCH_entry +STGFUN(BLOCKED_FETCH_entry) +{ +  FB_ +    /* see NON_ENTERABLE_ENTRY_CODE in StgMiscClosures.hc */ +    fprintf(stderr,"Qagh: BLOCKED_FETCH entered!\n"); +    STGCALL1(raiseError, errorHandler); +    stg_exit(EXIT_FAILURE); /* not executed */ +  FE_ +} + +#endif /* PAR */ + +//@node Index,  , Info tables +//@subsection Index + +//@index +//* BLOCKED_FETCH_entry::  @cindex\s-+BLOCKED_FETCH_entry +//* BLOCKED_FETCH_info::  @cindex\s-+BLOCKED_FETCH_info +//* FETCH_ME_BQ_info::  @cindex\s-+FETCH_ME_BQ_info +//* FETCH_ME_entry::  @cindex\s-+FETCH_ME_entry +//* FETCH_ME_info::  @cindex\s-+FETCH_ME_info +//@end index diff --git a/ghc/rts/parallel/Global.c b/ghc/rts/parallel/Global.c new file mode 100644 index 0000000000..59eda0bc1c --- /dev/null +++ b/ghc/rts/parallel/Global.c @@ -0,0 +1,828 @@ +/* --------------------------------------------------------------------------- +   Time-stamp: <Sat Dec 04 1999 21:28:56 Stardate: [-30]3999.47 hwloidl> +   $Id: Global.c,v 1.2 2000/01/13 14:34:06 hwloidl Exp $ + +   (c) The AQUA/Parade Projects, Glasgow University, 1995 +       The GdH/APART 624 Projects, Heriot-Watt University, Edinburgh, 1999 + +   Global Address Manipulation. +    +   The GALA and LAGA tables for mapping global addresses to local addresses  +   (i.e. heap pointers) are defined here. We use the generic hash tables +   defined in Hash.c. 
+   ------------------------------------------------------------------------- */ + +#ifdef PAR /* whole file */ + +//@menu +//* Includes::			 +//* Global tables and lists::	 +//* Fcts on GALA tables::	 +//* Interface to taskId-PE table::   +//* Interface to LAGA table::	 +//* Interface to GALA table::	 +//* GC functions for GALA tables::   +//* Index::			 +//@end menu + +//@node Includes, Global tables and lists, Global Address Manipulation, Global Address Manipulation +//@subsection Includes + +#include "Rts.h" +#include "RtsFlags.h" +#include "RtsUtils.h" +#include "Storage.h" +#include "Hash.h" +#include "ParallelRts.h" + +/* +  @globalAddr@ structures are allocated in chunks to reduce malloc overhead. +*/ + +//@node Global tables and lists, Fcts on GALA tables, Includes, Global Address Manipulation +//@subsection Global tables and lists + +//@cindex thisPE +int thisPE; + +//@menu +//* Free lists::		 +//* Hash tables::		 +//@end menu + +//@node Free lists, Hash tables, Global tables and lists, Global tables and lists +//@subsubsection Free lists + +/* Free list of GALA entries */ +GALA *freeGALAList = NULL; + +/* Number of globalAddr cells to allocate in one go */ +#define GCHUNK	    (1024 * sizeof(StgWord) / sizeof(GALA)) + +/* Free list of indirections */ + +//@cindex nextIndirection +static StgInt nextIndirection = 0; +//@cindex freeIndirections +GALA *freeIndirections = NULL; + +/* The list of live indirections has to be marked for GC (see makeGlobal) */ +//@cindex liveIndirections +GALA *liveIndirections = NULL; + +/* The list of remote indirections has to be marked for GC (see setRemoteGA) */ +//@cindex liveRemoteGAs +GALA *liveRemoteGAs = NULL; + +//@node Hash tables,  , Free lists, Global tables and lists +//@subsubsection Hash tables + +/* Mapping global task ids PEs */ +//@cindex taskIDtoPEtable +HashTable *taskIDtoPEtable = NULL; + +static int nextPE = 0; + +/* LAGA table: StgClosure* -> globalAddr* +               (Remember: globalAddr = 
(GlobalTaskId, Slot, Weight)) +   Mapping local to global addresses (see interface below)  +*/ + +//@cindex LAtoGALAtable +HashTable *LAtoGALAtable = NULL; + +/* GALA table: globalAddr* -> StgClosure* +               (Remember: globalAddr = (GlobalTaskId, Slot, Weight)) +   Mapping global to local addresses (see interface below)  +*/ + +//@cindex pGAtoGALAtable +HashTable *pGAtoGALAtable = NULL; + +//@node Fcts on GALA tables, Interface to taskId-PE table, Global tables and lists, Global Address Manipulation +//@subsection Fcts on GALA tables + +//@cindex allocGALA +static GALA * +allocGALA(void) +{ +  GALA *gl, *p; + +  if ((gl = freeGALAList) != NULL) { +    freeGALAList = gl->next; +  } else { +    gl = (GALA *) stgMallocBytes(GCHUNK * sizeof(GALA), "allocGALA"); + +    freeGALAList = gl + 1; +    for (p = freeGALAList; p < gl + GCHUNK - 1; p++) +      p->next = p + 1; +    p->next = NULL; +  } +  return gl; +} + +//@node Interface to taskId-PE table, Interface to LAGA table, Fcts on GALA tables, Global Address Manipulation +//@subsection Interface to taskId-PE table + +/* +  We don't really like GLOBAL_TASK_ID, so we keep a table of TASK_ID to +  PE mappings.  The idea is that a PE identifier will fit in 16 bits, whereas  +  a TASK_ID may not. +*/ + +//@cindex taskIDtoPE +PEs +taskIDtoPE(GlobalTaskId gtid) +{ +  return (PEs) lookupHashTable(taskIDtoPEtable, gtid); +} + +//@cindex registerTask +void  +registerTask(gtid) +GlobalTaskId gtid; +{ +  if (gtid == mytid) +    thisPE = nextPE; + +  insertHashTable(taskIDtoPEtable, gtid, (void *) (StgWord) nextPE++); +} + +//@node Interface to LAGA table, Interface to GALA table, Interface to taskId-PE table, Global Address Manipulation +//@subsection Interface to LAGA table + +/* +  The local address to global address mapping returns a globalAddr structure +  (pe task id, slot, weight) for any closure in the local heap which has a +  global identity.  
Such closures may be copies of normal form objects with +  a remote `master' location, @FetchMe@ nodes referencing remote objects, or +  globally visible objects in the local heap (for which we are the master). +*/ + +//@cindex LAGAlookup +globalAddr * +LAGAlookup(addr) +StgClosure *addr; +{ +  GALA *gala; + +  /* We never look for GA's on indirections */ +  ASSERT(IS_INDIRECTION(addr) == NULL); +  if ((gala = lookupHashTable(LAtoGALAtable, (StgWord) addr)) == NULL) +    return NULL; +  else +    return &(gala->ga); +} + +//@node Interface to GALA table, GC functions for GALA tables, Interface to LAGA table, Global Address Manipulation +//@subsection Interface to GALA table + +/* +  We also manage a mapping of global addresses to local addresses, so that +  we can ``common up'' multiple references to the same object as they arrive +  in data packets from remote PEs. + +  The global address to local address mapping is actually managed via a +  ``packed global address'' to GALA hash table.  The packed global +  address takes the interesting part of the @globalAddr@ structure +  (i.e. the pe and slot fields) and packs them into a single word +  suitable for hashing. +*/ + +//@cindex GALAlookup +StgClosure * +GALAlookup(ga) +globalAddr *ga; +{ +  StgWord pga = PackGA(taskIDtoPE(ga->payload.gc.gtid), ga->payload.gc.slot); +  GALA *gala; + +  if ((gala = (GALA *) lookupHashTable(pGAtoGALAtable, pga)) == NULL) +    return NULL; +  else { +    /*  +     * Bypass any indirections when returning a local closure to +     * the caller.  Note that we do not short-circuit the entry in +     * the GALA tables right now, because we would have to do a +     * hash table delete and insert in the LAtoGALAtable to keep +     * that table up-to-date for preferred GALA pairs.  That's +     * probably a bit expensive. 
+     */ +    return UNWIND_IND((StgClosure *)(gala->la)); +  } +} + +/* +  External references to our globally-visible closures are managed through an +  indirection table.  The idea is that the closure may move about as the result +  of local garbage collections, but its global identity is determined by its +  slot in the indirection table, which never changes. + +  The indirection table is maintained implicitly as part of the global +  address to local address table.  We need only keep track of the +  highest numbered indirection index allocated so far, along with a free +  list of lower numbered indices no longer in use. +*/ + +/*  +   Allocate an indirection slot for the closure currently at address @addr@. +*/ + +//@cindex allocIndirection +static GALA * +allocIndirection(StgPtr addr) +{ +  GALA *gala; +   +  if ((gala = freeIndirections) != NULL) { +    freeIndirections = gala->next; +  } else { +    gala = allocGALA(); +    gala->ga.payload.gc.gtid = mytid; +    gala->ga.payload.gc.slot = nextIndirection++; +  } +  gala->ga.weight = MAX_GA_WEIGHT; +  gala->la = addr; +  return gala; +} + +/* +  Make a local closure at @addr@ globally visible.  We have to allocate an +  indirection slot for it, and update both the local address to global address +  and global address to local address maps. 
+*/ + +//@cindex makeGlobal +globalAddr * +makeGlobal(addr, preferred) +StgClosure *addr; +rtsBool preferred; +{ +  GALA *oldGALA = lookupHashTable(LAtoGALAtable, (StgWord) addr); +  GALA *newGALA = allocIndirection((StgPtr)addr); +  StgWord pga = PackGA(thisPE, newGALA->ga.payload.gc.slot); + +  ASSERT(HEAP_ALLOCED(addr)); // check that addr might point into the heap  +  ASSERT(GALAlookup(&(newGALA->ga)) == NULL); +   +  newGALA->la = addr; +  newGALA->preferred = preferred; + +  if (preferred) { +    /* The new GA is now the preferred GA for the LA */ +    if (oldGALA != NULL) { +      oldGALA->preferred = rtsFalse; +      (void) removeHashTable(LAtoGALAtable, (StgWord) addr, (void *) oldGALA); +    } +    insertHashTable(LAtoGALAtable, (StgWord) addr, (void *) newGALA); +  } + +  /* put the new GALA entry on the list of live indirections */ +  newGALA->next = liveIndirections; +  liveIndirections = newGALA; +   +  insertHashTable(pGAtoGALAtable, pga, (void *) newGALA); +   +  return &(newGALA->ga); +} + +/* +  Assign an existing remote global address to an existing closure. +  We do not retain the @globalAddr@ structure that's passed in as an argument, +  so it can be a static in the calling routine. 
+*/ + +//@cindex setRemoteGA +globalAddr * +setRemoteGA(addr, ga, preferred) +StgClosure *addr; +globalAddr *ga; +rtsBool preferred; +{ +  GALA *oldGALA = lookupHashTable(LAtoGALAtable, (StgWord) addr); +  GALA *newGALA = allocGALA(); +  StgWord pga = PackGA(taskIDtoPE(ga->payload.gc.gtid), ga->payload.gc.slot); + +  ASSERT(ga->payload.gc.gtid != mytid); +  ASSERT(ga->weight > 0); +  ASSERT(GALAlookup(ga) == NULL); + +  newGALA->ga = *ga; +  newGALA->la = addr; +  newGALA->preferred = preferred; + +  if (preferred) { +    /* The new GA is now the preferred GA for the LA */ +    if (oldGALA != NULL) { +      oldGALA->preferred = rtsFalse; +      (void) removeHashTable(LAtoGALAtable, (StgWord) addr, (void *) oldGALA); +    } +    insertHashTable(LAtoGALAtable, (StgWord) addr, (void *) newGALA); +  } +  newGALA->next = liveRemoteGAs; +  liveRemoteGAs = newGALA; +   +  insertHashTable(pGAtoGALAtable, pga, (void *) newGALA); +   +  ga->weight = 0; + +  return &(newGALA->ga); +} + +/* +  Give me a bit of weight to give away on a new reference to a particular +  global address.  If we run down to nothing, we have to assign a new GA.   +*/ + +//@cindex splitWeight +void +splitWeight(to, from) +globalAddr *to, *from; +{ +  /* Make sure we have enough weight to split */ +  if (from->weight == 1) +    from = makeGlobal(GALAlookup(from), rtsTrue); +   +  to->payload = from->payload; + +  if (from->weight == 0) +    to->weight = 1L << (BITS_IN(unsigned) - 1); +  else +    to->weight = from->weight / 2; + +  from->weight -= to->weight; +} + +/* +  Here, I am returning a bit of weight that a remote PE no longer needs. 
+*/ + +//@cindex addWeight +globalAddr * +addWeight(ga) +globalAddr *ga; +{ +  StgWord pga = PackGA(taskIDtoPE(ga->payload.gc.gtid), ga->payload.gc.slot); +  GALA *gala = (GALA *) lookupHashTable(pGAtoGALAtable, pga); + +  IF_PAR_DEBUG(weight, +	       fprintf(stderr, "@* Adding weight %x to ", ga->weight); +	       printGA(&(gala->ga)); +	       fputc('\n', stderr)); + +  gala->ga.weight += ga->weight;     +  ga->weight = 0; + +  return &(gala->ga); +} + +/* +  Initialize all of the global address structures: the task ID to PE id +  map, the local address to global address map, the global address to +  local address map, and the indirection table. +*/ + +//@cindex initGAtables +void +initGAtables(void) +{ +  taskIDtoPEtable = allocHashTable(); +  LAtoGALAtable = allocHashTable(); +  pGAtoGALAtable = allocHashTable(); +} + +//@cindex PackGA +StgWord +PackGA (pe, slot) +StgWord pe; +int slot; +{ +  int pe_shift = (BITS_IN(StgWord)*3)/4; +  int pe_bits  = BITS_IN(StgWord) - pe_shift; + +  if ( pe_bits < 8 || slot >= (1L << pe_shift) ) { /* big trouble */ +    fflush(stdout); +    fprintf(stderr, "PackGA: slot# too big (%d) or not enough pe_bits (%d)\n", +	    slot,pe_bits); +    stg_exit(EXIT_FAILURE); +  } + +  return((((StgWord)(pe)) << pe_shift) | ((StgWord)(slot))); +	 +    /* the idea is to use 3/4 of the bits (e.g., 24) for indirection- +       table "slot", and 1/4 for the pe# (e.g., 8). +        +       We check for too many bits in "slot", and double-check (at +       compile-time?) that we have enough bits for "pe".  We *don't* +       check for too many bits in "pe", because SysMan enforces a +       MAX_PEs limit at the very very beginning. 
+ +       Phil & Will 95/08 +    */ +} + +//@node GC functions for GALA tables, Debugging routines, Interface to GALA table, Global Address Manipulation +//@subsection GC functions for GALA tables + +/* +  When we do a copying collection, we want to evacuate all of the local +  entries in the GALA table for which there are outstanding remote +  pointers (i.e. for which the weight is not MAX_GA_WEIGHT.) +*/ +//@cindex markLocalGAs +void +markLocalGAs(rtsBool full) +{ +  GALA *gala; +  GALA *next; +  GALA *prev = NULL; +  StgPtr old_la, new_la; +  nat n=0, m=0; // debugging only +   +  IF_DEBUG(gc, +	   belch("@@ markLocalGAs: Marking LIVE INDIRECTIONS in GALA table starting with GALA at %p\n", +		 liveIndirections); +	   printLAGAtable()); + +  for (gala = liveIndirections, m=0; gala != NULL; gala = next, m++) { +    IF_DEBUG(gc, + 	     printGA(&(gala->ga)); +	     fprintf(stderr, ";@ %d: LA: %p (%s) ", +		     m, gala->la, info_type(gala->la))); +    next = gala->next; +    old_la = gala->la; +    ASSERT(gala->ga.payload.gc.gtid == mytid); /* it's supposed to be local */ +    if (get_itbl((StgClosure *)old_la)->type == EVACUATED) { +      /* somebody else already evacuated this closure */ +      new_la = ((StgEvacuated *)old_la)->evacuee; +      IF_DEBUG(gc, +	       belch(" already evacuated to %p\n", new_la)); +    } else { +      StgClosure *foo ; // debugging only +      n++; +      IF_PAR_DEBUG(verbose, +		   if (IS_INDIRECTION((StgClosure *)old_la)) +		       belch("{markLocalGAs}Daq ghuH: trying to mark an indirection %p (%s) -> %p (%s); [closure=%p]", +			     old_la, info_type(old_la),  +			     (foo = UNWIND_IND((StgClosure *)old_la)), info_type(foo),  +			     old_la)); +      new_la = MarkRoot(UNWIND_IND((StgClosure *)old_la)); // or just evacuate(old_ga) +      IF_DEBUG(gc, +	       belch(" evacuated %p to %p\n", old_la, new_la)); +    } + +    gala->la = new_la; +    /* remove old LA and replace with new LA */ +    //(void) 
removeHashTable(LAtoGALAtable, (StgWord) old_la, (void *) gala); +    //insertHashTable(LAtoGALAtable, (StgWord) new_la, (void *) gala); + +    gala->next = prev; +    prev = gala; +  } +  liveIndirections = prev;  /* list has been reversed during the marking */ + +  IF_PAR_DEBUG(verbose, +	       belch("@@ markLocalGAs: %d of %d GALAs marked on PE %x", +		     n, m, mytid)); + +  /* -------------------------------------------------------------------- */ + +  n=0; m=0; // debugging only +   +  IF_DEBUG(gc, +	   belch("@@ markLocalGAs: Marking LIVE REMOTE GAs in GALA table starting with GALA at %p\n", +		 liveRemoteGAs)); + +  for (gala = liveRemoteGAs, prev = NULL; gala != NULL; gala = next) { +    IF_DEBUG(gc, +	     printGA(&(gala->ga))); +    next = gala->next; +    old_la = gala->la; +    ASSERT(gala->ga.payload.gc.gtid != mytid); /* it's supposed to be remote */ +    if (get_itbl((StgClosure *)old_la)->type == EVACUATED) { +      /* somebody else already evacuated this closure */ +      new_la = ((StgEvacuated *)old_la)->evacuee; +    } else { +      n++; +      new_la = MarkRoot((StgClosure *)old_la); // or just evacuate(old_ga) +    } + +    gala->la = new_la; +    /* remove old LA and replace with new LA */ +    //(void) removeHashTable(LAtoGALAtable, (StgWord) old_la, (void *) gala); +    //insertHashTable(LAtoGALAtable, (StgWord) new_la, (void *) gala); + +    gala->next = prev; +    prev = gala; +  } +  liveRemoteGAs = prev; /* list is reversed during marking */ + +  /* If we have any remaining FREE messages to send off, do so now */ +  // sendFreeMessages(); + +  IF_DEBUG(gc, +	   belch("@@ markLocalGAs: GALA after marking"); +	   printLAGAtable(); +	   belch("--------------------------------------")); +   +} + +void +OLDmarkLocalGAs(rtsBool full) +{ +  extern StgClosure *MarkRootHWL(StgClosure *root); + +  GALA *gala; +  GALA *next; +  GALA *prev = NULL; +  StgPtr new_la; +  nat n=0, m=0; // debugging only +   +  IF_DEBUG(gc, +	   belch("@@ 
markLocalGAs: Marking entries in GALA table starting with GALA at %p", +		 liveIndirections); +	   printLAGAtable()); + +  for (gala = liveIndirections; gala != NULL; gala = next) { +    IF_DEBUG(gc, + 	     printGA(&(gala->ga)); +	     fprintf(stderr, " LA: %p (%s) ", +		     gala->la, info_type(gala->la))); +    next = gala->next; +    ASSERT(gala->ga.payload.gc.gtid == mytid); /* it's supposed to be local */ +    if (gala->ga.weight != MAX_GA_WEIGHT) { +      /* Remote references exist, so we must evacuate the local closure */ +      StgPtr old_la = gala->la; + +      if (get_itbl((StgClosure *)old_la)->type != EVACUATED) { // track evacuee!?? +	n++; +	IF_DEBUG(gc, +		 fprintf(stderr, " marking as root\n")); +	new_la = MarkRoot((StgClosure *)old_la); // or just evacuate(old_ga) +	//IF_DEBUG(gc, +	//	 fprintf(stderr, " new LA is %p ", new_la)); +	if (!full && gala->preferred && new_la != old_la) { +	  IF_DEBUG(gc, +		   fprintf(stderr, " replacing %p with %p in LAGA table\n", +			   old_la, new_la)); +	  (void) removeHashTable(LAtoGALAtable, (StgWord) old_la, (void *) gala); +	  insertHashTable(LAtoGALAtable, (StgWord) new_la, (void *) gala); +	} +      } else { +  	IF_DEBUG(gc, +  		 fprintf(stderr, " EVAC ")); +	new_la = ((StgEvacuated *)old_la)->evacuee; +	IF_DEBUG(gc, +		 fprintf(stderr, " replacing %p with %p in LAGA table\n", +			   old_la, new_la)); +	(void) removeHashTable(LAtoGALAtable, (StgWord) old_la, (void *) gala); +	insertHashTable(LAtoGALAtable, (StgWord) new_la, (void *) gala); +      }  +      gala->next = prev; +      prev = gala; +    } else { +      /* Since we have all of the weight, this GA is no longer needed */ +      StgWord pga = PackGA(thisPE, gala->ga.payload.gc.slot); + +      m++; +      IF_DEBUG(gc, +	       fprintf(stderr, " freeing slot %d",  +		       gala->ga.payload.gc.slot)); + +      /* put the now redundant GALA onto the free list */ +      gala->next = freeIndirections; +      freeIndirections = gala; +      /* remove the 
GALA from the GALA table; now it's just local */ +      (void) removeHashTable(pGAtoGALAtable, pga, (void *) gala); +      if (!full && gala->preferred) +	(void) removeHashTable(LAtoGALAtable, (StgWord) gala->la, (void *) gala); + +#ifdef DEBUG +      gala->ga.weight = 0x0d0d0d0d; +      gala->la = (StgWord) 0x0bad0bad; +#endif +    } +  } +  liveIndirections = prev;  /* list has been reversed during the marking */ + +  IF_PAR_DEBUG(verbose, +	       belch("@@ markLocalGAs: %d GALAs marked, %d GALAs nuked on PE %x", +		     n, m, mytid)); + +} + +//@cindex RebuildGAtables +void +RebuildGAtables(rtsBool full) +{ +  GALA *gala; +  GALA *next; +  GALA *prev; +  StgClosure *closure, *last, *new_closure; + +  //prepareFreeMsgBuffers(); + +  if (full) +    RebuildLAGAtable(); + +  IF_DEBUG(gc, +	   belch("@@ RebuildGAtables: After ReBuilding GALA table starting with GALA at %p", +		 liveRemoteGAs); +	   printLAGAtable()); +} + +void +OLDRebuildGAtables(rtsBool full) +{ +  GALA *gala; +  GALA *next; +  GALA *prev; +  StgClosure *closure, *last, *new_closure; + +  prepareFreeMsgBuffers(); + +  for (gala = liveRemoteGAs, prev = NULL; gala != NULL; gala = next) { +    IF_DEBUG(gc, +	     printGA(&(gala->ga))); +    next = gala->next; +    ASSERT(gala->ga.payload.gc.gtid != mytid); /* it's supposed to be remote */ + +    closure = (StgClosure *) (gala->la); + +    /* +     * If the old closure has not been forwarded, we let go.  Note that this +     * approach also drops global aliases for PLCs. 
+     */ + +    if (!full && gala->preferred) +      (void) removeHashTable(LAtoGALAtable, (StgWord) gala->la, (void *) gala); + +    /* Follow indirection chains to the end, just in case */ +    closure = UNWIND_IND(closure); + +    /* +    if (get_itbl(closure)->type != EVACUATED) { // (new_closure = isAlive(closure)) == NULL) { // (W_) Forward_Ref_info) +      // closure is not alive any more, thus remove GA  +      int pe = taskIDtoPE(gala->ga.payload.gc.gtid); +      StgWord pga = PackGA(pe, gala->ga.payload.gc.slot); + +      IF_DEBUG(gc, +	       fprintf(stderr, " (LA: %p (%s)) is unused on this PE -> sending free\n", +		       closure, info_type(closure))); + +      (void) removeHashTable(pGAtoGALAtable, pga, (void *) gala); +      freeRemoteGA(pe, &(gala->ga)); +      gala->next = freeGALAList; +      freeGALAList = gala; +    } else { +    */ +    if (get_itbl(closure)->type == EVACUATED) { +      IF_DEBUG(gc, +	       fprintf(stderr, " EVAC %p (%s)\n", +		       closure, info_type(closure))); +      closure = ((StgEvacuated *)closure)->evacuee; +    } else { +      IF_DEBUG(gc, +	       fprintf(stderr, " !EVAC %p (%s)\n", +		       closure, info_type(closure))); +    } +    gala->la = closure; +    if (!full && gala->preferred) +      insertHashTable(LAtoGALAtable, (StgWord) gala->la, (void *) gala); +    gala->next = prev; +    prev = gala; +  } +  //} +  liveRemoteGAs = prev; /* list is reversed during marking */ + +  /* If we have any remaining FREE messages to send off, do so now */ +  sendFreeMessages(); + +  if (full) +    RebuildLAGAtable(); + +  IF_DEBUG(gc, +	   belch("@@ RebuildGAtables: After ReBuilding GALA table starting with GALA at %p", +		 liveRemoteGAs); +	   printLAGAtable()); +} + +/* +  Rebuild the LA->GA table, assuming that the addresses in the GALAs are +  correct.   
+*/ + +//@cindex RebuildLAGAtable +void +RebuildLAGAtable(void) +{ +  GALA *gala; +  nat n=0, m=0; // debugging + +  /* The old LA->GA table is worthless */ +  freeHashTable(LAtoGALAtable, NULL); +  LAtoGALAtable = allocHashTable(); + +  IF_DEBUG(gc, +	   belch("@@ RebuildLAGAtable: new LAGA table at %p", +		 LAtoGALAtable));  +   +  for (gala = liveIndirections; gala != NULL; gala = gala->next) { +    n++; +    if (gala->preferred) +      insertHashTable(LAtoGALAtable, (StgWord) gala->la, (void *) gala); +  } + +  for (gala = liveRemoteGAs; gala != NULL; gala = gala->next) { +    m++; +    if (gala->preferred) +      insertHashTable(LAtoGALAtable, (StgWord) gala->la, (void *) gala); +  } + +  IF_DEBUG(gc, +	   belch("@@ RebuildLAGAtable: inserted %d entries from liveIndirections and %d entries from liveRemoteGAs", +		 n,m));  +   +} + +//@node Debugging routines, Index, GC functions for GALA tables, Global Address Manipulation +//@subsection Debugging routines + +//@cindex printGA +void +printGA (globalAddr *ga) +{ +  fprintf(stderr, "((%x, %d, %x))",  +	  ga->payload.gc.gtid, +	  ga->payload.gc.slot, +	  ga->weight); +} + +//@cindex printGALA +void  +printGALA (GALA *gala) +{ +  printGA(&(gala->ga)); +  fprintf(stderr, " -> %p (%s)", (StgPtr)gala->la, info_type(gala->la)); +  fprintf(stderr, " %s", (gala->preferred) ? "PREF" : "____"); +} + +/* +  Printing the LA->GA table. 
+*/ + +//@cindex DebugPrintLAGAtable +void +printLAGAtable(void) +{ +  GALA *gala; +  nat n=0, m=0; // debugging + +  belch("@@ LAGAtable (%p) with liveIndirections=%p, liveRemoteGAs=%p:", +	LAtoGALAtable, liveIndirections, liveRemoteGAs);  +   +  for (gala = liveIndirections; gala != NULL; gala = gala->next) { +    n++; +    printGALA(gala); +    fputc('\n', stderr); +  } + +  for (gala = liveRemoteGAs; gala != NULL; gala = gala->next) { +    m++; +    printGALA(gala); +    fputc('\n', stderr); +  } +  belch("@@ LAGAtable has %d liveIndirections entries and %d liveRemoteGAs entries", +	n, m); +} + +#endif /* PAR -- whole file */ + +//@node Index,  , Debugging routines, Global Address Manipulation +//@subsection Index + +//@index +//* GALAlookup::  @cindex\s-+GALAlookup +//* LAGAlookup::  @cindex\s-+LAGAlookup +//* LAtoGALAtable::  @cindex\s-+LAtoGALAtable +//* PackGA::  @cindex\s-+PackGA +//* RebuildGAtables::  @cindex\s-+RebuildGAtables +//* RebuildLAGAtable::  @cindex\s-+RebuildLAGAtable +//* addWeight::  @cindex\s-+addWeight +//* allocGALA::  @cindex\s-+allocGALA +//* allocIndirection::  @cindex\s-+allocIndirection +//* freeIndirections::  @cindex\s-+freeIndirections +//* initGAtables::  @cindex\s-+initGAtables +//* liveIndirections::  @cindex\s-+liveIndirections +//* liveRemoteGAs::  @cindex\s-+liveRemoteGAs +//* makeGlobal::  @cindex\s-+makeGlobal +//* markLocalGAs::  @cindex\s-+markLocalGAs +//* nextIndirection::  @cindex\s-+nextIndirection +//* pGAtoGALAtable::  @cindex\s-+pGAtoGALAtable +//* registerTask::  @cindex\s-+registerTask +//* setRemoteGA::  @cindex\s-+setRemoteGA +//* splitWeight::  @cindex\s-+splitWeight +//* taskIDtoPE::  @cindex\s-+taskIDtoPE +//* taskIDtoPEtable::  @cindex\s-+taskIDtoPEtable +//* thisPE::  @cindex\s-+thisPE +//@end index diff --git a/ghc/rts/parallel/GranSim.c b/ghc/rts/parallel/GranSim.c new file mode 100644 index 0000000000..8d08fb6b9e --- /dev/null +++ b/ghc/rts/parallel/GranSim.c @@ -0,0 +1,3005 @@ +/*  +   Time-stamp: 
<Sat Dec 11 1999 17:25:27 Stardate: [-30]4033.42 software> +   $Id: GranSim.c,v 1.2 2000/01/13 14:34:06 hwloidl Exp $ + +   Variables and functions specific to GranSim the parallelism simulator +   for GPH. +*/ + +//@node GranSim specific code, , , +//@section GranSim specific code + +/* +   Macros for dealing with the new and improved GA field for simulating +   parallel execution. Based on @CONCURRENT@ package. The GA field now +   contains a mask, where the n-th bit stands for the n-th processor, where +   this data can be found. In case of multiple copies, several bits are +   set. The total number of processors is bounded by @MAX_PROC@, which +   should be <= the length of a word in bits.  -- HWL  +*/ + +//@menu +//* Includes::			 +//* Prototypes and externs::	 +//* Constants and Variables::	 +//* Initialisation::		 +//* Global Address Operations::	  +//* Global Event Queue::	 +//* Spark queue functions::	 +//* Scheduling functions::	 +//* Thread Queue routines::	 +//* GranSim functions::		 +//* GranSimLight routines::	 +//* Code for Fetching Nodes::	 +//* Idle PEs::			 +//* Routines directly called from Haskell world::   +//* Emiting profiling info for GrAnSim::   +//* Dumping routines::		 +//* Index::			 +//@end menu + +//@node Includes, Prototypes and externs, GranSim specific code, GranSim specific code +//@subsection Includes + +#include "Rts.h" +#include "RtsFlags.h" +#include "RtsUtils.h" +#include "StgMiscClosures.h" +#include "StgTypes.h" +#include "Schedule.h" +#include "SchedAPI.h"       // for pushClosure +#include "GC.h" +#include "GranSimRts.h" +#include "GranSim.h" +#include "ParallelRts.h" +#include "ParallelDebug.h" +#include "Storage.h"       // for recordMutable + + +//@node Prototypes and externs, Constants and Variables, Includes, GranSim specific code +//@subsection Prototypes and externs + +#if defined(GRAN) + +/* Prototypes */ +static inline PEs      ga_to_proc(StgWord); +static inline rtsBool  any_idle(void); +static inline nat      
idlers(void); +       PEs             where_is(StgClosure *node); + +static rtsBool         stealSomething(PEs proc, rtsBool steal_spark, rtsBool steal_thread); +static inline rtsBool  stealSpark(PEs proc); +static inline rtsBool  stealThread(PEs proc); +static rtsBool         stealSparkMagic(PEs proc); +static rtsBool         stealThreadMagic(PEs proc); +/* subsumed by stealSomething +static void            stealThread(PEs proc);  +static void            stealSpark(PEs proc); +*/ +static rtsTime         sparkStealTime(void); +static nat             natRandom(nat from, nat to); +static PEs             findRandomPE(PEs proc); +static void            sortPEsByTime (PEs proc, PEs *pes_by_time,  +				      nat *firstp, nat *np); + +void GetRoots(void); + +#endif /* GRAN */ + +//@node Constants and Variables, Initialisation, Prototypes and externs, GranSim specific code +//@subsection Constants and Variables + +#if defined(GRAN) || defined(PAR) +/* See GranSim.h for the definition of the enum gran_event_types */ +char *gran_event_names[] = { +    "START", "START(Q)", +    "STEALING", "STOLEN", "STOLEN(Q)", +    "FETCH", "REPLY", "BLOCK", "RESUME", "RESUME(Q)", +    "SCHEDULE", "DESCHEDULE", +    "END", +    "SPARK", "SPARKAT", "USED", "PRUNED", "EXPORTED", "ACQUIRED", +    "ALLOC", +    "TERMINATE", +    "SYSTEM_START", "SYSTEM_END",           /* only for debugging */ +    "??" 
+}; +#endif + +#if defined(GRAN)                                              /* whole file */ +char *proc_status_names[] = { +  "Idle", "Sparking", "Starting", "Fetching", "Fishing", "Busy",  +  "UnknownProcStatus" +}; + +/* For internal use (event statistics) only */ +char *event_names[] = +    { "ContinueThread", "StartThread", "ResumeThread",  +      "MoveSpark", "MoveThread", "FindWork", +      "FetchNode", "FetchReply", +      "GlobalBlock", "UnblockThread" +    }; + +//@cindex CurrentProc +PEs CurrentProc = 0; + +/* +  ToDo: Create a structure for the processor status and put all the  +        arrays below into it.  +  -- HWL */ + +//@cindex CurrentTime +/* One clock for each PE */ +rtsTime CurrentTime[MAX_PROC];   + +/* Useful to restrict communication; cf fishing model in GUM */ +nat OutstandingFetches[MAX_PROC], OutstandingFishes[MAX_PROC]; + +/* Status of each PE (new since but independent of GranSim Light) */ +rtsProcStatus procStatus[MAX_PROC]; + +# if defined(GRAN) && defined(GRAN_CHECK) +/* To check if the RTS ever tries to run a thread that should be blocked +   because of fetching remote data */ +StgTSO *BlockedOnFetch[MAX_PROC]; +# define FETCH_MASK_TSO  0x08000000      /* only bits 0, 1, 2 should be used */ +# endif + +nat SparksAvail = 0;     /* How many sparks are available */ +nat SurplusThreads = 0;  /* How many excess threads are there */ + +/* Do we need to reschedule following a fetch? */ +rtsBool NeedToReSchedule = rtsFalse, IgnoreEvents = rtsFalse, IgnoreYields = rtsFalse;  +rtsTime TimeOfNextEvent, TimeOfLastEvent, EndOfTimeSlice; /* checked from the threaded world! */ + +//@cindex spark queue +/* GranSim: a globally visible array of spark queues */ +rtsSparkQ pending_sparks_hds[MAX_PROC]; +rtsSparkQ pending_sparks_tls[MAX_PROC]; + +nat sparksIgnored = 0, sparksCreated = 0; + +GlobalGranStats globalGranStats; + +nat gran_arith_cost, gran_branch_cost, gran_load_cost,  +    gran_store_cost, gran_float_cost; + +/* +Old comment from 0.29. 
ToDo: Check and update -- HWL + +The following variables control the behaviour of GrAnSim. In general, there +is one RTS option for enabling each of these features. In getting the +desired setup of GranSim the following questions have to be answered: +\begin{itemize} +\item {\em Which scheduling algorithm} to use (@RtsFlags.GranFlags.DoFairSchedule@)?  +      Currently only unfair scheduling is supported. +\item What to do when remote data is fetched (@RtsFlags.GranFlags.DoAsyncFetch@)?  +      Either block and wait for the +      data or reschedule and do some other work. +      Thus, if this variable is true, asynchronous communication is +      modelled. Block on fetch mainly makes sense for incremental fetching. + +      There is also a simplified fetch variant available +      (@RtsFlags.GranFlags.SimplifiedFetch@). This variant does not use events to model +      communication. It is faster but the results will be less accurate. +\item How aggressive to be in getting work after a reschedule on fetch +      (@RtsFlags.GranFlags.FetchStrategy@)? +      This is determined by the so-called {\em fetching +      strategy\/}. Currently, there are four possibilities: +      \begin{enumerate} +       \item Only run a runnable thread. +       \item Turn a spark into a thread, if necessary. +       \item Steal a remote spark, if necessary. +       \item Steal a runnable thread from another processor, if necessary. +      \end{itemize} +      The variable @RtsFlags.GranFlags.FetchStrategy@ determines how far to go in this list +      when rescheduling on a fetch. +\item Should sparks or threads be stolen first when looking for work +      (@RtsFlags.GranFlags.DoStealThreadsFirst@)?  +      The default is to steal sparks first (much cheaper). +\item Should the RTS use a lazy thread creation scheme +      (@RtsFlags.GranFlags.DoAlwaysCreateThreads@)?  By default yes i.e.\ sparks are only +      turned into threads when work is needed. 
Also note, that sparks +      can be discarded by the RTS (this is done in the case of an overflow +      of the spark pool). Setting @RtsFlags.GranFlags.DoAlwaysCreateThreads@  to @True@ forces +      the creation of threads at the next possibility (i.e.\ when new work +      is demanded the next time). +\item Should data be fetched closure-by-closure or in packets +      (@RtsFlags.GranFlags.DoBulkFetching@)? The default strategy is a GRIP-like incremental  +      (i.e.\ closure-by-closure) strategy. This makes sense in a +      low-latency setting but is bad in a high-latency system. Setting  +      @RtsFlags.GranFlags.DoBulkFetching@ to @True@ enables bulk (packet) fetching. Other +      parameters determine the size of the packets (@pack_buffer_size@) and the number of +      thunks that should be put into one packet (@RtsFlags.GranFlags.ThunksToPack@). +\item If there is no other possibility to find work, should runnable threads +      be moved to an idle processor (@RtsFlags.GranFlags.DoThreadMigration@)? In any case, the +      RTS tried to get sparks (either local or remote ones) first. Thread +      migration is very expensive, since a whole TSO has to be transferred +      and probably data locality becomes worse in the process. Note, that +      the closure, which will be evaluated next by that TSO is not +      transferred together with the TSO (that might block another thread). +\item Should the RTS distinguish between sparks created by local nodes and +      stolen sparks (@RtsFlags.GranFlags.PreferSparksOfLocalNodes@)?  The idea is to improve  +      data locality by preferring sparks of local nodes (it is more likely +      that the data for those sparks is already on the local processor).  +      However, such a distinction also imposes an overhead on the spark +      queue management, and typically a large number of sparks are +      generated during execution. By default this variable is set to @False@. 
+\item Should the RTS use granularity control mechanisms? The idea of a  +      granularity control mechanism is to make use of granularity +      information provided via annotation of the @par@ construct in order +      to prefer bigger threads when either turning a spark into a thread or +      when choosing the next thread to schedule. Currently, three such +      mechanisms are implemented: +      \begin{itemize} +        \item Cut-off: The granularity information is interpreted as a +              priority. If a threshold priority is given to the RTS, then +              only those sparks with a higher priority than the threshold  +              are actually created. Other sparks are immediately discarded. +              This is similar to a usual cut-off mechanism often used in  +              parallel programs, where parallelism is only created if the  +              input data is lage enough. With this option, the choice is  +              hidden in the RTS and only the threshold value has to be  +              provided as a parameter to the runtime system. +        \item Priority Sparking: This mechanism keeps priorities for sparks +              and chooses the spark with the highest priority when turning +              a spark into a thread. After that the priority information is +              discarded. The overhead of this mechanism comes from +              maintaining a sorted spark queue. +        \item Priority Scheduling: This mechanism keeps the granularity +              information for threads, to. Thus, on each reschedule the  +              largest thread is chosen. This mechanism has a higher +              overhead, as the thread queue is sorted, too. 
+       \end{itemize}   +\end{itemize} +*/ + +//@node Initialisation, Global Address Operations, Constants and Variables, GranSim specific code +//@subsection Initialisation + +void  +init_gr_stats (void) { +  memset(&globalGranStats, '\0', sizeof(GlobalGranStats)); +#if 0 +  /* event stats */ +  globalGranStats.noOfEvents = 0; +  for (i=0; i<MAX_EVENT; i++) globalGranStats.event_counts[i]=0; + +  /* communication stats */ +  globalGranStats.fetch_misses = 0; +  globalGranStats.tot_low_pri_sparks = 0; + +  /* obscure stats */   +  globalGranStats.rs_sp_count = 0; +  globalGranStats.rs_t_count = 0; +  globalGranStats.ntimes_total = 0,  +  globalGranStats.fl_total = 0; +  globalGranStats.no_of_steals = 0; + +  /* spark queue stats */ +  globalGranStats.tot_sq_len = 0,  +  globalGranStats.tot_sq_probes = 0;  +  globalGranStats.tot_sparks = 0; +  globalGranStats.withered_sparks = 0; +  globalGranStats.tot_add_threads = 0; +  globalGranStats.tot_tq_len = 0; +  globalGranStats.non_end_add_threads = 0; + +  /* thread stats */ +  globalGranStats.tot_threads_created = 0; +  for (i=0; i<MAX_PROC; i++) globalGranStats.threads_created_on_PE[i]=0; +#endif /* 0 */ +} + +//@node Global Address Operations, Global Event Queue, Initialisation, GranSim specific code +//@subsection Global Address Operations +/* +  ---------------------------------------------------------------------- +  Global Address Operations + +  These functions perform operations on the global-address (ga) part of a +  closure. The ga is the only new field (1 word) in a closure introduced by +  GrAnSim. It serves as a bitmask, indicating on which processor the +  closure is residing. Since threads are described by Thread State Object +  (TSO), which is nothing but another kind of closure, this scheme allows +  gives placement information about threads. + +  A ga is just a bitmask, so the operations on them are mainly bitmask +  manipulating functions. 
Note, that there are important macros like PROCS, +  IS_LOCAL_TO etc. They are defined in @GrAnSim.lh@. + +  NOTE: In GrAnSim-light we don't maintain placement information. This +  allows to simulate an arbitrary number of processors. The price we have +  to be is the lack of costing any communication properly. In short, +  GrAnSim-light is meant to reveal the maximal parallelism in a program. +  From an implementation point of view the important thing is: {\em +  GrAnSim-light does not maintain global-addresses}.  */ + +/* ga_to_proc returns the first processor marked in the bitmask ga. +   Normally only one bit in ga should be set. But for PLCs all bits +   are set. That shouldn't hurt since we only need IS_LOCAL_TO for PLCs */ +  +//@cindex ga_to_proc + +static inline PEs +ga_to_proc(StgWord ga) +{ +    PEs i; +    for (i = 0; i < RtsFlags.GranFlags.proc && !IS_LOCAL_TO(ga, i); i++); +    ASSERT(0<=i && i<RtsFlags.GranFlags.proc); +    return (i); +} + +/* NB: This takes a *node* rather than just a ga as input */ +//@cindex where_is +PEs +where_is(StgClosure *node) +{ return (ga_to_proc(PROCS(node))); } + +// debugging only +//@cindex is_unique +rtsBool +is_unique(StgClosure *node) +{  +  PEs i; +  rtsBool unique = rtsFalse; + +  for (i = 0; i < RtsFlags.GranFlags.proc ; i++) +    if (IS_LOCAL_TO(PROCS(node), i)) +      if (unique)          // exactly 1 instance found so far +	return rtsFalse;   // found a 2nd instance => not unique +      else  +	unique = rtsTrue;  // found 1st instance  +  ASSERT(unique);          // otherwise returned from within loop +  return (unique); +} + +//@cindex any_idle +static inline rtsBool +any_idle(void) { /* any (map (\ i -> procStatus[i] == Idle)) [0,..,MAX_PROC] */ + PEs i;  + rtsBool any_idle;  + for(i=0, any_idle=rtsFalse;  +     !any_idle && i<RtsFlags.GranFlags.proc;  +     any_idle = any_idle || procStatus[i] == Idle, i++)  + {} ; +} + +//@cindex idlers +static inline nat +idlers(void) {  /* number of idle PEs */ + PEs i, 
j;  + for(i=0, j=0; +     i<RtsFlags.GranFlags.proc;  +     j += (procStatus[i] == Idle) ? 1 : 0, i++)  + {} ; + return j; +} + +//@node Global Event Queue, Spark queue functions, Global Address Operations, GranSim specific code +//@subsection Global Event Queue +/* +The following routines implement an ADT of an event-queue (FIFO).  +ToDo: Put that in an own file(?) +*/ + +/* Pointer to the global event queue; events are currently malloc'ed */ +rtsEventQ EventHd = NULL; + +//@cindex get_next_event +rtsEvent * +get_next_event(void) +{ +  static rtsEventQ entry = NULL; + +  if (EventHd == NULL) { +    barf("No next event. This may be caused by a circular data dependency in the program."); +  } + +  if (entry != NULL) +    free((char *)entry); + +  if (RtsFlags.GranFlags.GranSimStats.Global) {     /* count events */ +    globalGranStats.noOfEvents++; +    globalGranStats.event_counts[EventHd->evttype]++; +  } + +  entry = EventHd; + +  IF_GRAN_DEBUG(event_trace, +	   print_event(entry)); + +  EventHd = EventHd->next; +  return(entry); +} + +/* When getting the time of the next event we ignore CONTINUETHREAD events: +   we don't want to be interrupted before the end of the current time slice +   unless there is something important to handle.  +*/ +//@cindex get_time_of_next_event +rtsTime +get_time_of_next_event(void) +{  +  rtsEventQ event = EventHd; + +  while (event != NULL && event->evttype==ContinueThread) { +    event = event->next; +  } +  if(event == NULL) +      return ((rtsTime) 0); +  else +      return (event->time); +} + +/* ToDo: replace malloc/free with a free list */ +//@cindex insert_event +void +insert_event(newentry) +rtsEvent *newentry; +{ +  rtsEventType evttype = newentry->evttype; +  rtsEvent *event, **prev; + +  /* if(evttype >= CONTINUETHREAD1) evttype = CONTINUETHREAD; */ + +  /* Search the queue and insert at the right point: +     FINDWORK before everything, CONTINUETHREAD after everything. 
+ +     This ensures that we find any available work after all threads have +     executed the current cycle.  This level of detail would normally be +     irrelevant, but matters for ridiculously low latencies... +  */ + +  /* Changed the ordering: Now FINDWORK comes after everything but  +     CONTINUETHREAD. This makes sure that a MOVESPARK comes before a  +     FINDWORK. This is important when a GranSimSparkAt happens and +     DoAlwaysCreateThreads is turned on. Also important if a GC occurs +     when trying to build a new thread (see much_spark)  -- HWL 02/96  */ + +  if(EventHd == NULL) +    EventHd = newentry; +  else { +    for (event = EventHd, prev=(rtsEvent**)&EventHd;  +	 event != NULL;  +         prev = (rtsEvent**)&(event->next), event = event->next) { +      switch (evttype) { +        case FindWork: if ( event->time < newentry->time || +                            ( (event->time == newentry->time) && +			      (event->evttype != ContinueThread) ) ) +                         continue; +                       else +                         break; +        case ContinueThread: if ( event->time <= newentry->time ) +			       continue; +			     else +                               break; +        default: if ( event->time < newentry->time ||  +	              ((event->time == newentry->time) && +		       (event->evttype == newentry->evttype)) ) +		   continue; +		 else +                   break; +       } +       /* Insert newentry here (i.e. 
before event) */ +       *prev = newentry; +       newentry->next = event; +       break; +    } +    if (event == NULL) +      *prev = newentry; +  } +} + +//@cindex new_event +void +new_event(proc,creator,time,evttype,tso,node,spark) +PEs proc, creator; +rtsTime time; +rtsEventType evttype; +StgTSO *tso; +StgClosure *node; +rtsSpark *spark; +{ +  rtsEvent *newentry = (rtsEvent *) stgMallocBytes(sizeof(rtsEvent), "new_event"); + +  newentry->proc     = proc; +  newentry->creator  = creator; +  newentry->time     = time; +  newentry->evttype  = evttype; +  newentry->tso      = tso; +  newentry->node     = node; +  newentry->spark    = spark; +  newentry->gc_info  = 0; +  newentry->next     = NULL; + +  insert_event(newentry); + +  IF_DEBUG(gran,  +	   fprintf(stderr, "GRAN: new_event: \n");  +	   print_event(newentry)) +} + +//@cindex prepend_event +void +prepend_event(event)       /* put event at beginning of EventQueue */ +rtsEvent *event; +{				  /* only used for GC! */ + event->next = EventHd; + EventHd = event; +} + +//@cindex grab_event +rtsEventQ +grab_event(void)             /* undo prepend_event i.e. get the event */ +{			 /* at the head of EventQ but don't free anything */ + rtsEventQ event = EventHd; + + if (EventHd == NULL) { +   barf("No next event (in grab_event). This may be caused by a circular data dependency in the program."); + } + + EventHd = EventHd->next; + return (event); +} + +//@cindex traverse_eventq_for_gc +void  +traverse_eventq_for_gc(void) +{ + rtsEventQ event = EventHd; + StgWord bufsize; + StgClosure *closurep; + StgTSO *tsop; + StgPtr buffer, bufptr; + PEs proc, creator; + + /* Traverse eventq and replace every FETCHREPLY by a FETCHNODE for the +    orig closure (root of packed graph). This means that a graph, which is +    between processors at the time of GC is fetched again at the time when +    it would have arrived, had there been no GC. Slightly inaccurate but +    safe for GC. +    This is only needed for GUM style fetchng. 
-- HWL */ + if (!RtsFlags.GranFlags.DoBulkFetching) +   return; + + for(event = EventHd; event!=NULL; event=event->next) { +   if (event->evttype==FetchReply) { +     buffer = stgCast(StgPtr,event->node); +     ASSERT(buffer[PACK_FLAG_LOCN]==MAGIC_PACK_FLAG);  /* It's a pack buffer */ +     bufsize = buffer[PACK_SIZE_LOCN]; +     closurep = stgCast(StgClosure*,buffer[PACK_HDR_SIZE]); +     tsop = stgCast(StgTSO*,buffer[PACK_TSO_LOCN]); +     proc = event->proc; +     creator = event->creator;                 /* similar to unpacking */ +     for (bufptr=buffer+PACK_HDR_SIZE;  +	  bufptr<(buffer+bufsize); +	  bufptr++) { +	 // if ( (INFO_TYPE(INFO_PTR(*bufptr)) == INFO_SPEC_RBH_TYPE) || +	 //      (INFO_TYPE(INFO_PTR(*bufptr)) == INFO_GEN_RBH_TYPE) ) { +	   if ( GET_INFO(stgCast(StgClosure*,bufptr)) ) { +	     convertFromRBH(stgCast(StgClosure *,bufptr)); +	 } +     } +     free(buffer); +     event->evttype = FetchNode; +     event->proc    = creator; +     event->creator = proc; +     event->node    = closurep; +     event->tso     = tsop; +     event->gc_info = 0; +   } + } +} + +void +markEventQueue(void) +{  +  StgClosure *MarkRoot(StgClosure *root); // prototype + +  rtsEventQ event = EventHd; +  nat len; + +  /* iterate over eventq and register relevant fields in event as roots */ +  for(event = EventHd, len =  0; event!=NULL; event=event->next, len++) { +    switch (event->evttype) { +      case ContinueThread:   +	event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso); +	break; +      case StartThread:  +	event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso); +	event->node = (StgClosure *)MarkRoot((StgClosure *)event->node); +	break; +      case ResumeThread: +	event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso); +	event->node = (StgClosure *)MarkRoot((StgClosure *)event->node); +	break; +      case MoveSpark: +	event->spark->node = (StgClosure *)MarkRoot((StgClosure *)event->spark->node); +	break; +      case MoveThread: +	event->tso = (StgTSO 
*)MarkRoot((StgClosure *)event->tso); +	break; +      case FindWork: +	break; +      case FetchNode:  +	event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso); +	event->node = (StgClosure *)MarkRoot((StgClosure *)event->node); +  	break; +      case FetchReply: +	event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso); +	if (RtsFlags.GranFlags.DoBulkFetching) +	  // ToDo: traverse_eventw_for_gc if GUM-Fetching!!! HWL +	  belch("ghuH: packets in BulkFetching not marked as roots; mayb be fatal"); +	else +	  event->node = (StgTSO *)MarkRoot((StgClosure *)event->node); +	break; +      case GlobalBlock: +	event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso); +	event->node = (StgClosure *)MarkRoot((StgClosure *)event->node); +	break; +      case UnblockThread: +	event->tso = (StgTSO *)MarkRoot((StgClosure *)event->tso); +	event->node = (StgClosure *)MarkRoot((StgClosure *)event->node); +	break; +      default: +	barf("markEventQueue: trying to mark unknown event @ %p", event); +    }} +  IF_DEBUG(gc, +	   belch("GC: markEventQueue: %d events in queue", len)); +} + +/* +  Prune all ContinueThread events related to tso or node in the eventq. +  Currently used if a thread leaves STG land with ThreadBlocked status, +  i.e. it blocked on a closure and has been put on its blocking queue.  It +  will be reawakended via a call to awaken_blocked_queue. Until then no +  event effecting this tso should appear in the eventq.  A bit of a hack, +  because ideally we shouldn't generate such spurious ContinueThread events +  in the first place.   
+*/ +//@cindex prune_eventq  +void  +prune_eventq(tso, node)  +StgTSO *tso;  +StgClosure *node;  +{ rtsEventQ prev = (rtsEventQ)NULL, event = EventHd; + +  /* node unused for now */  +  ASSERT(node==NULL);  +  /* tso must be valid, then */ +  ASSERT(tso!=END_TSO_QUEUE); +  while (event != NULL) { +    if (event->evttype==ContinueThread &&  +	(event->tso==tso)) { +      IF_GRAN_DEBUG(event_trace, // ToDo: use another debug flag +		    belch("prune_eventq: pruning ContinueThread event for TSO %d (%p) on PE %d @ %lx (%p)", +			  event->tso->id, event->tso, event->proc, event->time, event)); +      if (prev==(rtsEventQ)NULL) { // beginning of eventq +	EventHd = event->next; +	free(event);  +	event = EventHd; +      } else { +	prev->next = event->next; +	free(event);  +	event = prev->next; +      } +    } else { // no pruning necessary; go to next event +      prev = event; +      event = event->next; +    } +  } +} + +//@cindex print_event +void +print_event(event) +rtsEvent *event; +{ +  char str_tso[16], str_node[16]; +  StgThreadID tso_id; + +  if (event->tso==END_TSO_QUEUE) { +    strcpy(str_tso, "______"); +    tso_id = 0; +  } else {  +    sprintf(str_tso, "%p", event->tso); +    tso_id = (event->tso==NULL) ? 
0 : event->tso->id; +  } +  if  (event->node==(StgClosure*)NULL) { +    strcpy(str_node, "______"); +  } else { +    sprintf(str_node, "%p", event->node); +  } +  // HWL: shouldn't be necessary; ToDo: nuke +  //str_tso[6]='\0'; +  //str_node[6]='\0'; + +  if (event==NULL) +    fprintf(stderr,"Evt: NIL\n"); +  else +    fprintf(stderr, "Evt: %s (%u), PE %u [%u], Time %lu, TSO %d (%s), Node %s\n", //"Evt: %s (%u), PE %u [%u], Time %u, TSO %s (%#l), Node %s\n", +	      event_names[event->evttype], event->evttype, +              event->proc, event->creator, event->time,  +	      tso_id, str_tso, str_node +	      /*, event->spark, event->next */ ); + +} + +//@cindex print_eventq +void +print_eventq(hd) +rtsEvent *hd; +{ +  rtsEvent *x; + +  fprintf(stderr,"Event Queue with root at %p:\n", hd); +  for (x=hd; x!=NULL; x=x->next) { +    print_event(x); +  } +} + +/*  +   Spark queue functions are now all  in Sparks.c!! +*/ +//@node Scheduling functions, Thread Queue routines, Spark queue functions, GranSim specific code +//@subsection Scheduling functions + +/*  +   These functions are variants of thread initialisation and therefore +   related to initThread and friends in Schedule.c. However, they are +   specific to a GranSim setup in storing more info in the TSO's statistics +   buffer and sorting the thread queues etc.   +*/ + +/* +   A large portion of startThread deals with maintaining a sorted thread +   queue, which is needed for the Priority Sparking option. Without that +   complication the code boils down to FIFO handling.   +*/ +//@cindex insertThread +void +insertThread(tso, proc) +StgTSO*     tso; +PEs         proc; +{ +  StgTSO *prev = NULL, *next = NULL; +  nat count = 0; +  rtsBool found = rtsFalse; + +  ASSERT(CurrentProc==proc); +  ASSERT(!is_on_queue(tso,proc)); +  /* Idle proc: put the thread on the run queue +     same for pri spark and basic version */ +  if (run_queue_hds[proc] == END_TSO_QUEUE) +    { +      /* too strong! 
+      ASSERT((CurrentProc==MainProc &&    +	      CurrentTime[MainProc]==0 && +	      procStatus[MainProc]==Idle) || +	     procStatus[proc]==Starting); +      */ +      run_queue_hds[proc] = run_queue_tls[proc] = tso; + +      CurrentTime[proc] += RtsFlags.GranFlags.Costs.threadqueuetime; + +      /* new_event of ContinueThread has been moved to do_the_startthread */ + +      /* too strong! +      ASSERT(procStatus[proc]==Idle ||  +             procStatus[proc]==Fishing ||  +             procStatus[proc]==Starting); +      procStatus[proc] = Busy; +      */ +      return; +    } + +  if (RtsFlags.GranFlags.Light) +    GranSimLight_insertThread(tso, proc); + +  /* Only for Pri Scheduling: find place where to insert tso into queue */ +  if (RtsFlags.GranFlags.DoPriorityScheduling && tso->gran.pri!=0) +    /* {add_to_spark_queue}vo' jInIHta'; Qu' wa'DIch yIleghQo' */ +    for (prev = run_queue_hds[proc], next = run_queue_hds[proc]->link, count=0; +	 (next != END_TSO_QUEUE) &&  +	 !(found = tso->gran.pri >= next->gran.pri); +	 prev = next, next = next->link, count++)  +      {  +       ASSERT((prev!=(StgTSO*)NULL || next==run_queue_hds[proc]) && +	      (prev==(StgTSO*)NULL || prev->link==next)); +      } + +  ASSERT(!found || next != END_TSO_QUEUE); +  ASSERT(procStatus[proc]!=Idle); +  +  if (found) { +     /* found can only be rtsTrue if pri scheduling enabled */  +     ASSERT(RtsFlags.GranFlags.DoPriorityScheduling); +     if (RtsFlags.GranFlags.GranSimStats.Global)  +       globalGranStats.non_end_add_threads++; +     /* Add tso to ThreadQueue between prev and next */ +     tso->link = next; +     if ( next == (StgTSO*)END_TSO_QUEUE ) { +       run_queue_tl = tso; +     } else { +       /* no back link for TSO chain */ +     } +      +     if ( prev == (StgTSO*)END_TSO_QUEUE ) { +       /* Never add TSO as first elem of thread queue; the first */ +       /* element should be the one that is currently running -- HWL */ +       IF_DEBUG(gran, +		belch("GRAN: Qagh: 
NewThread (w/ PriorityScheduling): Trying to add TSO %p (PRI=%d) as first elem of threadQ (%p) on proc %u (@ %u)\n", +		    tso, tso->gran.pri, run_queue_hd, proc, +		    CurrentTime[proc])); +     } else { +      prev->link = tso; +     } +  } else { /* !found */ /* or not pri sparking! */ +    /* Add TSO to the end of the thread queue on that processor */ +    run_queue_tls[proc]->link = tso; +    run_queue_tls[proc] = tso; +  } +  ASSERT(RtsFlags.GranFlags.DoPriorityScheduling || count==0); +  CurrentTime[proc] += count * RtsFlags.GranFlags.Costs.pri_sched_overhead + +                       RtsFlags.GranFlags.Costs.threadqueuetime; + +  /* ToDo: check if this is still needed -- HWL  +  if (RtsFlags.GranFlags.DoThreadMigration) +    ++SurplusThreads; + +  if (RtsFlags.GranFlags.GranSimStats.Full && +      !(( event_type == GR_START || event_type == GR_STARTQ) &&  +	RtsFlags.GranFlags.labelling) ) +    DumpRawGranEvent(proc, creator, event_type+1, tso, node,  +	             tso->gran.sparkname, spark_queue_len(proc)); +  */ + +# if defined(GRAN_CHECK) +  /* Check if thread queue is sorted. Only for testing, really!  HWL */ +  if ( RtsFlags.GranFlags.DoPriorityScheduling &&  +       (RtsFlags.GranFlags.Debug.sortedQ) ) { +    rtsBool sorted = rtsTrue; +    StgTSO *prev, *next; + +    if (run_queue_hds[proc]==END_TSO_QUEUE ||  +	run_queue_hds[proc]->link==END_TSO_QUEUE) { +      /* just 1 elem => ok */ +    } else { +      /* Qu' wa'DIch yIleghQo' (ignore first elem)! 
*/ +      for (prev = run_queue_hds[proc]->link, next = prev->link; +	   (next != END_TSO_QUEUE) ; +	   prev = next, next = prev->link) { +	ASSERT((prev!=(StgTSO*)NULL || next==run_queue_hds[proc]) && +	       (prev==(StgTSO*)NULL || prev->link==next)); +	sorted = sorted &&  +	         (prev->gran.pri >= next->gran.pri); +      } +    } +    if (!sorted) { +      fprintf(stderr,"Qagh: THREADQ on PE %d is not sorted:\n", +	      CurrentProc); +      G_THREADQ(run_queue_hd,0x1); +    } +  } +# endif +} + +/* +  insertThread, which is only used for GranSim Light, is similar to +  startThread in that it adds a TSO to a thread queue. However, it assumes +  that the thread queue is sorted by local clocks and it inserts the TSO at +  the right place in the queue. Don't create any event, just insert.   +*/ +//@cindex GranSimLight_insertThread +rtsBool +GranSimLight_insertThread(tso, proc) +StgTSO* tso; +PEs proc; +{ +  StgTSO *prev, *next; +  nat count = 0; +  rtsBool found = rtsFalse; + +  ASSERT(RtsFlags.GranFlags.Light); + +  /* In GrAnSim-Light we always have an idle `virtual' proc. +     The semantics of the one-and-only thread queue is different here: +     all threads in the queue are running (each on its own virtual processor); +     the queue is only needed internally in the simulator to interleave the +     reductions of the different processors. +     The one-and-only thread queue is sorted by the local clocks of the TSOs. 
+  */ +  ASSERT(run_queue_hds[proc] != END_TSO_QUEUE); +  ASSERT(tso->link == END_TSO_QUEUE); + +  /* If only one thread in queue so far we emit DESCHEDULE in debug mode */ +  if (RtsFlags.GranFlags.GranSimStats.Full && +      (RtsFlags.GranFlags.Debug.checkLight) &&  +      (run_queue_hd->link == END_TSO_QUEUE)) { +    DumpRawGranEvent(proc, proc, GR_DESCHEDULE, +		     run_queue_hds[proc], (StgClosure*)NULL,  +		     tso->gran.sparkname, spark_queue_len(proc)); // ToDo: check spar_queue_len +    // resched = rtsTrue; +  } + +  /* this routine should only be used in a GrAnSim Light setup */ +  /* && CurrentProc must be 0 in GrAnSim Light setup */ +  ASSERT(RtsFlags.GranFlags.Light && CurrentProc==0); + +  /* Idle proc; same for pri spark and basic version */ +  if (run_queue_hd==END_TSO_QUEUE) +    { +      run_queue_hd = run_queue_tl = tso; +      /* MAKE_BUSY(CurrentProc); */ +      return rtsTrue; +    } + +  for (prev = run_queue_hds[proc], next = run_queue_hds[proc]->link, count = 0; +       (next != END_TSO_QUEUE) &&  +       !(found = (tso->gran.clock < next->gran.clock)); +       prev = next, next = next->link, count++)  +    {  +       ASSERT((prev!=(StgTSO*)NULL || next==run_queue_hds[proc]) && +	      (prev==(StgTSO*)NULL || prev->link==next)); +    } + +  /* found can only be rtsTrue if pri sparking enabled */  +  if (found) { +     /* Add tso to ThreadQueue between prev and next */ +     tso->link = next; +     if ( next == END_TSO_QUEUE ) { +       run_queue_tls[proc] = tso; +     } else { +       /* no back link for TSO chain */ +     } +      +     if ( prev == END_TSO_QUEUE ) { +       run_queue_hds[proc] = tso; +     } else { +       prev->link = tso; +     } +  } else { /* !found */ /* or not pri sparking! 
*/ +    /* Add TSO to the end of the thread queue on that processor */ +    run_queue_tls[proc]->link = tso; +    run_queue_tls[proc] = tso; +  } + +  if ( prev == END_TSO_QUEUE ) {        /* new head of queue */ +    new_event(proc, proc, CurrentTime[proc], +	      ContinueThread, +	      tso, (StgClosure*)NULL, (rtsSpark*)NULL); +  } +  /* +  if (RtsFlags.GranFlags.GranSimStats.Full &&  +      !(( event_type == GR_START || event_type == GR_STARTQ) &&  +	RtsFlags.GranFlags.labelling) ) +    DumpRawGranEvent(proc, creator, gr_evttype, tso, node, +		     tso->gran.sparkname, spark_queue_len(proc)); +  */ +  return rtsTrue; +} + +/* +  endThread is responsible for general clean-up after the thread tso has +  finished. This includes emitting statistics into the profile etc.   +*/ +void +endThread(StgTSO *tso, PEs proc)  +{ +  ASSERT(procStatus[proc]==Busy);        // coming straight out of STG land +  ASSERT(tso->whatNext==ThreadComplete); +  // ToDo: prune ContinueThreads for this TSO from event queue +  DumpEndEvent(proc, tso, rtsFalse /* not mandatory */); + +  /* if this was the last thread on this PE then make it Idle */ +  if (run_queue_hds[proc]==END_TSO_QUEUE) { +    procStatus[CurrentProc] = Idle; +  } +} + +//@node Thread Queue routines, GranSim functions, Scheduling functions, GranSim specific code +//@subsection Thread Queue routines + +/*  +   Check whether given tso resides on the run queue of the current processor. +   Only used for debugging. +*/ +    +//@cindex is_on_queue +rtsBool +is_on_queue (StgTSO *tso, PEs proc)  +{ +  StgTSO *t; +  rtsBool found; + +  for (t=run_queue_hds[proc], found=rtsFalse;  +       t!=END_TSO_QUEUE && !(found = t==tso); +       t=t->link) +    /* nothing */ ; + +  return found; +} + +/* This routine  is only  used for keeping   a statistics  of thread  queue +   lengths to evaluate the impact of priority scheduling. 
-- HWL  +   {spark_queue_len}vo' jInIHta' +*/ +//@cindex thread_queue_len +nat +thread_queue_len(PEs proc)  +{ + StgTSO *prev, *next; + nat len; + + for (len = 0, prev = END_TSO_QUEUE, next = run_queue_hds[proc]; +      next != END_TSO_QUEUE;  +      len++, prev = next, next = prev->link) +   {} + + return (len); +} + +//@node GranSim functions, GranSimLight routines, Thread Queue routines, GranSim specific code +//@subsection GranSim functions + +/* -----------------------------------------------------------------  */ +/* The main event handling functions; called from Schedule.c (schedule) */ +/* -----------------------------------------------------------------  */ +  +//@cindex do_the_globalblock + +void  +do_the_globalblock(rtsEvent* event) +{  +  PEs proc          = event->proc;        /* proc that requested node */ +  StgTSO *tso       = event->tso;         /* tso that requested node */ +  StgClosure  *node = event->node;        /* requested, remote node */ + +  IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the GlobalBlock\n")); +  /* There should be no GLOBALBLOCKs in GrAnSim Light setup */ +  ASSERT(!RtsFlags.GranFlags.Light); +  /* GlobalBlock events only valid with GUM fetching */ +  ASSERT(RtsFlags.GranFlags.DoBulkFetching); + +  IF_GRAN_DEBUG(bq, // globalBlock, +    if (IS_LOCAL_TO(PROCS(node),proc)) { +      belch("## Qagh: GlobalBlock: Blocking TSO %d (%p) on LOCAL node %p (PE %d).\n", +	    tso->id, tso, node, proc); +    }); + +  /* CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.munpacktime; */ +  if ( blockFetch(tso,proc,node) != 0 ) +    return;                     /* node has become local by now */ + +#if 0 + ToDo: check whether anything has to be done at all after blockFetch -- HWL + +  if (!RtsFlags.GranFlags.DoAsyncFetch) { /* head of queue is next thread */ +    StgTSO* tso = run_queue_hds[proc];       /* awaken next thread */ +    if (tso != (StgTSO*)NULL) { +      new_event(proc, proc, CurrentTime[proc], +		ContinueThread, +		tso, 
(StgClosure*)NULL, (rtsSpark*)NULL); +      CurrentTime[proc] += RtsFlags.GranFlags.Costs.threadcontextswitchtime; +      if (RtsFlags.GranFlags.GranSimStats.Full) +        DumpRawGranEvent(proc, CurrentProc, GR_SCHEDULE, tso, +			 (StgClosure*)NULL, tso->gran.sparkname, spark_queue_len(CurrentProc));  // ToDo: check sparkname and spar_queue_len +      procStatus[proc] = Busy;                  /* might have been fetching */ +    } else { +      procStatus[proc] = Idle;                     /* no work on proc now */ +    } +  } else {  /* RtsFlags.GranFlags.DoAsyncFetch i.e. block-on-fetch */ +	      /* other thread is already running */ +	      /* 'oH 'utbe' 'e' vIHar ; I think that's not needed -- HWL  +	      new_event(proc,proc,CurrentTime[proc], +		       CONTINUETHREAD,EVENT_TSO(event), +		       (RtsFlags.GranFlags.DoBulkFetching ? closure : +		       EVENT_NODE(event)),NULL); +	      */ +  } +#endif +} + +//@cindex do_the_unblock + +void  +do_the_unblock(rtsEvent* event)  +{ +  PEs proc = event->proc,       /* proc that requested node */ +      creator = event->creator; /* proc that requested node */ +  StgTSO* tso = event->tso;     /* tso that requested node */ +  StgClosure* node = event->node;  /* requested, remote node */ +   +  IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the UnBlock\n")) +  /* There should be no UNBLOCKs in GrAnSim Light setup */ +  ASSERT(!RtsFlags.GranFlags.Light); +  /* UnblockThread means either FetchReply has arrived or +     a blocking queue has been awakened; +     ToDo: check with assertions +  ASSERT(procStatus[proc]==Fetching || IS_BLACK_HOLE(event->node)); +  */ +  if (!RtsFlags.GranFlags.DoAsyncFetch) {  /* block-on-fetch */ +    /* We count block-on-fetch as normal block time */     +    tso->gran.blocktime += CurrentTime[proc] - tso->gran.blockedat; +    /* Dumping now done when processing the event +       No costs for contextswitch or thread queueing in this case  +       if (RtsFlags.GranFlags.GranSimStats.Full) +        
 DumpRawGranEvent(proc, CurrentProc, GR_RESUME, tso,  +                          (StgClosure*)NULL, tso->gran.sparkname, spark_queue_len(CurrentProc)); +    */ +    /* Maybe do this in FetchReply already  +    if (procStatus[proc]==Fetching) +      procStatus[proc] = Busy; +    */ +    /* +    new_event(proc, proc, CurrentTime[proc], +	      ContinueThread, +	      tso, node, (rtsSpark*)NULL); +    */ +  } else { +    /* Asynchr comm causes additional costs here: */ +    /* Bring the TSO from the blocked queue into the threadq */ +  } +  /* In all cases, the UnblockThread causes a ResumeThread to be scheduled */ +  new_event(proc, proc,  +	    CurrentTime[proc]+RtsFlags.GranFlags.Costs.threadqueuetime, +	    ResumeThread, +	    tso, node, (rtsSpark*)NULL); +} + +//@cindex do_the_fetchnode + +void +do_the_fetchnode(rtsEvent* event) +{ +  PEs proc = event->proc,       /* proc that holds the requested node */ +      creator = event->creator; /* proc that requested node */ +  StgTSO* tso = event->tso; +  StgClosure* node = event->node;  /* requested, remote node */ +  rtsFetchReturnCode rc; + +  ASSERT(CurrentProc==proc); +  /* There should be no FETCHNODEs in GrAnSim Light setup */ +  ASSERT(!RtsFlags.GranFlags.Light); + +  IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the FetchNode\n")); + +  CurrentTime[proc] += RtsFlags.GranFlags.Costs.munpacktime; + +  /* ToDo: check whether this is the right place for dumping the event */ +  if (RtsFlags.GranFlags.GranSimStats.Full) +    DumpRawGranEvent(creator, proc, GR_FETCH, tso, node, (StgInt)0, 0); + +  do { +    rc = handleFetchRequest(node, proc, creator, tso); +    if (rc == OutOfHeap) {                                   /* trigger GC */ +# if defined(GRAN_CHECK)  && defined(GRAN) +     if (RtsFlags.GcFlags.giveStats) +       fprintf(RtsFlags.GcFlags.statsFile,"*****   veQ boSwI'  PackNearbyGraph(node %p, tso %p (%d))\n", +     	        node, tso, tso->id); +# endif +     prepend_event(event); +     
GarbageCollect(GetRoots);  +     // HWL: ToDo: check whether a ContinueThread has to be issued +     // HWL old: ReallyPerformThreadGC(PACK_HEAP_REQUIRED, rtsFalse); +# if defined(GRAN_CHECK)  && defined(GRAN) +     if (RtsFlags.GcFlags.giveStats) { +       fprintf(RtsFlags.GcFlags.statsFile,"*****      SAVE_Hp=%p, SAVE_HpLim=%p, PACK_HEAP_REQUIRED=%d\n", +     	        Hp, HpLim, 0) ; // PACK_HEAP_REQUIRED);  ??? +       fprintf(stderr,"*****      No. of packets so far: %d (total size: %d)\n",  +     	        globalGranStats.tot_packets, globalGranStats.tot_packet_size); +     } +# endif  +     event = grab_event(); +     // Hp -= PACK_HEAP_REQUIRED; // ??? + +     /* GC knows that events are special and follows the pointer i.e. */ +     /* events are valid even if they moved. An EXIT is triggered */ +     /* if there is not enough heap after GC. */ +    } +  } while (rc == OutOfHeap); +} + +//@cindex do_the_fetchreply +void  +do_the_fetchreply(rtsEvent* event) +{ +  PEs proc = event->proc,       /* proc that requested node */ +      creator = event->creator; /* proc that holds the requested node */ +  StgTSO* tso = event->tso; +  StgClosure* node = event->node;  /* requested, remote node */ +  StgClosure* closure=(StgClosure*)NULL; + +  ASSERT(CurrentProc==proc); +  ASSERT(RtsFlags.GranFlags.DoAsyncFetch || procStatus[proc]==Fetching); + +  IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the FetchReply\n")); +  /* There should be no FETCHREPLYs in GrAnSim Light setup */ +  ASSERT(!RtsFlags.GranFlags.Light); + +  /* assign message unpack costs *before* dumping the event */ +  CurrentTime[proc] += RtsFlags.GranFlags.Costs.munpacktime; +   +  /* ToDo: check whether this is the right place for dumping the event */ +  if (RtsFlags.GranFlags.GranSimStats.Full) +    DumpRawGranEvent(proc, creator, GR_REPLY, tso, node,  +  		      tso->gran.sparkname, spark_queue_len(proc)); + +  /* THIS SHOULD NEVER HAPPEN  +     If tso is in the BQ of node this means that it actually 
entered the  +     remote closure, due to a missing GranSimFetch at the beginning of the  +     entry code; therefore, this is actually a faked fetch, triggered from  +     within GranSimBlock;  +     since tso is both in the EVQ and the BQ for node, we have to take it out  +     of the BQ first before we can handle the FetchReply; +     ToDo: special cases in awaken_blocked_queue, since the BQ magically moved. +  */ +  if (tso->blocked_on!=(StgClosure*)NULL) { +    IF_GRAN_DEBUG(bq, +		  belch("## ghuH: TSO %d (%p) in FetchReply is blocked on node %p (shouldn't happen AFAIK)", +			tso->id, tso, node)); +    // unlink_from_bq(tso, node); +  } +     +  if (RtsFlags.GranFlags.DoBulkFetching) {      /* bulk (packet) fetching */ +    rtsPackBuffer *buffer = (rtsPackBuffer*)node; +    nat size = buffer->size; +   +    /* NB: Fetch misses can't occur with GUM fetching, as */ +    /* updatable closure are turned into RBHs and therefore locked */ +    /* for other processors that try to grab them. */ +   +    closure = UnpackGraph(buffer); +    CurrentTime[proc] += size * RtsFlags.GranFlags.Costs.munpacktime; +  } else  // incremental fetching +      /* Copy or  move node to CurrentProc */ +      if (fetchNode(node, creator, proc)) { +        /* Fetch has failed i.e. node has been grabbed by another PE */ +        PEs p = where_is(node); +        rtsTime fetchtime; +      +	if (RtsFlags.GranFlags.GranSimStats.Global) +	  globalGranStats.fetch_misses++; + +	IF_GRAN_DEBUG(thunkStealing, +		 belch("== Qu'vatlh! fetch miss @ %u: node %p is at proc %u (rather than proc %u)\n", +		       CurrentTime[proc],node,p,creator)); + +	CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.mpacktime; +	 +	/* Count fetch again !? 
*/ +	++(tso->gran.fetchcount); +	tso->gran.fetchtime += RtsFlags.GranFlags.Costs.fetchtime; +         +	fetchtime = stg_max(CurrentTime[CurrentProc],CurrentTime[p]) + +		    RtsFlags.GranFlags.Costs.latency; +	 +	/* Chase the grabbed node */ +	new_event(p, proc, fetchtime, +		  FetchNode, +		  tso, node, (rtsSpark*)NULL); + +# if 0 && defined(GRAN_CHECK) && defined(GRAN) /* Just for testing */ +       IF_GRAN_DEBUG(blockOnFetch, +		     BlockedOnFetch[CurrentProc] = tso;) /*-rtsTrue;-*/ +	 +       IF_GRAN_DEBUG(blockOnFetch_sanity, +		     tso->type |= FETCH_MASK_TSO;) +# endif + +        CurrentTime[proc] += RtsFlags.GranFlags.Costs.mtidytime; +	 +        return; /* NB: no REPLy has been processed; tso still sleeping */ +    } + +    /* -- Qapla'! Fetch has been successful; node is here, now  */ +    ++(event->tso->gran.fetchcount); +    event->tso->gran.fetchtime += RtsFlags.GranFlags.Costs.fetchtime; + +    /* this is now done at the beginning of this routine +    if (RtsFlags.GranFlags.GranSimStats.Full) +       DumpRawGranEvent(proc,event->creator, GR_REPLY, event->tso, +			(RtsFlags.GranFlags.DoBulkFetching ?  +			       closure :  +			       event->node), +                        tso->gran.sparkname, spark_queue_len(proc)); +    */ + +    --OutstandingFetches[proc]; +    ASSERT(OutstandingFetches[proc] >= 0); +    new_event(proc, proc, CurrentTime[proc], +	      ResumeThread, +	      event->tso, (RtsFlags.GranFlags.DoBulkFetching ?  
+			   closure :  +			   event->node), +	      (rtsSpark*)NULL); +} + +//@cindex do_the_movethread + +void +do_the_movethread(rtsEvent* event) { +  PEs proc = event->proc,       /* proc that requested node */ +      creator = event->creator; /* proc that holds the requested node */ +  StgTSO* tso = event->tso; + + IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the MoveThread\n")); + + ASSERT(CurrentProc==proc); + /* There should be no MOVETHREADs in GrAnSim Light setup */ + ASSERT(!RtsFlags.GranFlags.Light); + /* MOVETHREAD events should never occur without -bM */ + ASSERT(RtsFlags.GranFlags.DoThreadMigration); + /* Bitmask of moved thread should be 0 */ + ASSERT(PROCS(tso)==0); + ASSERT(procStatus[proc] == Fishing || +	RtsFlags.GranFlags.DoAsyncFetch); + ASSERT(OutstandingFishes[proc]>0); + + /* ToDo: exact costs for unpacking the whole TSO  */ + CurrentTime[proc] +=  5l * RtsFlags.GranFlags.Costs.munpacktime; + + /* ToDo: check whether this is the right place for dumping the event */ + if (RtsFlags.GranFlags.GranSimStats.Full) +   DumpRawGranEvent(proc, creator,  +		    GR_STOLEN, tso, (StgClosure*)NULL, (StgInt)0, 0); + + // ToDo: check cost functions + --OutstandingFishes[proc]; + SET_GRAN_HDR(tso, ThisPE);         // adjust the bitmask for the TSO + insertThread(tso, proc); + + if (procStatus[proc]==Fishing) +   procStatus[proc] = Idle; + + if (RtsFlags.GranFlags.GranSimStats.Global) +   globalGranStats.tot_TSOs_migrated++; +} + +//@cindex do_the_movespark + +void +do_the_movespark(rtsEvent* event) { + PEs proc = event->proc,       /* proc that requested spark */ +     creator = event->creator; /* proc that holds the requested spark */ + StgTSO* tso = event->tso; + rtsSparkQ spark = event->spark; + + IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the MoveSpark\n")) + + ASSERT(CurrentProc==proc); + ASSERT(spark!=NULL); + ASSERT(procStatus[proc] == Fishing || +	RtsFlags.GranFlags.DoAsyncFetch); + ASSERT(OutstandingFishes[proc]>0);  + + CurrentTime[proc] += 
RtsFlags.GranFlags.Costs.munpacktime; +           + /* record movement of spark only if spark profiling is turned on */ + if (RtsFlags.GranFlags.GranSimStats.Sparks) +    DumpRawGranEvent(proc, creator, +		     SP_ACQUIRED, +		     tso, spark->node, spark->name, spark_queue_len(proc)); + + /* global statistics */ + if ( RtsFlags.GranFlags.GranSimStats.Global && +      !closure_SHOULD_SPARK(spark->node)) +   globalGranStats.withered_sparks++; +   /* Not adding the spark to the spark queue would be the right */ +   /* thing here, but it also would be cheating, as this info can't be */ +   /* available in a real system. -- HWL */ + + --OutstandingFishes[proc]; + + add_to_spark_queue(spark); + + IF_GRAN_DEBUG(randomSteal, // ToDo: spark-distribution flag +	       print_sparkq_stats()); + + /* Should we treat stolen sparks specially? Currently, we don't. */ + + if (procStatus[proc]==Fishing) +   procStatus[proc] = Idle; + + /* add_to_spark_queue will increase the time of the current proc. */ + /* +   If proc was fishing, it is Idle now with the new spark in its spark +   pool. This means that the next time handleIdlePEs is called, a local +   FindWork will be created on this PE to turn the spark into a thread. Of +   course another PE might steal the spark in the meantime (that's why we +   are using events rather than inlining all the operations in the first +   place). */ +} + +/* +  In the Constellation class version of GranSim the semantics of StarThread +  events has changed. Now, StartThread has to perform 3 basic operations: +   - create a new thread (previously this was done in ActivateSpark); +   - insert the thread into the run queue of the current processor +   - generate a new event for actually running the new thread +  Note that the insertThread is called via createThread.  
+*/ +   +//@cindex do_the_startthread + +void +do_the_startthread(rtsEvent *event) +{ +  PEs proc          = event->proc;        /* proc that requested node */ +  StgTSO *tso       = event->tso;         /* tso that requested node */ +  StgClosure  *node = event->node;        /* requested, remote node */ +  rtsSpark *spark   = event->spark; +  GranEventType gr_evttype; + +  ASSERT(CurrentProc==proc); +  ASSERT(!RtsFlags.GranFlags.Light || CurrentProc==0); +  ASSERT(event->evttype == ResumeThread || event->evttype == StartThread); +  /* if this was called via StartThread: */ +  ASSERT(event->evttype!=StartThread || tso == END_TSO_QUEUE); // not yet created +  // ToDo: check: ASSERT(event->evttype!=StartThread || procStatus[proc]==Starting); +  /* if this was called via ResumeThread: */ +  ASSERT(event->evttype!=ResumeThread ||  +	   RtsFlags.GranFlags.DoAsyncFetch ||!is_on_queue(tso,proc));  + +  /* startThread may have been called from the main event handler upon +     finding either a ResumeThread or a StartThread event; set the +     gr_evttype (needed for writing to .gr file) accordingly */ +  // gr_evttype = (event->evttype == ResumeThread) ? GR_RESUME : GR_START; + +  if ( event->evttype == StartThread ) { +    GranEventType gr_evttype = (run_queue_hds[proc]==END_TSO_QUEUE) ?  +                                 GR_START : GR_STARTQ; + +    tso = createThread(BLOCK_SIZE_W, spark->gran_info);// implicit insertThread! +    pushClosure(tso, node); + +    // ToDo: fwd info on local/global spark to thread -- HWL +    // tso->gran.exported =  spark->exported; +    // tso->gran.locked =   !spark->global; +    tso->gran.sparkname = spark->name; + +    ASSERT(CurrentProc==proc); +    if (RtsFlags.GranFlags.GranSimStats.Full) +      DumpGranEvent(gr_evttype,tso); + +    CurrentTime[proc] += RtsFlags.GranFlags.Costs.threadcreatetime; +  } else { // event->evttype == ResumeThread +    GranEventType gr_evttype = (run_queue_hds[proc]==END_TSO_QUEUE) ?  
+                                 GR_RESUME : GR_RESUMEQ; + +    insertThread(tso, proc); + +    ASSERT(CurrentProc==proc); +    if (RtsFlags.GranFlags.GranSimStats.Full) +      DumpGranEvent(gr_evttype,tso); +  } + +  ASSERT(run_queue_hds[proc]!=END_TSO_QUEUE); // non-empty run queue +  procStatus[proc] = Busy; +  /* make sure that this thread is actually run */ +  new_event(proc, proc,  +	    CurrentTime[proc], +	    ContinueThread, +	    tso, node, (rtsSpark*)NULL); +   +  /* A wee bit of statistics gathering */ +  if (RtsFlags.GranFlags.GranSimStats.Global) { +    globalGranStats.tot_add_threads++; +    globalGranStats.tot_tq_len += thread_queue_len(CurrentProc); +  } + +} + +//@cindex do_the_findwork +void +do_the_findwork(rtsEvent* event)  +{ +  PEs proc = event->proc,       /* proc to search for work */ +      creator = event->creator; /* proc that requested work */ +  rtsSparkQ spark = event->spark; +  /* ToDo: check that this size is safe -- HWL */ +  nat req_heap = sizeofW(StgTSO) + MIN_STACK_WORDS; +                 // add this? -- HWL:RtsFlags.ConcFlags.stkChunkSize; + +  IF_DEBUG(gran, fprintf(stderr, "GRAN: doing the Findwork\n")); + +  /* If GUM style fishing is enabled, the contents of the spark field says +     what to steal (spark(1) or thread(2)); */ +  ASSERT(!(RtsFlags.GranFlags.Fishing && event->spark==(rtsSpark*)0)); + +  /* Make sure that we have enough heap for creating a new +     thread. This is a conservative estimate of the required heap. +     This eliminates special checks for GC around NewThread within +     ActivateSpark.                                                 
*/ +   +  if (Hp + req_heap > HpLim ) { +    IF_DEBUG(gc,  +	     belch("GC: Doing GC from within Findwork handling (that's bloody dangerous if you ask me)");) +      GarbageCollect(GetRoots); +      // ReallyPerformThreadGC(req_heap, rtsFalse);   old -- HWL +      Hp -= req_heap; +      if (procStatus[CurrentProc]==Sparking)  +	procStatus[CurrentProc]=Idle; +      return; +  } +   +  if ( RtsFlags.GranFlags.DoAlwaysCreateThreads || +       RtsFlags.GranFlags.Fishing || +       ((procStatus[proc]==Idle || procStatus[proc]==Sparking) && +	(RtsFlags.GranFlags.FetchStrategy >= 2 ||  +	 OutstandingFetches[proc] == 0)) )  +   { +    rtsBool found; +    rtsSparkQ  prev, spark; +     +    /* ToDo: check */ +    ASSERT(procStatus[proc]==Sparking || +	   RtsFlags.GranFlags.DoAlwaysCreateThreads || +	   RtsFlags.GranFlags.Fishing); +     +    /* SImmoHwI' yInej! Search spark queue! */ +    /* gimme_spark (event, &found, &spark); */ +    findLocalSpark(event, &found, &spark); + +    if (!found) { /* pagh vumwI' */ +      /* +        If no spark has been found this can mean 2 things: +	 1/ The FindWork was a fish (i.e. a message sent by another PE) and  +	    the spark pool of the receiver is empty +	    --> the fish has to be forwarded to another PE +         2/ The FindWork was local to this PE (i.e. no communication; in this +            case creator==proc) and the spark pool of the PE is not empty  +	    contains only sparks of closures that should not be sparked  +	    (note: if the spark pool were empty, handleIdlePEs wouldn't have  +	    generated a FindWork in the first place) +	    --> the PE has to be made idle to trigger stealing sparks the next +	        time handleIdlePEs is performed +      */  + +      ASSERT(pending_sparks_hds[proc]==(rtsSpark*)NULL); +      if (creator==proc) { +	/* local FindWork */ +	if (procStatus[proc]==Busy) { +	  belch("ghuH: PE %d in Busy state while processing local FindWork (spark pool is empty!) 
@ %lx", +		proc, CurrentTime[proc]); +	  procStatus[proc] = Idle; +	} +      } else { +	/* global FindWork i.e. a Fish */ +	ASSERT(RtsFlags.GranFlags.Fishing); +	/* actually this generates another request from the originating PE */ +	ASSERT(OutstandingFishes[creator]>0); +	OutstandingFishes[creator]--; +	/* ToDo: assign costs for sending fish to proc not to creator */ +	stealSpark(creator); /* might steal from same PE; ToDo: fix */ +	ASSERT(RtsFlags.GranFlags.maxFishes!=1 || procStatus[creator] == Fishing); +	/* any assertions on state of proc possible here? */ +      } +    } else { +      /* DaH chu' Qu' yIchen! Now create new work! */  +      IF_GRAN_DEBUG(findWork, +		    belch("+- munching spark %p; creating thread for node %p", +			  spark, spark->node)); +      activateSpark (event, spark); +      ASSERT(spark != (rtsSpark*)NULL); +      spark = delete_from_sparkq (spark, proc, rtsTrue); +    } + +    IF_GRAN_DEBUG(findWork, +		  belch("+- Contents of spark queues at the end of FindWork @ %lx", +			CurrentTime[proc]);  +		  print_sparkq_stats()); + +    /* ToDo: check ; not valid if GC occurs in ActivateSpark */ +    ASSERT(!found || +	    /* forward fish  or */ +	    (proc!=creator || +	    /* local spark  or */ +            (proc==creator && procStatus[proc]==Starting)) ||  +	   //(!found && procStatus[proc]==Idle) || +	   RtsFlags.GranFlags.DoAlwaysCreateThreads);  +   } else { +    IF_GRAN_DEBUG(findWork, +		  belch("+- RTS refuses to findWork on PE %d @ %lx", +			proc, CurrentTime[proc]); +		  belch("  procStatus[%d]=%s, fetch strategy=%d, outstanding fetches[%d]=%d",  +			proc, proc_status_names[procStatus[proc]], +			RtsFlags.GranFlags.FetchStrategy,  +			proc, OutstandingFetches[proc])); +   }   +} +  +//@node GranSimLight routines, Code for Fetching Nodes, GranSim functions, GranSim specific code +//@subsection GranSimLight routines + +/*  +   This code is called from the central scheduler after having rgabbed a +   new event and is only needed for 
GranSim-Light. It mainly adjusts the +   ActiveTSO so that all costs that have to be assigned from within the +   scheduler are assigned to the right TSO. The choice of ActiveTSO depends +   on the type of event that has been found.   +*/ + +void +GranSimLight_enter_system(event, ActiveTSOp) +rtsEvent *event; +StgTSO **ActiveTSOp; +{ +  StgTSO *ActiveTSO = *ActiveTSOp; + +  ASSERT (RtsFlags.GranFlags.Light); +   +  /* Restore local clock of the virtual processor attached to CurrentTSO. +     All costs will be associated to the `virt. proc' on which the tso +     is living. */ +  if (ActiveTSO != NULL) {                     /* already in system area */ +    ActiveTSO->gran.clock = CurrentTime[CurrentProc]; +    if (RtsFlags.GranFlags.DoFairSchedule) +      { +	if (RtsFlags.GranFlags.GranSimStats.Full && +	    RtsFlags.GranFlags.Debug.checkLight) +	  DumpGranEvent(GR_SYSTEM_END,ActiveTSO); +      } +  } +  switch (event->evttype) +    {  +    case ContinueThread:  +    case FindWork:       /* inaccurate this way */ +      ActiveTSO = run_queue_hd; +      break; +    case ResumeThread:    +    case StartThread: +    case MoveSpark:      /* has tso of virt proc in tso field of event */ +      ActiveTSO = event->tso; +      break; +    default: barf("Illegal event type %s (%d) in GrAnSim Light setup\n", +		  event_names[event->evttype],event->evttype); +    } +  CurrentTime[CurrentProc] = ActiveTSO->gran.clock; +  if (RtsFlags.GranFlags.DoFairSchedule) { +      if (RtsFlags.GranFlags.GranSimStats.Full && +	  RtsFlags.GranFlags.Debug.checkLight) +	DumpGranEvent(GR_SYSTEM_START,ActiveTSO); +  } +} + +void +GranSimLight_leave_system(event, ActiveTSOp) +rtsEvent *event; +StgTSO **ActiveTSOp; +{ +  StgTSO *ActiveTSO = *ActiveTSOp; + +  ASSERT(RtsFlags.GranFlags.Light); + +  /* Save time of `virt. proc' which was active since last getevent and +     restore time of `virt. proc' where CurrentTSO is living on. 
*/ +  if(RtsFlags.GranFlags.DoFairSchedule) { +    if (RtsFlags.GranFlags.GranSimStats.Full && +	RtsFlags.GranFlags.Debug.checkLight) // ToDo: clean up flags +      DumpGranEvent(GR_SYSTEM_END,ActiveTSO); +  } +  ActiveTSO->gran.clock = CurrentTime[CurrentProc]; +  ActiveTSO = (StgTSO*)NULL; +  CurrentTime[CurrentProc] = CurrentTSO->gran.clock; +  if (RtsFlags.GranFlags.DoFairSchedule /* &&  resched */ ) { +    // resched = rtsFalse; +    if (RtsFlags.GranFlags.GranSimStats.Full && +	RtsFlags.GranFlags.Debug.checkLight) +      DumpGranEvent(GR_SCHEDULE,run_queue_hd); +  } +  /*  +     if (TSO_LINK(ThreadQueueHd)!=PrelBase_Z91Z93_closure && +     (TimeOfNextEvent == 0 || +     TSO_CLOCK(TSO_LINK(ThreadQueueHd))+1000<TimeOfNextEvent)) { +     new_event(CurrentProc,CurrentProc,TSO_CLOCK(TSO_LINK(ThreadQueueHd))+1000, +     CONTINUETHREAD,TSO_LINK(ThreadQueueHd),PrelBase_Z91Z93_closure,NULL); +     TimeOfNextEvent = get_time_of_next_event(); +     } +  */ +} + +//@node Code for Fetching Nodes, Idle PEs, GranSimLight routines, GranSim specific code +//@subsection Code for Fetching Nodes + +/* +   The following GrAnSim routines simulate the fetching of nodes from a +   remote processor. We use a 1 word bitmask to indicate on which processor +   a node is lying. Thus, moving or copying a node from one processor to +   another just requires an appropriate change in this bitmask (using +   @SET_GA@).  Additionally, the clocks have to be updated. + +   A special case arises when the node that is needed by processor A has +   been moved from a processor B to a processor C between sending out a +   @FETCH@ (from A) and its arrival at B. In that case the @FETCH@ has to +   be forwarded to C. This is simulated by issuing another FetchNode event +   on processor C with A as creator. +*/ +  +/* ngoqvam che' {GrAnSim}! 
*/ + +/* Fetch node "node" to processor "p" */ + +//@cindex fetchNode + +rtsFetchReturnCode +fetchNode(node,from,to) +StgClosure* node; +PEs from, to; +{ +  /* In case of RtsFlags.GranFlags.DoBulkFetching this fct should never be  +     entered! Instead, UnpackGraph is used in ReSchedule */ +  StgClosure* closure; + +  ASSERT(to==CurrentProc); +  /* Should never be entered  in GrAnSim Light setup */ +  ASSERT(!RtsFlags.GranFlags.Light); +  /* fetchNode should never be entered with DoBulkFetching */ +  ASSERT(!RtsFlags.GranFlags.DoBulkFetching); + +  /* Now fetch the node */ +  if (!IS_LOCAL_TO(PROCS(node),from) && +      !IS_LOCAL_TO(PROCS(node),to) )  +    return NodeHasMoved; +   +  if (closure_HNF(node))                /* node already in head normal form? */ +    node->header.gran.procs |= PE_NUMBER(to);           /* Copy node */ +  else +    node->header.gran.procs = PE_NUMBER(to);            /* Move node */ + +  return Ok; +} + +/*  +   Process a fetch request.  +    +   Cost of sending a packet of size n = C + P*n +   where C = packet construction constant,  +         P = cost of packing one word into a packet +   [Should also account for multiple packets]. 
+*/ + +//@cindex handleFetchRequest + +rtsFetchReturnCode +handleFetchRequest(node,to,from,tso) +StgClosure* node;   // the node which is requested +PEs to, from;       // fetch request: from -> to +StgTSO* tso;        // the tso which needs the node +{ +  ASSERT(!RtsFlags.GranFlags.Light); +  /* ToDo: check assertion */ +  ASSERT(OutstandingFetches[from]>0); + +  /* probably wrong place; */ +  ASSERT(CurrentProc==to); + +  if (IS_LOCAL_TO(PROCS(node), from)) /* Somebody else moved node already => */ +    {                                 /* start tso */ +      IF_GRAN_DEBUG(thunkStealing, +		    fprintf(stderr,"ghuH: handleFetchRequest entered with local node %p (%s) (PE %d)\n",  +			    node, info_type(node), from)); + +      if (RtsFlags.GranFlags.DoBulkFetching) { +	nat size; +	rtsPackBuffer *graph; + +	/* Create a 1-node-buffer and schedule a FETCHREPLY now */ +	graph = PackOneNode(node, tso, &size);  +	new_event(from, to, CurrentTime[to], +		  FetchReply, +		  tso, graph, (rtsSpark*)NULL); +      } else { +	new_event(from, to, CurrentTime[to], +		  FetchReply, +		  tso, node, (rtsSpark*)NULL); +      } +      IF_GRAN_DEBUG(thunkStealing, +		    belch("== majQa'! closure %p is local on PE %d already (this is a good thing)", node, from)); +      return (NodeIsLocal); +    } +  else if (IS_LOCAL_TO(PROCS(node), to) )   /* Is node still here? */ +    { +      if (RtsFlags.GranFlags.DoBulkFetching) { /* {GUM}vo' ngoqvam vInIHta' */ +	nat size;                              /* (code from GUM) */ +	StgClosure* graph; + +	if (IS_BLACK_HOLE(node)) {   /* block on BH or RBH */ +	  new_event(from, to, CurrentTime[to], +		    GlobalBlock, +		    tso, node, (rtsSpark*)NULL); +	  /* Note: blockFetch is done when handling GLOBALBLOCK event;  +	           make sure the TSO stays out of the run queue */ +          /* When this thread is reawoken it does the usual: it tries to  +             enter the updated node and issues a fetch if it's remote. 
+             It has forgotten that it has sent a fetch already (i.e. a +             FETCHNODE is swallowed by a BH, leaving the thread in a BQ) */ +          --OutstandingFetches[from]; + +	  IF_GRAN_DEBUG(thunkStealing, +			belch("== majQa'! closure %p on PE %d is a BH (demander=PE %d); faking a FMBQ",  +			      node, to, from)); +	  if (RtsFlags.GranFlags.GranSimStats.Global) { +	    globalGranStats.tot_FMBQs++; +	  } +	  return (NodeIsBH); +	} + +	/* The tso requesting the node is blocked and cannot be on a run queue */ +	ASSERT(!is_on_queue(tso, from)); + +	if ((graph = PackNearbyGraph(node, tso, &size)) == NULL)  +	  return (OutOfHeap);  /* out of heap */ + +	/* Actual moving/copying of node is done on arrival; see FETCHREPLY */ +	/* Send a reply to the originator */ +	/* ToDo: Replace that by software costs for doing graph packing! */ +	CurrentTime[to] += size * RtsFlags.GranFlags.Costs.mpacktime; + +	new_event(from, to, +		  CurrentTime[to]+RtsFlags.GranFlags.Costs.latency, +		  FetchReply, +		  tso, (StgClosure *)graph, (rtsSpark*)NULL); +         +	CurrentTime[to] += RtsFlags.GranFlags.Costs.mtidytime; +	return (Ok); +      } else {                   /* incremental (single closure) fetching */ +	/* Actual moving/copying of node is done on arrival; see FETCHREPLY */ +	/* Send a reply to the originator */ +	CurrentTime[to] += RtsFlags.GranFlags.Costs.mpacktime; + +	new_event(from, to, +		  CurrentTime[to]+RtsFlags.GranFlags.Costs.latency, +		  FetchReply, +		  tso, node, (rtsSpark*)NULL); +       +	CurrentTime[to] += RtsFlags.GranFlags.Costs.mtidytime; +	return (Ok); +      } +    } +  else       /* Qu'vatlh! node has been grabbed by another proc => forward */ +    {     +      PEs node_loc = where_is(node); +      rtsTime fetchtime; + +      IF_GRAN_DEBUG(thunkStealing, +		    belch("== Qu'vatlh! 
node %p has been grabbed by PE %d from PE %d (demander=%d) @ %d\n", +			  node,node_loc,to,from,CurrentTime[to])); +      if (RtsFlags.GranFlags.GranSimStats.Global) { +	globalGranStats.fetch_misses++; +      } + +      /* Prepare FORWARD message to proc p_new */ +      CurrentTime[to] += RtsFlags.GranFlags.Costs.mpacktime; +       +      fetchtime = stg_max(CurrentTime[to], CurrentTime[node_loc]) + +                  RtsFlags.GranFlags.Costs.latency; +           +      new_event(node_loc, from, fetchtime, +		FetchNode, +		tso, node, (rtsSpark*)NULL); + +      CurrentTime[to] += RtsFlags.GranFlags.Costs.mtidytime; + +      return (NodeHasMoved); +    } +} + +/* +   blockFetch blocks a BlockedFetch node on some kind of black hole. + +   Taken from gum/HLComms.lc.   [find a  better  place for that ?] --  HWL   + +   {\bf Note:} In GranSim we don't have @FETCHME@ nodes and therefore don't +   create @FMBQ@'s (FetchMe blocking queues) to cope with global +   blocking. Instead, non-local TSO are put into the BQ in the same way as +   local TSOs. However, we have to check if a TSO is local or global in +   order to account for the latencies involved and for keeping track of the +   number of fetches that are really going on.   +*/ + +//@cindex blockFetch + +rtsFetchReturnCode +blockFetch(tso, proc, bh) +StgTSO* tso;                        /* TSO which gets blocked */ +PEs proc;                           /* PE where that tso was running */ +StgClosure* bh;                     /* closure to block on (BH, RBH, BQ) */ +{ +  StgInfoTable *info; + +  IF_GRAN_DEBUG(bq, +		fprintf(stderr,"## blockFetch: blocking TSO %p (%d)[PE %d] on node %p (%s) [PE %d]. 
No graph is packed!\n",  +		tso, tso->id, proc, bh, info_type(bh), where_is(bh))); + +    if (!IS_BLACK_HOLE(bh)) {                      /* catches BHs and RBHs */ +      IF_GRAN_DEBUG(bq, +		    fprintf(stderr,"## blockFetch: node %p (%s) is not a BH => awakening TSO %p (%d) [PE %u]\n",  +			    bh, info_type(bh), tso, tso->id, proc)); + +      /* No BH anymore => immediately unblock tso */ +      new_event(proc, proc, CurrentTime[proc], +	        UnblockThread, +                tso, bh, (rtsSpark*)NULL); + +      /* Is this always a REPLY to a FETCH in the profile ? */ +      if (RtsFlags.GranFlags.GranSimStats.Full) +	DumpRawGranEvent(proc, proc, GR_REPLY, tso, bh, (StgInt)0, 0); +      return (NodeIsNoBH); +    } + +    /* DaH {BQ}Daq Qu' Suq 'e' wISov! +       Now we know that we have to put the tso into the BQ. +       2 cases: If block-on-fetch, tso is at head of threadq =>  +                => take it out of threadq and into BQ +                If reschedule-on-fetch, tso is only pointed to be event +                => just put it into BQ + +    ngoq ngo'!! 
+    if (!RtsFlags.GranFlags.DoAsyncFetch) { +      GranSimBlock(tso, proc, bh); +    } else { +      if (RtsFlags.GranFlags.GranSimStats.Full) +	DumpRawGranEvent(proc, where_is(bh), GR_BLOCK, tso, bh, (StgInt)0, 0); +      ++(tso->gran.blockcount); +      tso->gran.blockedat = CurrentTime[proc]; +    } +    */ + +    /* after scheduling the GlobalBlock event the TSO is not put into the +       run queue again; it is only pointed to via the event we are +       processing now; in GranSim 4.xx there is no difference between +       synchr and asynchr comm here */ +    ASSERT(!is_on_queue(tso, proc)); +    ASSERT(tso->link == END_TSO_QUEUE); + +    GranSimBlock(tso, proc, bh);  /* GranSim statistics gathering */ + +    /* Now, put tso into BQ (similar to blocking entry codes) */ +    info = get_itbl(bh); +    switch (info -> type) { +      case RBH: +      case BLACKHOLE: +      case CAF_BLACKHOLE: // ToDo: check whether this is a possibly ITBL here +      case SE_BLACKHOLE:   // ToDo: check whether this is a possibly ITBL here +      case SE_CAF_BLACKHOLE:// ToDo: check whether this is a possibly ITBL here +	/* basically an inlined version of BLACKHOLE_entry -- HWL */ +	/* Change the BLACKHOLE into a BLACKHOLE_BQ */ +	((StgBlockingQueue *)bh)->header.info = &BLACKHOLE_BQ_info; +	/* Put ourselves on the blocking queue for this black hole */ +	// tso->link=END_TSO_QUEUE;   not necessary; see assertion above +	((StgBlockingQueue *)bh)->blocking_queue = (StgBlockingQueueElement *)tso; +	tso->blocked_on = bh; +	recordMutable((StgMutClosure *)bh); +	break; + +    case BLACKHOLE_BQ: +	/* basically an inlined version of BLACKHOLE_BQ_entry -- HWL */ +	tso->link = (StgTSO *) (((StgBlockingQueue*)bh)->blocking_queue);  +	((StgBlockingQueue*)bh)->blocking_queue = (StgBlockingQueueElement *)tso; +	recordMutable((StgMutClosure *)bh); + +# if 0 && defined(GC_MUT_REQUIRED) +	ToDo: check whether recordMutable is necessary -- HWL +	/* +	 * If we modify a black hole in the old 
generation, we have to make  +	 * sure it goes on the mutables list +	 */ + +	if (bh <= StorageMgrInfo.OldLim) { +	    MUT_LINK(bh) = (W_) StorageMgrInfo.OldMutables; +	    StorageMgrInfo.OldMutables = bh; +	} else +	    MUT_LINK(bh) = MUT_NOT_LINKED; +# endif +	break; + +    case FETCH_ME_BQ: +	barf("Qagh: FMBQ closure (%p) found in GrAnSim (TSO=%p (%d))\n", +	     bh, tso, tso->id); + +    default: +	{ +	  G_PRINT_NODE(bh); +	  barf("Qagh: thought %p was a black hole (IP %p (%s))", +		  bh, info, info_type(get_itbl(bh))); +	} +      } +    return (Ok); +} + + +//@node Idle PEs, Routines directly called from Haskell world, Code for Fetching Nodes, GranSim specific code +//@subsection Idle PEs + +/* +   Export work to idle PEs. This function is called from @ReSchedule@ +   before dispatching on the current event. @HandleIdlePEs@ iterates over +   all PEs, trying to get work for idle PEs. Note, that this is a +   simplification compared to GUM's fishing model. We try to compensate for +   that by making the cost for stealing work dependent on the number of +   idle processors and thereby on the probability with which a randomly +   sent fish would find work.   +*/ + +//@cindex handleIdlePEs + +void +handleIdlePEs(void) +{ +  PEs p; + +  IF_DEBUG(gran, fprintf(stderr, "GRAN: handling Idle PEs\n")) + +  /* Should never be entered in GrAnSim Light setup */ +  ASSERT(!RtsFlags.GranFlags.Light); + +  /* Could check whether there are idle PEs if it's a cheap check */ +  for (p = 0; p < RtsFlags.GranFlags.proc; p++)  +    if (procStatus[p]==Idle)  /*  && IS_SPARKING(p) && IS_STARTING(p) */ +      /* First look for local work i.e. examine local spark pool! 
*/ +      if (pending_sparks_hds[p]!=(rtsSpark *)NULL) { +	new_event(p, p, CurrentTime[p], +		  FindWork, +		  (StgTSO*)NULL, (StgClosure*)NULL, (rtsSpark*)NULL); +	procStatus[p] = Sparking; +      } else if ((RtsFlags.GranFlags.maxFishes==0 || +		  OutstandingFishes[p]<RtsFlags.GranFlags.maxFishes) ) { + +	/* If no local work then try to get remote work!  +	   Qu' Hopbe' pagh tu'lu'pu'chugh Qu' Hop yISuq ! */ +	if (RtsFlags.GranFlags.DoStealThreadsFirst &&  +	    (RtsFlags.GranFlags.FetchStrategy >= 4 || OutstandingFetches[p] == 0)) +	  { +	    if (SurplusThreads > 0l)                    /* Steal a thread */ +	      stealThread(p); +           +	    if (procStatus[p]!=Idle) +	      break; +	  } +	 +	if (SparksAvail > 0 &&  +	    (RtsFlags.GranFlags.FetchStrategy >= 3 || OutstandingFetches[p] == 0)) /* Steal a spark */ +	  stealSpark(p); +	 +	if (SurplusThreads > 0 &&  +	    (RtsFlags.GranFlags.FetchStrategy >= 4 || OutstandingFetches[p] == 0)) /* Steal a thread */ +	  stealThread(p); +      } +} + +/* +   Steal a spark and schedule moving it to proc. We want to look at PEs in +   clock order -- most retarded first.  Currently sparks are only stolen +   from the @ADVISORY_POOL@ never from the @REQUIRED_POOL@. Eventually, +   this should be changed to first steal from the former then from the +   latter. + +   We model a sort of fishing mechanism by counting the number of sparks +   and threads we are currently stealing.  */ + +/*  +   Return a random nat value in the intervall [from, to)  +*/ +static nat  +natRandom(from, to) +nat from, to; +{ +  nat r, d; + +  ASSERT(from<=to); +  d = to - from; +  /* random returns a value in [0, RAND_MAX] */ +  r = (nat) ((float)from + ((float)random()*(float)d)/(float)RAND_MAX); +  r = (r==to) ? from : r; +  ASSERT(from<=r && (r<to || from==to)); +  return r;   +} + +/*  +   Find any PE other than proc. Used for GUM style fishing only. 
+*/ +static PEs  +findRandomPE (proc) +PEs proc; +{ +  nat p; + +  ASSERT(RtsFlags.GranFlags.Fishing); +  if (RtsFlags.GranFlags.RandomSteal) { +    p = natRandom(0,RtsFlags.GranFlags.proc);  /* full range of PEs */ +  } else { +    p = 0; +  } +  IF_GRAN_DEBUG(randomSteal, +		belch("^^ RANDOM_STEAL (fishing): stealing from PE %d (current proc is %d)", +		      p, proc);) +     +  return (PEs)p; +} + +/* +  Magic code for stealing sparks/threads makes use of global knowledge on +  spark queues.   +*/ +static void +sortPEsByTime (proc, pes_by_time, firstp, np)  +PEs proc; +PEs *pes_by_time; +nat *firstp, *np; +{ +  PEs p, temp, n, i, j; +  nat first, upb, r=0, q=0; + +  ASSERT(!RtsFlags.GranFlags.Fishing); + +#if 0   +  upb = RtsFlags.GranFlags.proc;            /* full range of PEs */ + +  if (RtsFlags.GranFlags.RandomSteal) { +    r = natRandom(0,RtsFlags.GranFlags.proc);  /* full range of PEs */ +  } else { +    r = 0; +  } +#endif + +  /* pes_by_time shall contain processors from which we may steal sparks */  +  for(n=0, p=0; p < RtsFlags.GranFlags.proc; ++p) +    if ((proc != p) &&                       // not the current proc +        (pending_sparks_hds[p] != (rtsSpark *)NULL) && // non-empty spark pool +        (CurrentTime[p] <= CurrentTime[CurrentProc])) +      pes_by_time[n++] = p; + +  /* sort pes_by_time */ +  for(i=0; i < n; ++i) +    for(j=i+1; j < n; ++j) +      if (CurrentTime[pes_by_time[i]] > CurrentTime[pes_by_time[j]]) { +	rtsTime temp = pes_by_time[i]; +	pes_by_time[i] = pes_by_time[j]; +	pes_by_time[j] = temp; +      } + +  /* Choose random processor to steal spark from; first look at processors */ +  /* that are earlier than the current one (i.e. 
proc) */ +  for(first=0;  +      (first < n) && (CurrentTime[pes_by_time[first]] <= CurrentTime[proc]); +      ++first) +    /* nothing */ ; + +  /* if the assertion below is true we can get rid of first */ +  /* ASSERT(first==n); */ +  /* ToDo: check if first is really needed; find cleaner solution */ + +  *firstp = first; +  *np = n; +} + +/*  +   Steal a spark (piece of work) from any processor and bring it to proc. +*/ +//@cindex stealSpark +static inline rtsBool  +stealSpark(PEs proc) { stealSomething(proc, rtsTrue, rtsFalse); } + +/*  +   Steal a thread from any processor and bring it to proc i.e. thread migration +*/ +//@cindex stealThread +static inline rtsBool  +stealThread(PEs proc) { stealSomething(proc, rtsFalse, rtsTrue); } + +/*  +   Steal a spark or a thread and schedule moving it to proc. +*/ +//@cindex stealSomething +static rtsBool +stealSomething(proc, steal_spark, steal_thread) +PEs proc;                           // PE that needs work (stealer) +rtsBool steal_spark, steal_thread;  // should a spark and/or thread be stolen +{ +  PEs p; +  rtsTime fish_arrival_time; +  rtsSpark *spark, *prev, *next; +  rtsBool stolen = rtsFalse; + +  ASSERT(steal_spark || steal_thread); + +  /* Should never be entered in GrAnSim Light setup */ +  ASSERT(!RtsFlags.GranFlags.Light); +  ASSERT(!steal_thread || RtsFlags.GranFlags.DoThreadMigration); + +  if (!RtsFlags.GranFlags.Fishing) { +    // ToDo: check if stealing threads is prefered over stealing sparks +    if (steal_spark) { +      if (stealSparkMagic(proc)) +	return rtsTrue; +      else                             // no spark found +	if (steal_thread) +	  return stealThreadMagic(proc); +        else                           // no thread found +	  return rtsFalse;              +    } else {                           // ASSERT(steal_thread); +      return stealThreadMagic(proc); +    } +    barf("stealSomething: never reached"); +  } + +  /* The rest of this function does GUM style fishing */ +   +  p = 
findRandomPE(proc); /* find a random PE other than proc */ +   +  /* Message packing costs for sending a Fish; qeq jabbI'ID */ +  CurrentTime[proc] += RtsFlags.GranFlags.Costs.mpacktime; +   +  /* use another GranEvent for requesting a thread? */ +  if (steal_spark && RtsFlags.GranFlags.GranSimStats.Sparks) +    DumpRawGranEvent(p, proc, SP_REQUESTED, +		     (StgTSO*)NULL, (StgClosure *)NULL, (StgInt)0, 0); + +  /* time of the fish arrival on the remote PE */ +  fish_arrival_time = CurrentTime[proc] + RtsFlags.GranFlags.Costs.latency; +   +  /* Phps use an own Fish event for that? */ +  /* The contents of the spark component is a HACK: +      1 means give me a spark; +      2 means give me a thread +      0 means give me nothing (this should never happen) +  */ +  new_event(p, proc, fish_arrival_time, +	    FindWork, +	    (StgTSO*)NULL, (StgClosure*)NULL,  +	    (steal_spark ? (rtsSpark*)1 : steal_thread ? (rtsSpark*)2 : (rtsSpark*)0)); +   +  ++OutstandingFishes[proc]; +  /* only with Async fetching? */ +  if (procStatus[proc]==Idle)   +    procStatus[proc]=Fishing; +   +  /* time needed to clean up buffers etc after sending a message */ +  CurrentTime[proc] += RtsFlags.GranFlags.Costs.mtidytime; + +  /* If GUM style fishing stealing always succeeds because it only consists +     of sending out a fish; of course, when the fish may return +     empty-handed! */ +  return rtsTrue; +} + +/*  +   This version of stealing a spark makes use of the global info on all +   spark pools etc which is not available in a real parallel system. +   This could be extended to test e.g. the impact of perfect load information. 
+*/ +//@cindex stealSparkMagic +static rtsBool +stealSparkMagic(proc) +PEs proc; +{ +  PEs p, i, j, n, first, upb; +  rtsSpark *spark, *next; +  PEs pes_by_time[MAX_PROC]; +  rtsBool stolen = rtsFalse; +  rtsTime stealtime; + +  /* Should never be entered in GrAnSim Light setup */ +  ASSERT(!RtsFlags.GranFlags.Light); + +  sortPEsByTime(proc, pes_by_time, &first, &n); + +  while (!stolen && n>0) { +    upb = (first==0) ? n : first; +    i = natRandom(0,upb);                /* choose a random eligible PE */ +    p = pes_by_time[i]; + +    IF_GRAN_DEBUG(randomSteal, +		  belch("^^ stealSparkMagic (random_steal, not fishing): stealing spark from PE %d (current proc is %d)", +			p, proc)); +       +    ASSERT(pending_sparks_hds[p]!=(rtsSpark *)NULL); /* non-empty spark pool */ + +    /* Now go through rtsSparkQ and steal the first eligible spark */ +     +    spark = pending_sparks_hds[p];  +    while (!stolen && spark != (rtsSpark*)NULL) +      { +	/* NB: no prev pointer is needed here because all sparks that are not  +	   chosen are pruned +	*/ +	if ((procStatus[p]==Idle || procStatus[p]==Sparking || procStatus[p] == Fishing) && +	    spark->next==(rtsSpark*)NULL)  +	  { +	    /* Be social! Don't steal the only spark of an idle processor  +	       not {spark} neH yInIH !! */ +	    break; /* next PE */ +	  }  +	else if (closure_SHOULD_SPARK(spark->node)) +	  { +	    /* Don't Steal local sparks;  +	       ToDo: optionally prefer local over global sparks +	    if (!spark->global) { +	      prev=spark; +	      continue;                  next spark +	    } +	    */ +	    /* found a spark! */ + +	    /* Prepare message for sending spark */ +	    CurrentTime[p] += RtsFlags.GranFlags.Costs.mpacktime; + +	    if (RtsFlags.GranFlags.GranSimStats.Sparks) +	      DumpRawGranEvent(p, (PEs)0, SP_EXPORTED, +			       (StgTSO*)NULL, spark->node, +			       spark->name, spark_queue_len(p)); + +	    stealtime = (CurrentTime[p] > CurrentTime[proc] ?  
+			   CurrentTime[p] :  +			   CurrentTime[proc]) +	                + sparkStealTime(); + +	    new_event(proc, p /* CurrentProc */, stealtime, +		      MoveSpark, +		      (StgTSO*)NULL, spark->node, spark); +	     +	    stolen = rtsTrue; +	    ++OutstandingFishes[proc]; /* no. of sparks currently on the fly */ +	    if (procStatus[proc]==Idle) +	      procStatus[proc] = Fishing; +	    ++(spark->global);         /* record that this is a global spark */ +	    ASSERT(SparksAvail>0); +	    --SparksAvail;            /* on-the-fly sparks are not available */ +	    next = delete_from_sparkq(spark, p, rtsFalse); // don't dispose! +	    CurrentTime[p] += RtsFlags.GranFlags.Costs.mtidytime; +	  } +	else   /* !(closure_SHOULD_SPARK(SPARK_NODE(spark))) */ +	  { +	   IF_GRAN_DEBUG(checkSparkQ, +			 belch("^^ pruning spark %p (node %p) in stealSparkMagic", +			       spark, spark->node)); + +	    /* if the spark points to a node that should not be sparked, +	       prune the spark queue at this point */ +	    if (RtsFlags.GranFlags.GranSimStats.Sparks) +	      DumpRawGranEvent(p, (PEs)0, SP_PRUNED, +			       (StgTSO*)NULL, spark->node, +			       spark->name, spark_queue_len(p)); +	    if (RtsFlags.GranFlags.GranSimStats.Global) +	      globalGranStats.pruned_sparks++; +	     +	    ASSERT(SparksAvail>0); +	    --SparksAvail; +	    spark = delete_from_sparkq(spark, p, rtsTrue); +	  } +	/* unlink spark (may have been freed!) from sparkq; +	if (prev == NULL) // spark was head of spark queue +	  pending_sparks_hds[p] = spark->next; +        else   +	  prev->next = spark->next; +	if (spark->next == NULL) +	  pending_sparks_tls[p] = prev; +        else   +	  next->prev = prev; +	*/ +      }                    /* while ...    
iterating over sparkq */ + +    /* ToDo: assert that PE p still has work left after stealing the spark */ + +    if (!stolen && (n>0)) {  /* nothing stealable from proc p :( */ +      ASSERT(pes_by_time[i]==p); + +      /* remove p from the list (at pos i) */ +      for (j=i; j+1<n; j++) +	pes_by_time[j] = pes_by_time[j+1]; +      n--; +       +      /* update index to first proc which is later (or equal) than proc */ +      for ( ; +	    (first>0) && +	      (CurrentTime[pes_by_time[first-1]]>CurrentTime[proc]); +	    first--) +	/* nothing */ ; +    }  +  }  /* while ... iterating over PEs in pes_by_time */ + +  IF_GRAN_DEBUG(randomSteal, +		if (stolen) +		  belch("^^ stealSparkMagic: spark %p (node=%p) stolen by PE %d from PE %d (SparksAvail=%d; idlers=%d)", +		       spark, spark->node, proc, p,  +		       SparksAvail, idlers()); +		else   +		  belch("^^ stealSparkMagic: nothing stolen by PE %d (sparkq len after pruning=%d)(SparksAvail=%d; idlers=%d)", +		        proc, SparksAvail, idlers())); + +  if (RtsFlags.GranFlags.GranSimStats.Global && +      stolen && (i!=0)) {                          /* only for statistics */ +    globalGranStats.rs_sp_count++; +    globalGranStats.ntimes_total += n; +    globalGranStats.fl_total += first; +    globalGranStats.no_of_steals++; +  } + +  return stolen; +} + +/*  +   The old stealThread code, which makes use of global info and does not +   send out fishes.   +   NB: most of this is the same as in stealSparkMagic; +       only the pieces specific to processing thread queues are different;  +       long live polymorphism!   
+*/ + +//@cindex stealThreadMagic +static rtsBool +stealThreadMagic(proc) +PEs proc; +{ +  PEs p, i, j, n, first, upb; +  StgTSO *tso; +  PEs pes_by_time[MAX_PROC]; +  rtsBool stolen = rtsFalse; +  rtsTime stealtime; + +  /* Should never be entered in GrAnSim Light setup */ +  ASSERT(!RtsFlags.GranFlags.Light); + +  sortPEsByTime(proc, pes_by_time, &first, &n); + +  while (!stolen && n>0) { +    upb = (first==0) ? n : first; +    i = natRandom(0,upb);                /* choose a random eligible PE */ +    p = pes_by_time[i]; + +    IF_GRAN_DEBUG(randomSteal, +		  belch("^^ stealThreadMagic (random_steal, not fishing): stealing thread from PE %d (current proc is %d)", +			p, proc)); +       +    /* Steal the first exportable thread in the runnable queue but +       never steal the first in the queue for social reasons; +       not Qu' wa'DIch yInIH !! +    */ +    /* Would be better to search through queue and have options which of +       the threads to pick when stealing */ +    if (run_queue_hds[p] == END_TSO_QUEUE) { +      IF_GRAN_DEBUG(randomSteal, +		    belch("^^ stealThreadMagic: No thread to steal from PE %d (stealer=PE %d)",  +			  p, proc)); +    } else { +      tso = run_queue_hds[p]->link;  /* tso is *2nd* thread in thread queue */ +      /* Found one */ +      stolen = rtsTrue; + +      /* update links in queue */ +      run_queue_hds[p]->link = tso->link; +      if (run_queue_tls[p] == tso) +	run_queue_tls[p] = run_queue_hds[p]; +       +      /* ToDo: Turn magic constants into params */ +       +      CurrentTime[p] += 5l * RtsFlags.GranFlags.Costs.mpacktime; +       +      stealtime = (CurrentTime[p] > CurrentTime[proc] ?  
+		   CurrentTime[p] :  +		   CurrentTime[proc]) +	+ sparkStealTime()  +	+ 4l * RtsFlags.GranFlags.Costs.additional_latency +	+ 5l * RtsFlags.GranFlags.Costs.munpacktime; + +      /* Move the thread; set bitmask to 0 while TSO is `on-the-fly' */ +      SET_GRAN_HDR(tso,Nowhere /* PE_NUMBER(proc) */);  + +      /* Move from one queue to another */ +      new_event(proc, p, stealtime, +		MoveThread, +		tso, (StgClosure*)NULL, (rtsSpark*)NULL); + +      /* MAKE_BUSY(proc);  not yet; only when thread is in threadq */ +      ++OutstandingFishes[proc]; +      if (procStatus[proc]) +	procStatus[proc] = Fishing; +      --SurplusThreads; + +      if(RtsFlags.GranFlags.GranSimStats.Full) +	DumpRawGranEvent(p, proc,  +			 GR_STEALING,  +			 tso, (StgClosure*)NULL, (StgInt)0, 0); +       +      /* costs for tidying up buffer after having sent it */ +      CurrentTime[p] += 5l * RtsFlags.GranFlags.Costs.mtidytime; +    } + +    /* ToDo: assert that PE p still has work left after stealing the spark */ + +    if (!stolen && (n>0)) {  /* nothing stealable from proc p :( */ +      ASSERT(pes_by_time[i]==p); + +      /* remove p from the list (at pos i) */ +      for (j=i; j+1<n; j++) +	pes_by_time[j] = pes_by_time[j+1]; +      n--; +       +      /* update index to first proc which is later (or equal) than proc */ +      for ( ; +	    (first>0) && +	      (CurrentTime[pes_by_time[first-1]]>CurrentTime[proc]); +	    first--) +	/* nothing */ ; +    }  +  }  /* while ... 
iterating over PEs in pes_by_time */ + +  IF_GRAN_DEBUG(randomSteal, +		if (stolen) +  		  belch("^^ stealThreadMagic: stolen TSO %d (%p) by PE %d from PE %d (SparksAvail=%d; idlers=%d)", +		        tso->id, tso, proc, p, +		        SparksAvail, idlers()); +		else +		  belch("stealThreadMagic: nothing stolen by PE %d (SparksAvail=%d; idlers=%d)", +			proc, SparksAvail, idlers())); + +  if (RtsFlags.GranFlags.GranSimStats.Global && +      stolen && (i!=0)) { /* only for statistics */ +    /* ToDo: more statistics on avg thread queue lenght etc */ +    globalGranStats.rs_t_count++; +    globalGranStats.no_of_migrates++; +  } + +  return stolen; +} + +//@cindex sparkStealTime +static rtsTime +sparkStealTime(void) +{ +  double fishdelay, sparkdelay, latencydelay; +  fishdelay =  (double)RtsFlags.GranFlags.proc/2; +  sparkdelay = fishdelay -  +          ((fishdelay-1)/(double)(RtsFlags.GranFlags.proc-1))*(double)idlers(); +  latencydelay = sparkdelay*((double)RtsFlags.GranFlags.Costs.latency); + +  return((rtsTime)latencydelay); +} + +//@node Routines directly called from Haskell world, Emiting profiling info for GrAnSim, Idle PEs, GranSim specific code +//@subsection Routines directly called from Haskell world +/*  +The @GranSim...@ routines in here are directly called via macros from the +threaded world.  + +First some auxiliary routines. +*/ + +/* Take the current thread off the thread queue and thereby activate the  +   next thread. It's assumed that the next ReSchedule after this uses  +   NEW_THREAD as param.  +   This fct is called from GranSimBlock and GranSimFetch  +*/ + +//@cindex ActivateNextThread + +void  +ActivateNextThread (proc) +PEs proc; +{ +  StgTSO *t; +  /* +    This routine is entered either via GranSimFetch or via GranSimBlock. +    It has to prepare the CurrentTSO for being blocked and update the +    run queue and other statistics on PE proc. 
The actual enqueuing to the  +    blocking queue (if coming from GranSimBlock) is done in the entry code  +    of the BLACKHOLE and BLACKHOLE_BQ closures (see StgMiscClosures.hc). +  */ +  /* ToDo: add assertions here!! */ +  //ASSERT(run_queue_hds[proc]!=END_TSO_QUEUE); + +  // Only necessary if the running thread is at front of the queue +  // run_queue_hds[proc] = run_queue_hds[proc]->link; +  ASSERT(CurrentProc==proc); +  ASSERT(!is_on_queue(CurrentTSO,proc)); +  if (run_queue_hds[proc]==END_TSO_QUEUE) { +    /* NB: this routine is only entered with asynchr comm (see assertion) */ +    procStatus[proc] = Idle; +  } else { +    /* ToDo: check cost assignment */ +    CurrentTime[proc] += RtsFlags.GranFlags.Costs.threadcontextswitchtime; +    if (RtsFlags.GranFlags.GranSimStats.Full &&  +	(!RtsFlags.GranFlags.Light || RtsFlags.GranFlags.Debug.checkLight))  +                                      /* right flag !?? ^^^ */  +      DumpRawGranEvent(proc, 0, GR_SCHEDULE, run_queue_hds[proc], +                       (StgClosure*)NULL, (StgInt)0, 0); +  } +} + +/*  +   The following GranSim fcts are stg-called from the threaded world.     +*/ + +/* Called from HP_CHK and friends (see StgMacros.h)  */ +//@cindex GranSimAllocate +void  +GranSimAllocate(n) +StgInt n; +{ +  CurrentTSO->gran.allocs += n; +  ++(CurrentTSO->gran.basicblocks); + +  if (RtsFlags.GranFlags.GranSimStats.Heap) { +      DumpRawGranEvent(CurrentProc, 0, GR_ALLOC, CurrentTSO, +                       (StgClosure*)NULL, (StgInt)0, n); +  } +   +  CurrentTSO->gran.exectime += RtsFlags.GranFlags.Costs.heapalloc_cost; +  CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.heapalloc_cost; +} + +/* +  Subtract the values added above, if a heap check fails and +  so has to be redone. 
+*/ +//@cindex GranSimUnallocate +void  +GranSimUnallocate(n) +StgInt n; +{ +  CurrentTSO->gran.allocs -= n; +  --(CurrentTSO->gran.basicblocks); +   +  CurrentTSO->gran.exectime -= RtsFlags.GranFlags.Costs.heapalloc_cost; +  CurrentTime[CurrentProc] -= RtsFlags.GranFlags.Costs.heapalloc_cost; +} + +/* NB: We now inline this code via GRAN_EXEC rather than calling this fct */ +//@cindex GranSimExec +void  +GranSimExec(ariths,branches,loads,stores,floats) +StgWord ariths,branches,loads,stores,floats; +{ +  StgWord cost = RtsFlags.GranFlags.Costs.arith_cost*ariths +  +            RtsFlags.GranFlags.Costs.branch_cost*branches +  +            RtsFlags.GranFlags.Costs.load_cost * loads + +            RtsFlags.GranFlags.Costs.store_cost*stores +  +            RtsFlags.GranFlags.Costs.float_cost*floats; + +  CurrentTSO->gran.exectime += cost; +  CurrentTime[CurrentProc] += cost; +} + +/*  +   Fetch the node if it isn't local +   -- result indicates whether fetch has been done. + +   This is GRIP-style single item fetching. 
+*/ + +//@cindex GranSimFetch +StgInt  +GranSimFetch(node /* , liveness_mask */ ) +StgClosure *node; +/* StgInt liveness_mask; */ +{ +  /* reset the return value (to be checked within STG land) */ +  NeedToReSchedule = rtsFalse;    + +  if (RtsFlags.GranFlags.Light) { +     /* Always reschedule in GrAnSim-Light to prevent one TSO from +        running off too far  +     new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc], +	      ContinueThread,CurrentTSO,node,NULL); +     */ +     return(0);  +  } + +  /* Faking an RBH closure: +     If the bitmask of the closure is 0 then this node is a fake RBH; +  */ +  if (node->header.gran.procs == Nowhere) { +    IF_GRAN_DEBUG(bq, +		  belch("## Found fake RBH (node %p); delaying TSO %d (%p)",  +			node, CurrentTSO->id, CurrentTSO)); +		   +    new_event(CurrentProc, CurrentProc, CurrentTime[CurrentProc]+10000, +	      ContinueThread, CurrentTSO, node, (rtsSpark*)NULL); + +    /* Rescheduling (GranSim internal) is necessary */ +    NeedToReSchedule = rtsTrue; +     +    return(1);  +  } + +  /* Note: once a node has been fetched, this test will be passed */ +  if (!IS_LOCAL_TO(PROCS(node),CurrentProc)) +    { +      PEs p = where_is(node); +      rtsTime fetchtime; +       +      IF_GRAN_DEBUG(thunkStealing, +		    if (p==CurrentProc)  +		      belch("GranSimFetch: Trying to fetch from own processor%u\n", p);); +       +      CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.mpacktime; +      /* NB: Fetch is counted on arrival (FetchReply) */ +       +      fetchtime = stg_max(CurrentTime[CurrentProc],CurrentTime[p]) + +	RtsFlags.GranFlags.Costs.latency; +       +      new_event(p, CurrentProc, fetchtime, +		FetchNode, CurrentTSO, node, (rtsSpark*)NULL); +       +      if (fetchtime<TimeOfNextEvent) +	TimeOfNextEvent = fetchtime; +       +      /* About to block */ +      CurrentTSO->gran.blockedat = CurrentTime[CurrentProc]; +       +      ++OutstandingFetches[CurrentProc]; +       +      if 
(RtsFlags.GranFlags.DoAsyncFetch)  +	/* if asynchr comm is turned on, activate the next thread in the q */ +	ActivateNextThread(CurrentProc); +      else +	procStatus[CurrentProc] = Fetching; + +#if 0  +      /* ToDo: nuke the entire if (anything special for fair schedule?) */ +      if (RtsFlags.GranFlags.DoAsyncFetch)  +	{ +	  /* Remove CurrentTSO from the queue -- assumes head of queue == CurrentTSO */ +	  if(!RtsFlags.GranFlags.DoFairSchedule) +	    { +	      /* now done in do_the_fetchnode  +	      if (RtsFlags.GranFlags.GranSimStats.Full) +		DumpRawGranEvent(CurrentProc, p, GR_FETCH, CurrentTSO, +				 node, (StgInt)0, 0); +	      */				 +	      ActivateNextThread(CurrentProc); +               +# if 0 && defined(GRAN_CHECK) +	      if (RtsFlags.GranFlags.Debug.blockOnFetch_sanity) { +		if (TSO_TYPE(CurrentTSO) & FETCH_MASK_TSO) { +		  fprintf(stderr,"FetchNode: TSO 0x%x has fetch-mask set @ %d\n", +			  CurrentTSO,CurrentTime[CurrentProc]); +		  stg_exit(EXIT_FAILURE); +		} else { +		  TSO_TYPE(CurrentTSO) |= FETCH_MASK_TSO; +		} +	      } +# endif +	      CurrentTSO->link = END_TSO_QUEUE; +	      /* CurrentTSO = END_TSO_QUEUE; */ +	       +	      /* CurrentTSO is pointed to by the FetchNode event; it is +		 on no run queue any more */ +	  } else {  /* fair scheduling currently not supported -- HWL */ +	    barf("Asynchr communication is not yet compatible with fair scheduling\n"); +	  } +	} else {                /* !RtsFlags.GranFlags.DoAsyncFetch */ +	  procStatus[CurrentProc] = Fetching; // ToDo: BlockedOnFetch; +	  /* now done in do_the_fetchnode  +	  if (RtsFlags.GranFlags.GranSimStats.Full) +	    DumpRawGranEvent(CurrentProc, p, +			     GR_FETCH, CurrentTSO, node, (StgInt)0, 0); +	  */ +	  IF_GRAN_DEBUG(blockOnFetch,  +			BlockedOnFetch[CurrentProc] = CurrentTSO;); /*- rtsTrue; -*/ +	} +#endif /* 0 */ + +      CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.mtidytime; +       +      /* Rescheduling (GranSim internal) is necessary */ +      
NeedToReSchedule = rtsTrue; +       +      return(1);  +    } +  return(0); +} + +//@cindex GranSimSpark +void  +GranSimSpark(local,node) +StgInt local; +StgClosure *node; +{ +  /* ++SparksAvail;  Nope; do that in add_to_spark_queue */ +  if (RtsFlags.GranFlags.GranSimStats.Sparks) +    DumpRawGranEvent(CurrentProc, (PEs)0, SP_SPARK, +		     END_TSO_QUEUE, node, (StgInt)0, spark_queue_len(CurrentProc)-1); + +  /* Force the PE to take notice of the spark */ +  if(RtsFlags.GranFlags.DoAlwaysCreateThreads) { +    new_event(CurrentProc,CurrentProc,CurrentTime[CurrentProc], +	      FindWork, +	      END_TSO_QUEUE, (StgClosure*)NULL, (rtsSpark*)NULL); +    if (CurrentTime[CurrentProc]<TimeOfNextEvent) +      TimeOfNextEvent = CurrentTime[CurrentProc]; +  } + +  if(local) +    ++CurrentTSO->gran.localsparks; +  else +    ++CurrentTSO->gran.globalsparks; +} + +//@cindex GranSimSparkAt +void  +GranSimSparkAt(spark,where,identifier) +rtsSpark *spark; +StgClosure *where;    /* This should be a node; alternatively could be a GA */ +StgInt identifier; +{ +  PEs p = where_is(where); +  GranSimSparkAtAbs(spark,p,identifier); +} + +//@cindex GranSimSparkAtAbs +void  +GranSimSparkAtAbs(spark,proc,identifier) +rtsSpark *spark; +PEs proc;         +StgInt identifier; +{ +  rtsTime exporttime; + +  if (spark == (rtsSpark *)NULL) /* Note: Granularity control might have */ +    return;                          /* turned a spark into a NULL. */ + +  /* ++SparksAvail; Nope; do that in add_to_spark_queue */ +  if(RtsFlags.GranFlags.GranSimStats.Sparks) +    DumpRawGranEvent(proc,0,SP_SPARKAT, +		     END_TSO_QUEUE, spark->node, (StgInt)0, spark_queue_len(proc)); + +  if (proc!=CurrentProc) { +    CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.mpacktime; +    exporttime = (CurrentTime[proc] > CurrentTime[CurrentProc]?  
+                  CurrentTime[proc]: CurrentTime[CurrentProc]) +                 + RtsFlags.GranFlags.Costs.latency; +  } else { +    exporttime = CurrentTime[CurrentProc]; +  } + +  if ( RtsFlags.GranFlags.Light ) +    /* Need CurrentTSO in event field to associate costs with creating +       spark even in a GrAnSim Light setup */ +    new_event(proc, CurrentProc, exporttime, +	      MoveSpark, +	      CurrentTSO, spark->node, spark); +  else +    new_event(proc, CurrentProc, exporttime, +	      MoveSpark, (StgTSO*)NULL, spark->node, spark); +  /* Bit of a hack to treat placed sparks the same as stolen sparks */ +  ++OutstandingFishes[proc]; + +  /* Force the PE to take notice of the spark (FINDWORK is put after a +     MoveSpark into the sparkq!) */ +  if (RtsFlags.GranFlags.DoAlwaysCreateThreads) { +    new_event(CurrentProc,CurrentProc,exporttime+1, +              FindWork, +	      (StgTSO*)NULL, (StgClosure*)NULL, (rtsSpark*)NULL); +  } + +  if (exporttime<TimeOfNextEvent) +    TimeOfNextEvent = exporttime; + +  if (proc!=CurrentProc) { +    CurrentTime[CurrentProc] += RtsFlags.GranFlags.Costs.mtidytime; +    ++CurrentTSO->gran.globalsparks; +  } else {  +    ++CurrentTSO->gran.localsparks; +  } +} + +/*  +   This function handles local and global blocking.  It's called either +   from threaded code (RBH_entry, BH_entry etc) or from blockFetch when +   trying to fetch an BH or RBH  +*/ + +//@cindex GranSimBlock +void  +GranSimBlock(tso, proc, node) +StgTSO *tso; +PEs proc; +StgClosure *node; +{ +  PEs node_proc = where_is(node), tso_proc = where_is(tso); + +  ASSERT(tso_proc==CurrentProc); +  // ASSERT(node_proc==CurrentProc); +  IF_GRAN_DEBUG(bq, +		if (node_proc!=CurrentProc)  +		  belch("## ghuH: TSO %d (%lx) [PE %d] blocks on non-local node %p [PE %d] (no simulation of FETCHMEs)", +		        tso->id, tso, tso_proc, node, node_proc));  +  ASSERT(tso->link==END_TSO_QUEUE); +  ASSERT(!is_on_queue(tso,proc)); // tso must not be on run queue already! 
+  //ASSERT(tso==run_queue_hds[proc]); + +  IF_DEBUG(gran, +	   belch("GRAN: TSO %d (%p) [PE %d] blocks on closure %p @ %lx", +		 tso->id, tso, proc, node, CurrentTime[proc]);) + + +    /* THIS SHOULD NEVER HAPPEN! +       If tso tries to block on a remote node (i.e. node_proc!=CurrentProc) +       we have missed a GranSimFetch before entering this closure; +       we hack around it for now, faking a FetchNode;  +       because GranSimBlock is entered via a BLACKHOLE(_BQ) closure, +       tso will be blocked on this closure until the FetchReply occurs. + +       ngoq Dogh!  + +    if (node_proc!=CurrentProc) { +      StgInt ret; +      ret = GranSimFetch(node); +      IF_GRAN_DEBUG(bq, +                    if (ret) +		      belch(".. GranSimBlock: faking a FetchNode of node %p from %d to %d", +			    node, node_proc, CurrentProc);); +      return; +    } +    */ + +  if (RtsFlags.GranFlags.GranSimStats.Full) +    DumpRawGranEvent(proc,node_proc,GR_BLOCK,tso,node,(StgInt)0,0); + +  ++(tso->gran.blockcount); +  /* Distinction  between local and global block is made in blockFetch */ +  tso->gran.blockedat = CurrentTime[proc]; + +  CurrentTime[proc] += RtsFlags.GranFlags.Costs.threadqueuetime; +  ActivateNextThread(proc); +  /* tso->link = END_TSO_QUEUE;    not really necessary; only for testing */ +} + +#endif /* GRAN */ + +//@node Index,  , Dumping routines, GranSim specific code +//@subsection Index + +//@index +//* ActivateNextThread::  @cindex\s-+ActivateNextThread +//* CurrentProc::  @cindex\s-+CurrentProc +//* CurrentTime::  @cindex\s-+CurrentTime +//* GranSimAllocate::  @cindex\s-+GranSimAllocate +//* GranSimBlock::  @cindex\s-+GranSimBlock +//* GranSimExec::  @cindex\s-+GranSimExec +//* GranSimFetch::  @cindex\s-+GranSimFetch +//* GranSimLight_insertThread::  @cindex\s-+GranSimLight_insertThread +//* GranSimSpark::  @cindex\s-+GranSimSpark +//* GranSimSparkAt::  @cindex\s-+GranSimSparkAt +//* GranSimSparkAtAbs::  @cindex\s-+GranSimSparkAtAbs +//* 
GranSimUnallocate::  @cindex\s-+GranSimUnallocate +//* any_idle::  @cindex\s-+any_idle +//* blockFetch::  @cindex\s-+blockFetch +//* do_the_fetchnode::  @cindex\s-+do_the_fetchnode +//* do_the_fetchreply::  @cindex\s-+do_the_fetchreply +//* do_the_findwork::  @cindex\s-+do_the_findwork +//* do_the_globalblock::  @cindex\s-+do_the_globalblock +//* do_the_movespark::  @cindex\s-+do_the_movespark +//* do_the_movethread::  @cindex\s-+do_the_movethread +//* do_the_startthread::  @cindex\s-+do_the_startthread +//* do_the_unblock::  @cindex\s-+do_the_unblock +//* fetchNode::  @cindex\s-+fetchNode +//* ga_to_proc::  @cindex\s-+ga_to_proc +//* get_next_event::  @cindex\s-+get_next_event +//* get_time_of_next_event::  @cindex\s-+get_time_of_next_event +//* grab_event::  @cindex\s-+grab_event +//* handleFetchRequest::  @cindex\s-+handleFetchRequest +//* handleIdlePEs::  @cindex\s-+handleIdlePEs +//* idlers::  @cindex\s-+idlers +//* insertThread::  @cindex\s-+insertThread +//* insert_event::  @cindex\s-+insert_event +//* is_on_queue::  @cindex\s-+is_on_queue +//* is_unique::  @cindex\s-+is_unique +//* new_event::  @cindex\s-+new_event +//* prepend_event::  @cindex\s-+prepend_event +//* print_event::  @cindex\s-+print_event +//* print_eventq::  @cindex\s-+print_eventq +//* prune_eventq ::  @cindex\s-+prune_eventq  +//* spark queue::  @cindex\s-+spark queue +//* sparkStealTime::  @cindex\s-+sparkStealTime +//* stealSomething::  @cindex\s-+stealSomething +//* stealSpark::  @cindex\s-+stealSpark +//* stealSparkMagic::  @cindex\s-+stealSparkMagic +//* stealThread::  @cindex\s-+stealThread +//* stealThreadMagic::  @cindex\s-+stealThreadMagic +//* thread_queue_len::  @cindex\s-+thread_queue_len +//* traverse_eventq_for_gc::  @cindex\s-+traverse_eventq_for_gc +//* where_is::  @cindex\s-+where_is +//@end index diff --git a/ghc/rts/parallel/GranSimRts.h b/ghc/rts/parallel/GranSimRts.h new file mode 100644 index 0000000000..585291a3b4 --- /dev/null +++ b/ghc/rts/parallel/GranSimRts.h @@ 
-0,0 +1,261 @@ +/* -------------------------------------------------------------------------- +   Time-stamp: <Sat Dec 04 1999 01:26:45 Stardate: [-30]3995.30 hwloidl> +   $Id: GranSimRts.h,v 1.2 2000/01/13 14:34:07 hwloidl Exp $ + +   Variables and functions specific to GranSim. +   ----------------------------------------------------------------------- */ + +#ifndef GRANSIM_RTS_H +#define GRANSIM_RTS_H + +//@node Headers for GranSim objs used only in the RTS internally, , , +//@section Headers for GranSim objs used only in the RTS internally + +//@menu +//* Event queue::		 +//* Spark handling routines::	 +//* Processor related stuff::	 +//* Local types::		 +//* Statistics gathering::	 +//* Prototypes::		 +//@end menu + +//@node Event queue, Spark handling routines, Headers for GranSim objs used only in the RTS internally, Headers for GranSim objs used only in the RTS internally +//@subsection Event queue + +#if defined(GRAN) || defined(PAR) +/* Granularity event types for output (see DumpGranEvent) */ +typedef enum GranEventType_ { +    GR_START = 0, GR_STARTQ,  +    GR_STEALING, GR_STOLEN, GR_STOLENQ,  +    GR_FETCH, GR_REPLY, GR_BLOCK, GR_RESUME, GR_RESUMEQ, +    GR_SCHEDULE, GR_DESCHEDULE, +    GR_END, +    SP_SPARK, SP_SPARKAT, SP_USED, SP_PRUNED, SP_EXPORTED, SP_ACQUIRED, SP_REQUESTED, +    GR_ALLOC, +    GR_TERMINATE, +    GR_SYSTEM_START, GR_SYSTEM_END,            /* only for debugging */ +    GR_EVENT_MAX +} GranEventType; + +extern char *gran_event_names[]; +#endif + +#if defined(GRAN)                                            /* whole file */ + +/* Event Types (internal use only) */ +typedef enum rtsEventType_ { + ContinueThread = 0,  /* Continue running the first thread in the queue */ + StartThread,         /* Start a newly created thread */ + ResumeThread,        /* Resume a previously running thread */ + MoveSpark,           /* Move a spark from one PE to another */ + MoveThread,          /* Move a thread from one PE to another */ + FindWork,       
     /* Search for work */ + FetchNode,           /* Fetch a node */ + FetchReply,          /* Receive a node */ + GlobalBlock,         /* Block a TSO on a remote node */ + UnblockThread        /* Make a TSO runnable */ +} rtsEventType; + +/* Number of last event type */ +#define MAX_EVENT       9 +  +typedef struct rtsEvent_ { +  PEs           proc;    /* Processor id */ +  PEs           creator; /* Processor id of PE that created the event */ +  rtsEventType  evttype; /* rtsEvent type */ +  rtsTime       time;    /* Time at which event happened */ +  StgTSO       *tso;     /* Associated TSO, if relevant */ +  StgClosure   *node;    /* Associated node, if relevant */ +  rtsSpark     *spark;   /* Associated SPARK, if relevant */ +  StgInt        gc_info; /* Counter of heap objects to mark (used in GC only)*/ +  struct rtsEvent_ *next; +  } rtsEvent; + +typedef rtsEvent *rtsEventQ; + +extern rtsEventQ EventHd; + +/* Interface for ADT of Event Queue */ +rtsEvent *get_next_event(void); +rtsTime   get_time_of_next_event(void); +void      insert_event(rtsEvent *newentry); +void      new_event(PEs proc, PEs creator, rtsTime time,  +		    rtsEventType evttype, StgTSO *tso,  +		    StgClosure *node, rtsSpark *spark); +void      print_event(rtsEvent *event); +void      print_eventq(rtsEvent *hd); +void      prepend_event(rtsEvent *event); +rtsEventQ grab_event(void); +void      prune_eventq(StgTSO *tso, StgClosure *node);  + +void      traverse_eventq_for_gc(void); +void      markEventQueue(void); + +//@node Spark handling routines, Processor related stuff, Event queue, Headers for GranSim objs used only in the RTS internally +//@subsection Spark handling routines + +/* These functions are only used in the RTS internally; see GranSim.h for rest */ +void 	  disposeSpark(rtsSpark *spark); +void 	  disposeSparkQ(rtsSparkQ spark); +void 	  print_spark(rtsSpark *spark); +void      print_sparkq(PEs proc); +void 	  print_sparkq_stats(void); +nat  	  spark_queue_len(PEs proc); 
+rtsSpark *delete_from_sparkq (rtsSpark *spark, PEs p, rtsBool dispose_too); +void      markSparkQueue(void); + +//@node Processor related stuff, Local types, Spark handling routines, Headers for GranSim objs used only in the RTS internally +//@subsection Processor related stuff + +typedef enum rtsProcStatus_ { +  Idle = 0,             /* empty threadq */ +  Sparking,             /* non-empty sparkq; FINDWORK has been issued */ +  Starting,             /* STARTTHREAD has been issue */ +  Fetching,             /* waiting for remote data (only if block-on-fetch) */ +  Fishing,              /* waiting for remote spark/thread */ +  Busy                  /* non-empty threadq, with head of queue active */ +} rtsProcStatus; + +/* +#define IS_IDLE(proc)        (procStatus[proc] == Idle) +#define IS_SPARKING(proc)    (procStatus[proc] == Sparking) +#define IS_STARTING(proc)    (procStatus[proc] == Starting) +#define IS_FETCHING(proc)    (procStatus[proc] == Fetching) +#define IS_FISHING(proc)     (procStatus[proc] == Fishing) +#define IS_BUSY(proc)        (procStatus[proc] == Busy)     +#define ANY_IDLE             (any_idle()) +#define MAKE_IDLE(proc)      procStatus[proc] = Idle +#define MAKE_SPARKING(proc)  procStatus[proc] = Sparking +#define MAKE_STARTING(proc)  procStatus[proc] = Starting +#define MAKE_FETCHING(proc)  procStatus[proc] = Fetching +#define MAKE_FISHING(proc)   procStatus[proc] = Fishing +#define MAKE_BUSY(proc)      procStatus[proc] = Busy +*/ + +//@node Local types, Statistics gathering, Processor related stuff, Headers for GranSim objs used only in the RTS internally +//@subsection Local types + +/* Return codes of HandleFetchRequest: +    0 ... ok (FETCHREPLY event with a buffer containing addresses of the  +              nearby graph has been scheduled) +    1 ... node is already local (fetched by somebody else; no event is +                                  scheduled in here) +    2 ... 
fetch request has been forwarded to the PE that now contains the
+  nat tot_low_pri_sparks; + +  /* load distribution statistics */   +  nat rs_sp_count, rs_t_count, ntimes_total, fl_total,  +      no_of_steals, no_of_migrates; + +  /* spark queue stats */ +  nat tot_sq_len, tot_sq_probes, tot_sparks; +  nat tot_add_threads, tot_tq_len, non_end_add_threads; + +  /* packet statistics */ +  nat tot_packets, tot_packet_size, tot_cuts, tot_thunks; + +  /* thread stats */ +  nat tot_threads_created, threads_created_on_PE[MAX_PROC], +      tot_TSOs_migrated; + +  /* spark stats */ +  nat pruned_sparks, withered_sparks; +  nat tot_sparks_created, sparks_created_on_PE[MAX_PROC]; + +  /* scheduling stats */ +  nat tot_yields; + +  /* blocking queue statistics */ +  rtsTime tot_bq_processing_time; +  nat tot_bq_len, tot_bq_len_local, tot_awbq, tot_FMBQs; +} GlobalGranStats; + +extern GlobalGranStats globalGranStats; + +//@node Prototypes,  , Statistics gathering, Headers for GranSim objs used only in the RTS internally +//@subsection Prototypes + +/* Generally useful fcts */ +PEs where_is(StgClosure *node); +rtsBool is_unique(StgClosure *node); + +/* Prototypes of event handling functions; needed in Schedule.c:ReSchedule() */ +void do_the_globalblock (rtsEvent* event); +void do_the_unblock (rtsEvent* event); +void do_the_fetchnode (rtsEvent* event); +void do_the_fetchreply (rtsEvent* event); +void do_the_movethread (rtsEvent* event); +void do_the_movespark (rtsEvent* event); +void do_the_startthread(rtsEvent *event); +void do_the_findwork(rtsEvent* event); +void gimme_spark (rtsEvent *event, rtsBool *found_res, rtsSparkQ *spark_res); +rtsBool munch_spark (rtsEvent *event, rtsSparkQ spark); + +/* GranSimLight routines */ +void GranSimLight_enter_system(rtsEvent *event, StgTSO **ActiveTSOp); +void GranSimLight_leave_system(rtsEvent *event, StgTSO **ActiveTSOp); + +/* Communication related routines */ +rtsFetchReturnCode fetchNode(StgClosure* node, PEs from, PEs to); +rtsFetchReturnCode handleFetchRequest(StgClosure* node, PEs curr_proc, PEs 
p, StgTSO* tso); +rtsFetchReturnCode blockFetch(StgTSO* tso, PEs proc, StgClosure* bh); +void               handleIdlePEs(void); + +long int random(void); /* used in stealSpark() and stealThread() in GranSim.c */ + +/* Scheduling fcts defined in GranSim.c */ +void    insertThread(StgTSO *tso, PEs proc); +void    endThread(StgTSO *tso, PEs proc); +rtsBool GranSimLight_insertThread(StgTSO *tso, PEs proc); +nat     thread_queue_len(PEs proc); + +/* For debugging */ +rtsBool is_on_queue (StgTSO *tso, PEs proc); + +/* Interface for dumping routines (i.e. writing to log file) */ +void DumpGranEvent(GranEventType name, StgTSO *tso); +void DumpEndEvent(PEs proc, StgTSO *tso, rtsBool mandatory_thread); +void DumpTSO(StgTSO *tso); +void DumpRawGranEvent(PEs proc, PEs p, GranEventType name,  + 	              StgTSO *tso, StgClosure *node, StgInt sparkname, StgInt len); + +#endif + +#endif /* GRANSIM_RTS_H  */ diff --git a/ghc/rts/parallel/HLC.h b/ghc/rts/parallel/HLC.h new file mode 100644 index 0000000000..f2d98d4bdc --- /dev/null +++ b/ghc/rts/parallel/HLC.h @@ -0,0 +1,59 @@ +/* -------------------------------------------------------------------------- +   Time-stamp: <Sun Dec 05 1999 21:02:36 Stardate: [-30]4004.38 hwloidl> +   $Id: HLC.h,v 1.2 2000/01/13 14:34:07 hwloidl Exp $ + +   High Level Communications Header (HLC.h) + +   Contains the high-level definitions (i.e. communication +   subsystem independent) used by GUM +   Phil Trinder, Glasgow University, 12 December 1994 +   H-W. 
Loidl, Heriot-Watt, November 1999 +   ----------------------------------------------------------------------- */ + +#ifndef __HLC_H +#define __HLC_H + +#ifdef PAR + +#include "LLC.h" + +#define NEW_FISH_AGE           0 +#define NEW_FISH_HISTORY       0 +#define NEW_FISH_HUNGER        0 +#define FISH_LIFE_EXPECTANCY  10 + + +//@node GUM Message Sending and Unpacking Functions +//@subsection GUM Message Sending and Unpacking Functions + +rtsBool  initMoreBuffers(void); + +void 	 sendFetch (globalAddr *ga, globalAddr *bqga, int load); +void 	 sendResume(globalAddr *rga, int nelem, rtsPackBuffer *data); +void 	 sendAck (GlobalTaskId task, int ngas, globalAddr *gagamap); +void 	 sendFish (GlobalTaskId destPE, GlobalTaskId origPE, int age, int history, int hunger); +void 	 sendFree (GlobalTaskId destPE, int nelem, P_ data); +void 	 sendSchedule(GlobalTaskId origPE, int nelem, rtsPackBuffer *data); + +//@node Message-Processing Functions +//@subsection Message-Processing Functions + +void 	 processMessages(void); +void 	 processFetches(void); +void 	 processTheRealFetches(void); + +//@node Miscellaneous Functions +//@subsection Miscellaneous Functions + +void 	 prepareFreeMsgBuffers(void); +void 	 freeRemoteGA (int pe, globalAddr *ga); +void 	 sendFreeMessages(void); + +GlobalTaskId  choosePE(void); +StgClosure   *createBlockedFetch (globalAddr ga, globalAddr rga); +void 	      waitForTermination(void); + +void          DebugPrintGAGAMap (globalAddr *gagamap, int nGAs); + +#endif /* PAR */ +#endif /* __HLC_H */ diff --git a/ghc/rts/parallel/HLComms.c b/ghc/rts/parallel/HLComms.c new file mode 100644 index 0000000000..bce0de7806 --- /dev/null +++ b/ghc/rts/parallel/HLComms.c @@ -0,0 +1,1305 @@ +/* ---------------------------------------------------------------------------- + * Time-stamp: <Wed Jan 12 2000 13:32:25 Stardate: [-30]4193.86 hwloidl> + * $Id: HLComms.c,v 1.2 2000/01/13 14:34:07 hwloidl Exp $ + * + * High Level Communications Routines (HLComms.lc) + * + * 
Contains the high-level routines (i.e. communication + * subsystem independent) used by GUM + *  + * Phil Trinder, Glasgow University, 12 December 1994 + * Adapted for new RTS + * Phil Trinder, Simon Marlow July 1998 + * H-W. Loidl, Heriot-Watt University, November 1999 + *  + * ------------------------------------------------------------------------- */ + +#ifdef PAR /* whole file */ + +//@node High Level Communications Routines, , , +//@section High Level Communications Routines + +//@menu +//* Macros etc::		 +//* Includes::			 +//* GUM Message Sending and Unpacking Functions::   +//* Message-Processing Functions::   +//* GUM Message Processor::	 +//* Miscellaneous Functions::	 +//* Index::			 +//@end menu + +//@node Macros etc, Includes, High Level Communications Routines, High Level Communications Routines +//@subsection Macros etc + +# ifndef _AIX +# define NON_POSIX_SOURCE /* so says Solaris */ +# endif + +//@node Includes, GUM Message Sending and Unpacking Functions, Macros etc, High Level Communications Routines +//@subsection Includes + +#include "Rts.h" +#include "RtsUtils.h" +#include "RtsFlags.h" +#include "Storage.h"   // for recordMutable +#include "HLC.h" +#include "Parallel.h" +#include "GranSimRts.h" +#include "ParallelRts.h" +#include "FetchMe.h"     // for BLOCKED_FETCH_info etc +#if defined(DEBUG) +# include "ParallelDebug.h" +#endif +#include "StgMacros.h" // inlined IS_... 
fcts + +//@node GUM Message Sending and Unpacking Functions, Message-Processing Functions, Includes, High Level Communications Routines +//@subsection GUM Message Sending and Unpacking Functions + +/* + * GUM Message Sending and Unpacking Functions + */ + +/* + * Allocate space for message processing + */ + +//@cindex gumPackBuffer +static rtsPackBuffer *gumPackBuffer; + +//@cindex initMoreBuffers +rtsBool +initMoreBuffers(void) +{ +  if ((gumPackBuffer = (rtsPackBuffer *)stgMallocWords(RtsFlags.ParFlags.packBufferSize,  +					     "initMoreBuffers")) == NULL) +    return rtsFalse; +  return rtsTrue; +} + +/* + * SendFetch packs the two global addresses and a load into a message + + * sends it.   + +//@cindex FETCH + +   Structure of a FETCH message: + +         |    GA 1     |        GA 2          | +         +------------------------------------+------+ +	 | gtid | slot | weight | gtid | slot | load | +	 +------------------------------------+------+ + */ + +//@cindex sendFetch +void +sendFetch(globalAddr *rga, globalAddr *lga, int load) +{ +  ASSERT(rga->weight > 0 && lga->weight > 0); +  IF_PAR_DEBUG(fetch, +	       belch("** [%x] Sending Fetch for ((%x, %d, 0)); locally ((%x, %d, %x)), load = %d",  +		     mytid, +		     rga->payload.gc.gtid, rga->payload.gc.slot,  +		     lga->payload.gc.gtid, lga->payload.gc.slot, lga->weight, +		     load)); + + +  /* ToDo: Dump event +  DumpRawGranEvent(CURRENT_PROC, taskIDtoPE(rga->payload.gc.gtid),  +		   GR_FETCH, CurrentTSO, (StgClosure *)(lga->payload.gc.slot), +		   0, spark_queue_len(ADVISORY_POOL)); +  */ + +  sendOpV(PP_FETCH, rga->payload.gc.gtid, 6, +	  (StgWord) rga->payload.gc.gtid, (StgWord) rga->payload.gc.slot,  +	  (StgWord) lga->weight, (StgWord) lga->payload.gc.gtid,  +	  (StgWord) lga->payload.gc.slot, (StgWord) load); +} + +/* + * unpackFetch unpacks a FETCH message into two Global addresses and a load + * figure.   
+*/ + +//@cindex unpackFetch +static void +unpackFetch(globalAddr *lga, globalAddr *rga, int *load) +{ +  long buf[6]; + +  GetArgs(buf, 6);  + +  IF_PAR_DEBUG(fetch, +	       belch("** [%x] Unpacking Fetch for ((%x, %d, 0)) to ((%x, %d, %x)), load = %d",  +		     mytid, +		     (GlobalTaskId) buf[0], (int) buf[1],  +		     (GlobalTaskId) buf[3], (int) buf[4], buf[2], buf[5])); + +  lga->weight = 1; +  lga->payload.gc.gtid = (GlobalTaskId) buf[0]; +  lga->payload.gc.slot = (int) buf[1]; + +  rga->weight = (unsigned) buf[2]; +  rga->payload.gc.gtid = (GlobalTaskId) buf[3]; +  rga->payload.gc.slot = (int) buf[4]; + +  *load = (int) buf[5]; + +  ASSERT(rga->weight > 0); +} + +/* + * SendResume packs the remote blocking queue's GA and data into a message  + * and sends it. + +//@cindex RESUME + +   Structure of a RESUME message: + +      ------------------------------- +      | weight | slot | n | data ... +      ------------------------------- + +   data is a packed graph represented as an rtsPackBuffer +   n is the size of the graph (as returned by PackNearbyGraph) + packet hdr size + */ + +//@cindex sendResume +void +sendResume(globalAddr *rga, int nelem, rtsPackBuffer *data) // StgPtr data) +{ +  IF_PAR_DEBUG(resume, +	       PrintPacket(data); +	       belch("[] [%x] Sending Resume for ((%x, %d, %x))",  +		     mytid, +		     rga->payload.gc.gtid, rga->payload.gc.slot, rga->weight)); + +  sendOpNV(PP_RESUME, rga->payload.gc.gtid,  +	   nelem + PACK_BUFFER_HDR_SIZE, (StgPtr)data,  +	   2, (rtsWeight) rga->weight, (StgWord) rga->payload.gc.slot); +} + +/* + * unpackResume unpacks a Resume message into two Global addresses and + * a data array. 
+ */ + +//@cindex unpackResume +static void +unpackResume(globalAddr *lga, int *nelem, rtsPackBuffer *data) +{ +    long buf[3]; + +    GetArgs(buf, 3);  + +    IF_PAR_DEBUG(resume, +		 belch("[] [%x] Unpacking Resume for ((%x, %d, %x))",  +		       mytid, mytid, +		       (int) buf[1], (unsigned) buf[0])); + +    /* +      RESUME event is written in awaken_blocked_queue +    DumpRawGranEvent(CURRENT_PROC, taskIDtoPE(lga->payload.gc.gtid),  +		     GR_RESUME, END_TSO_QUEUE, (StgClosure *)NULL, 0, 0); +    */ + +    lga->weight = (unsigned) buf[0]; +    lga->payload.gc.gtid = mytid; +    lga->payload.gc.slot = (int) buf[1]; + +    *nelem = (int) buf[2]; // includes PACK_BUFFER_HDR_SIZE; +    GetArgs(data, *nelem); +    *nelem -= PACK_BUFFER_HDR_SIZE; +} + +/* + * SendAck packs the global address being acknowledged, together with + * an array of global addresses for any closures shipped and sends them. + +//@cindex ACK + +   Structure of an ACK message: + +      |        GA 1          |        GA 2          |  +      +---------------------------------------------+------- +      | weight | gtid | slot | weight | gtid | slot |  .....  
ngas times +      + --------------------------------------------+-------  + + */ + +//@cindex sendAck +void +sendAck(GlobalTaskId task, int ngas, globalAddr *gagamap) +{ +  static long *buffer; +  long *p; +  int i; + +  buffer = (long *) gumPackBuffer; + +  for(i = 0, p = buffer; i < ngas; i++, p += 6) { +    ASSERT(gagamap[1].weight > 0); +    p[0] = (long) gagamap->weight; +    p[1] = (long) gagamap->payload.gc.gtid; +    p[2] = (long) gagamap->payload.gc.slot; +    gagamap++; +    p[3] = (long) gagamap->weight; +    p[4] = (long) gagamap->payload.gc.gtid; +    p[5] = (long) gagamap->payload.gc.slot; +    gagamap++; +  } +  IF_PAR_DEBUG(ack, +	       belch(",, [%x] Sending Ack (%d pairs) to PE %x\n",  +		     mytid, ngas, task)); + +  sendOpN(PP_ACK, task, p - buffer, buffer); +} + +/* + * unpackAck unpacks an Acknowledgement message into a Global address, + * a count of the number of global addresses following and a map of  + * Global addresses + */ + +//@cindex unpackAck +static void +unpackAck(int *ngas, globalAddr *gagamap) +{ +  long GAarraysize; +  long buf[6]; +   +  GetArgs(&GAarraysize, 1); +   +  *ngas = GAarraysize / 6; +   +  IF_PAR_DEBUG(ack, +	       belch(",, [%x] Unpacking Ack (%d pairs) on %x\n",  +		     mytid, *ngas, mytid)); + +  while (GAarraysize > 0) { +    GetArgs(buf, 6); +    gagamap->weight = (rtsWeight) buf[0]; +    gagamap->payload.gc.gtid = (GlobalTaskId) buf[1]; +    gagamap->payload.gc.slot = (int) buf[2]; +    gagamap++; +    gagamap->weight = (rtsWeight) buf[3]; +    gagamap->payload.gc.gtid = (GlobalTaskId) buf[4]; +    gagamap->payload.gc.slot = (int) buf[5]; +    ASSERT(gagamap->weight > 0); +    gagamap++; +    GAarraysize -= 6; +  } +} + +/* + * SendFish packs the global address being acknowledged, together with + * an array of global addresses for any closures shipped and sends them. 
+ +//@cindex FISH + + Structure of a FISH message: + +     +----------------------------------+ +     | orig PE | age | history | hunger | +     +----------------------------------+ + */ + +//@cindex sendFish +void +sendFish(GlobalTaskId destPE, GlobalTaskId origPE,  +	 int age, int history, int hunger) +{ +  IF_PAR_DEBUG(fish, +	       belch("$$ [%x] Sending Fish to %x (%d outstanding fishes)",  +		     mytid, destPE, outstandingFishes)); + +  sendOpV(PP_FISH, destPE, 4,  +	  (StgWord) origPE, (StgWord) age, (StgWord) history, (StgWord) hunger); + +  if (origPE == mytid) { +    //fishing = rtsTrue; +    outstandingFishes++; +  } +} + +/* + * unpackFish unpacks a FISH message into the global task id of the + * originating PE and 3 data fields: the age, history and hunger of the + * fish. The history + hunger are not currently used. + + */ + +//@cindex unpackFish +static void +unpackFish(GlobalTaskId *origPE, int *age, int *history, int *hunger) +{ +  long buf[4]; +   +  GetArgs(buf, 4); +   +  IF_PAR_DEBUG(fish, +	       belch("$$ [%x] Unpacking Fish from PE %x (age=%d)",  +		     mytid, (GlobalTaskId) buf[0], (int) buf[1])); + +  *origPE = (GlobalTaskId) buf[0]; +  *age = (int) buf[1]; +  *history = (int) buf[2]; +  *hunger = (int) buf[3]; +} + +/* + * SendFree sends (weight, slot) pairs for GAs that we no longer need + * references to.   + +//@cindex FREE + +   Structure of a FREE message: +    +       +----------------------------- +       | n | weight_1 | slot_1 | ... +       +----------------------------- + */ +//@cindex sendFree +void +sendFree(GlobalTaskId pe, int nelem, StgPtr data) +{ +    IF_PAR_DEBUG(free, +		 belch("!! [%x] Sending Free (%d GAs) to %x",  +		       mytid, nelem/2, pe)); + +    sendOpN(PP_FREE, pe, nelem, data); +} + +/* + * unpackFree unpacks a FREE message into the amount of data shipped and + * a data block. 
+ */ +//@cindex unpackFree +static void +unpackFree(int *nelem, rtsPackBuffer *data) +{ +  long buf[1]; +   +  GetArgs(buf, 1); +  *nelem = (int) buf[0]; + +  IF_PAR_DEBUG(free, +	       belch("!! [%x] Unpacking Free (%d GAs)",  +		     mytid, *nelem/2)); + +  GetArgs(data, *nelem); +} + +/* + * SendSchedule sends a closure to be evaluated in response to a Fish + * message. The message is directed to the PE that originated the Fish + * (origPE), and includes the packed closure (data) along with its size + * (nelem). + +//@cindex SCHEDULE + +   Structure of a SCHEDULE message: + +       +------------------------------------ +       | PE | n | pack buffer of a graph ... +       +------------------------------------ + */ +//@cindex sendSchedule +void +sendSchedule(GlobalTaskId origPE, int nelem, rtsPackBuffer *data) // StgPtr data) +{ +  IF_PAR_DEBUG(schedule, +	       PrintPacket(data); +	       belch("-- [%x] Sending Schedule (%d elems) to %x\n",  +		     mytid, nelem, origPE)); + +  sendOpN(PP_SCHEDULE, origPE, nelem + PACK_BUFFER_HDR_SIZE, (StgPtr)data); +} + +/* + * unpackSchedule unpacks a SCHEDULE message into the Global address of + * the closure shipped, the amount of data shipped (nelem) and the data + * block (data). + */ + +//@cindex unpackSchedule +static void +unpackSchedule(int *nelem, rtsPackBuffer *data) +{ +    long buf[1]; + +    GetArgs(buf, 1); +    /* no. 
of elems, not counting the header of the pack buffer */ +    *nelem = (int) buf[0] - PACK_BUFFER_HDR_SIZE; + +    IF_PAR_DEBUG(schedule, +		 belch("-- [%x] Unpacking Schedule (%d elems) on %x\n",  +		       mytid, *nelem)); + +    /* automatic cast of flat pvm-data to rtsPackBuffer */ +    GetArgs(data, *nelem + PACK_BUFFER_HDR_SIZE); +} + +//@node Message-Processing Functions, GUM Message Processor, GUM Message Sending and Unpacking Functions, High Level Communications Routines +//@subsection Message-Processing Functions + +/* + * Message-Processing Functions + * + * The following routines process incoming GUM messages. Often reissuing + * messages in response. + * + * processFish unpacks a fish message, reissuing it if it's our own, + * sending work if we have it or sending it onwards otherwise. + */ + +/* + * blockFetch blocks a BlockedFetch node on some kind of black hole. + */ +//@cindex blockFetch +static void +blockFetch(StgBlockedFetch *bf, StgClosure *bh) { +  bf->node = bh; +  switch (get_itbl(bh)->type) { +  case BLACKHOLE: +    bf->link = END_BQ_QUEUE; +    //((StgBlockingQueue *)bh)->header.info = &BLACKHOLE_BQ_info; +    SET_INFO(bh, &BLACKHOLE_BQ_info);  // turn closure into a blocking queue +    ((StgBlockingQueue *)bh)->blocking_queue = (StgBlockingQueueElement *)bf; +     +    // put bh on the mutables list +    recordMutable((StgMutClosure *)bh); + +# if 0 +    /* +     * If we modify a black hole in the old generation, we have to +     * make sure it goes on the mutables list +     */ +     +    if (bh <= StorageMgrInfo.OldLim) { +      MUT_LINK(bh) = (StgWord) StorageMgrInfo.OldMutables; +      StorageMgrInfo.OldMutables = bh; +    } else +      MUT_LINK(bh) = MUT_NOT_LINKED; +# endif +    break; +     +  case BLACKHOLE_BQ: +    /* enqueue bf on blocking queue of closure bh */ +    bf->link = ((StgBlockingQueue *)bh)->blocking_queue; +    ((StgBlockingQueue *)bh)->blocking_queue = (StgBlockingQueueElement *)bf; + +    // put bh on the mutables 
list; ToDo: check +    recordMutable((StgMutClosure *)bh); +    break; + +  case FETCH_ME_BQ: +    /* enqueue bf on blocking queue of closure bh */ +    bf->link = ((StgFetchMeBlockingQueue *)bh)->blocking_queue; +    ((StgFetchMeBlockingQueue *)bh)->blocking_queue = (StgBlockingQueueElement *)bf; + +    // put bh on the mutables list; ToDo: check +    recordMutable((StgMutClosure *)bh); +    break; +     +  case RBH: +    /* enqueue bf on blocking queue of closure bh */ +    bf->link = ((StgRBH *)bh)->blocking_queue; +    ((StgRBH *)bh)->blocking_queue = (StgBlockingQueueElement *)bf; + +    // put bh on the mutables list; ToDo: check +    recordMutable((StgMutClosure *)bh); +    break; +     +  default: +    barf("Panic (blockFetch): thought %p was a black hole (IP %#lx, %s)", +	 (StgClosure *)bh, get_itbl((StgClosure *)bh),  +	 info_type((StgClosure *)bh)); +  } +  IF_PAR_DEBUG(verbose, +	       belch("## blockFetch: after block the BQ of %p (%s) is:", +		     bh, info_type(bh)); +	       print_bq(bh)); +} + + +/* + * processFetches constructs and sends resume messages for every + * BlockedFetch which is ready to be awakened. + * awaken_blocked_queue (in Schedule.c) is responsible for moving  + * BlockedFetches from a blocking queue to the PendingFetches queue. 
+ */ +void GetRoots(void); +extern StgBlockedFetch *PendingFetches; + +nat +pending_fetches_len(void) +{ +  StgBlockedFetch *bf; +  nat n; + +  for (n=0, bf=PendingFetches; bf != END_BF_QUEUE; n++, bf = (StgBlockedFetch *)(bf->link)) { +    ASSERT(get_itbl(bf)->type==BLOCKED_FETCH); +  } +  return n; +} + +//@cindex processFetches +void +processFetches(void) { +  StgBlockedFetch *bf; +  StgClosure *closure, *next; +  StgInfoTable *ip; +  globalAddr rga; +  static rtsPackBuffer *packBuffer; +     +  IF_PAR_DEBUG(verbose, +	       belch("__ processFetches: %d  pending fetches", +		     pending_fetches_len())); +   +  for (bf = PendingFetches;  +       bf != END_BF_QUEUE; +       bf=(StgBlockedFetch *)(bf->link)) { +    /* the PendingFetches list contains only BLOCKED_FETCH closures */ +    ASSERT(get_itbl(bf)->type==BLOCKED_FETCH); + +    /* +     * Find the target at the end of the indirection chain, and +     * process it in much the same fashion as the original target +     * of the fetch.  Though we hope to find graph here, we could +     * find a black hole (of any flavor) or even a FetchMe. 
+     */ +    closure = bf->node; +    /* +      HACK 312: bf->node may have been evacuated since filling it; follow +       the evacuee in this case; the proper way to handle this is to +       traverse the blocking queue and update the node fields of +       BLOCKED_FETCH entries when evacuating an BLACKHOLE_BQ, FETCH_ME_BQ +       or RBH (but it's late and I'm tired)  +    */ +    if (get_itbl(closure)->type == EVACUATED) +      closure = ((StgEvacuated *)closure)->evacuee; + +    while ((next = IS_INDIRECTION(closure)) != NULL) { closure = next; } + +    ip = get_itbl(closure); +    if (ip->type == FETCH_ME) { +      /* Forward the Fetch to someone else */ +      rga.payload.gc.gtid = bf->ga.payload.gc.gtid; +      rga.payload.gc.slot = bf->ga.payload.gc.slot; +      rga.weight = bf->ga.weight; +       +      sendFetch(((StgFetchMe *)closure)->ga, &rga, 0 /* load */); + +      IF_PAR_DEBUG(forward, +		   belch("__ processFetches: Forwarding fetch from %lx to %lx", +			 mytid, rga.payload.gc.gtid)); + +    } else if (IS_BLACK_HOLE(closure)) { +      IF_PAR_DEBUG(verbose, +		   belch("__ processFetches: trying to send a BLACK_HOLE => doign a blockFetch on closure %p (%s)", +			 closure, info_type(closure))); +      bf->node = closure; +      blockFetch(bf, closure); +    } else { +      /* We now have some local graph to send back */ +      nat size; + +      packBuffer = gumPackBuffer; +      IF_PAR_DEBUG(verbose, +		   belch("__ processFetches: PackNearbyGraph of closure %p (%s)", +			 closure, info_type(closure))); + +      if ((packBuffer = PackNearbyGraph(closure, END_TSO_QUEUE, &size)) == NULL) { +	// Put current BF back on list +	bf->link = (StgBlockingQueueElement *)PendingFetches; +	PendingFetches = (StgBlockedFetch *)bf; +	// ToDo: check that nothing more has to be done to prepare for GC! 
+	GarbageCollect(GetRoots);  +	bf = PendingFetches; +	PendingFetches = (StgBlockedFetch *)(bf->link); +	closure = bf->node; +	packBuffer = PackNearbyGraph(closure, END_TSO_QUEUE, &size); +	ASSERT(packBuffer != (rtsPackBuffer *)NULL); +      } +      rga.payload.gc.gtid = bf->ga.payload.gc.gtid; +      rga.payload.gc.slot = bf->ga.payload.gc.slot; +      rga.weight = bf->ga.weight; +       +      sendResume(&rga, size, packBuffer); +    } +  } +  PendingFetches = END_BF_QUEUE; +} + +#if 0 +/* +  Alternatively to sending fetch messages directly from the FETCH_ME_entry +  code we could just store the data about the remote data in a global +  variable and send the fetch request from the main scheduling loop (similar +  to processFetches above). This would save an expensive STGCALL in the entry  +  code because we have to go back to the scheduler anyway. +*/ +//@cindex processFetches +void +processTheRealFetches(void) { +  StgBlockedFetch *bf; +  StgClosure *closure, *next; +     +  IF_PAR_DEBUG(verbose, +	       belch("__ processTheRealFetches: "); +	       printGA(&theGlobalFromGA); +	       printGA(&theGlobalToGA)); + +  ASSERT(theGlobalFromGA.payload.gc.gtid != 0 && +	 theGlobalToGA.payload.gc.gtid != 0); + +  /* the old version did this in the FETCH_ME entry code */ +  sendFetch(&theGlobalFromGA, &theGlobalToGA, 0/*load*/); +   +#if DEBUG +  theGlobalFromGA.payload.gc.gtid = 0; +  theGlobalToGA.payload.gc.gtid = 0; +#endif DEBUG +} +#endif + + +/* + * processFish unpacks a fish message, reissuing it if it's our own, + * sending work if we have it or sending it onwards otherwise. 
+ */ +//@cindex processFish +static void +processFish(void) +{ +  GlobalTaskId origPE; +  int age, history, hunger; +  rtsSpark spark; +  static rtsPackBuffer *packBuffer;  + +  unpackFish(&origPE, &age, &history, &hunger); + +  if (origPE == mytid) { +    //fishing = rtsFalse;                   // fish has come home +    outstandingFishes--; +    last_fish_arrived_at = CURRENT_TIME;  // remember time (see schedule fct) +    return;                               // that's all +  } + +  ASSERT(origPE != mytid); +  IF_PAR_DEBUG(fish, +	       belch("$$ [%x] processing fish; %d sparks available", +		     mytid, spark_queue_len(ADVISORY_POOL))); +  while ((spark = findLocalSpark(rtsTrue)) != NULL) { +    nat size; +    // StgClosure *graph; + +    packBuffer = gumPackBuffer;  +    ASSERT(closure_SHOULD_SPARK((StgClosure *)spark)); +    if ((packBuffer = PackNearbyGraph(spark, END_TSO_QUEUE, &size)) == NULL) { +      IF_PAR_DEBUG(fish, +		   belch("$$ GC while trying to satisfy FISH via PackNearbyGraph of node %p", +			 (StgClosure *)spark)); +      GarbageCollect(GetRoots); +      /* Now go back and try again */ +    } else { +      IF_PAR_DEBUG(fish, +		   belch("$$ [%x] Replying to FISH from %x by sending graph @ %p (%s)", +			 mytid, origPE,  +			 (StgClosure *)spark, info_type((StgClosure *)spark))); +      sendSchedule(origPE, size, packBuffer); +      disposeSpark(spark); +      break; +    } +  } +  if (spark == (rtsSpark)NULL) { +    IF_PAR_DEBUG(fish, +		 belch("$$ [%x] No sparks available for FISH from %x", +		       mytid, origPE)); +    /* We have no sparks to give */ +    if (age < FISH_LIFE_EXPECTANCY) +      /* and the fish is atill young, send it to another PE to look for work */ +      sendFish(choosePE(), origPE, +	       (age + 1), NEW_FISH_HISTORY, NEW_FISH_HUNGER); + +    /* otherwise, send it home to die */ +    else +      sendFish(origPE, origPE, (age + 1), NEW_FISH_HISTORY, NEW_FISH_HUNGER); +    } +}  /* processFish */ + +/* + * processFetch 
either returns the requested data (if available)  + * or blocks the remote blocking queue on a black hole (if not). + */ + +//@cindex processFetch +static void +processFetch(void) +{ +  globalAddr ga, rga; +  int load; +  StgClosure *closure; +  StgInfoTable *ip; + +  unpackFetch(&ga, &rga, &load); +  IF_PAR_DEBUG(fetch, +	       belch("%% [%x] Rcvd Fetch for ((%x, %d, 0)), Resume ((%x, %d, %x)) (load %d) from %x", +		     mytid,  +		     ga.payload.gc.gtid, ga.payload.gc.slot, +		     rga.payload.gc.gtid, rga.payload.gc.slot, rga.weight, load, +		     rga.payload.gc.gtid)); + +  closure = GALAlookup(&ga); +  ASSERT(closure != (StgClosure *)NULL); +  ip = get_itbl(closure); +  if (ip->type == FETCH_ME) { +    /* Forward the Fetch to someone else */ +    sendFetch(((StgFetchMe *)closure)->ga, &rga, load); +  } else if (rga.payload.gc.gtid == mytid) { +    /* Our own FETCH forwarded back around to us */ +    StgFetchMeBlockingQueue *fmbq = (StgFetchMeBlockingQueue *)GALAlookup(&rga); +     +    IF_PAR_DEBUG(fetch, +		 belch("%% [%x] Fetch returned to sending PE; closure=%p (%s); receiver=%p (%s)", +		       mytid, closure, info_type(closure), fmbq, info_type(fmbq))); +    /* We may have already discovered that the fetch target is our own. */ +    if ((StgClosure *)fmbq != closure)  +      CommonUp((StgClosure *)fmbq, closure); +    (void) addWeight(&rga); +  } else if (IS_BLACK_HOLE(closure)) { +    /* This includes RBH's and FMBQ's */ +    StgBlockedFetch *bf; + +    ASSERT(GALAlookup(&rga) == NULL); + +    /* If we're hitting a BH or RBH or FMBQ we have to put a BLOCKED_FETCH +       closure into the BQ in order to denote that when updating this node +       the result should be sent to the originator of this fetch message. 
*/ +    bf = (StgBlockedFetch *)createBlockedFetch(ga, rga); +    blockFetch(bf, closure); + +    IF_PAR_DEBUG(fetch, +		 belch("%% [%x] Blocking Fetch ((%x, %d, %x)) on %p (%s)", +		       mytid,  +		       rga.payload.gc.gtid, rga.payload.gc.slot, rga.weight,  +		       closure, info_type(closure))); +    } else {			 +      /* The target of the FetchMe is some local graph */ +      nat size; +      // StgClosure *graph; +      rtsPackBuffer *buffer = (rtsPackBuffer *)NULL; + +      if ((buffer = PackNearbyGraph(closure, END_TSO_QUEUE, &size)) == NULL) { +	GarbageCollect(GetRoots);  +	closure = GALAlookup(&ga); +	buffer = PackNearbyGraph(closure, END_TSO_QUEUE, &size); +	ASSERT(buffer != (rtsPackBuffer *)NULL); +      } +      sendResume(&rga, size, buffer); +    } +} + +/* + * processFree unpacks a FREE message and adds the weights to our GAs. + */ +//@cindex processFree +static void +processFree(void) +{ +  int nelem; +  static StgWord *buffer; +  int i; +  globalAddr ga; + +  buffer = (StgWord *)gumPackBuffer; +  unpackFree(&nelem, buffer); +  IF_PAR_DEBUG(free, +	       belch("!! [%x] Rcvd Free (%d GAs)", mytid, nelem / 2)); + +  ga.payload.gc.gtid = mytid; +  for (i = 0; i < nelem;) { +    ga.weight = (rtsWeight) buffer[i++]; +    ga.payload.gc.slot = (int) buffer[i++]; +    IF_PAR_DEBUG(free, +		 fprintf(stderr, "!! [%x] Processing free ", mytid);  +		 printGA(&ga); +		 fputc('\n', stderr); +		 ); +    (void) addWeight(&ga); +  } +} + +/* + * processResume unpacks a RESUME message into the graph, filling in + * the LA -> GA, and GA -> LA tables. Threads blocked on the original + * FetchMe (now a blocking queue) are awakened, and the blocking queue + * is converted into an indirection.  Finally it sends an ACK in response + * which contains any newly allocated GAs. 
+ */ + +//@cindex processResume +static void +processResume(GlobalTaskId sender) +{ +  int nelem; +  nat nGAs; +  static rtsPackBuffer *packBuffer; +  StgClosure *newGraph, *old; +  globalAddr lga; +  globalAddr *gagamap; +   +  packBuffer = gumPackBuffer; +  unpackResume(&lga, &nelem, (StgPtr)packBuffer); + +  IF_PAR_DEBUG(resume, +	       fprintf(stderr, "[] [%x] Rcvd Resume for ", mytid);  +	       printGA(&lga); +	       fputc('\n', stderr); +	       PrintPacket((rtsPackBuffer *)packBuffer)); +   +  /*  +   * We always unpack the incoming graph, even if we've received the +   * requested node in some other data packet (and already awakened +   * the blocking queue). +  if (SAVE_Hp + packBuffer[0] >= SAVE_HpLim) { +    ReallyPerformThreadGC(packBuffer[0], rtsFalse); +    SAVE_Hp -= packBuffer[0]; +  } +   */ + +  // ToDo: Check for GC here !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +  /* Do this *after* GC; we don't want to release the object early! */ + +  if (lga.weight > 0) +    (void) addWeight(&lga); + +  old = GALAlookup(&lga); + +  if (RtsFlags.ParFlags.ParStats.Full) { +    // StgTSO *tso = END_TSO_QUEUE; +    StgBlockingQueueElement *bqe; + +    /* Write REPLY events to the log file, indicating that the remote +       data has arrived */ +    if (get_itbl(old)->type == FETCH_ME_BQ || +	get_itbl(old)->type == RBH)  +      for (bqe = ((StgFetchMeBlockingQueue *)old)->blocking_queue; +	   bqe->link != END_BQ_QUEUE; +	   bqe = bqe->link) +	if (get_itbl((StgClosure *)bqe)->type == TSO) +	  DumpRawGranEvent(CURRENT_PROC, taskIDtoPE(sender),  +			   GR_REPLY, ((StgTSO *)bqe), ((StgTSO *)bqe)->block_info.closure, +			   0, spark_queue_len(ADVISORY_POOL)); +  } + +  newGraph = UnpackGraph(packBuffer, &gagamap, &nGAs); +  ASSERT(newGraph != NULL); + +  /*  +   * Sometimes, unpacking will common up the resumee with the +   * incoming graph, but if it hasn't, we'd better do so now. 
+   */ +    +  if (get_itbl(old)->type == FETCH_ME_BQ) +    CommonUp(old, newGraph); + +  IF_PAR_DEBUG(resume, +	       DebugPrintGAGAMap(gagamap, nGAs)); +   +  sendAck(sender, nGAs, gagamap); +} + +/* + * processSchedule unpacks a SCHEDULE message into the graph, filling + * in the LA -> GA, and GA -> LA tables. The root of the graph is added to + * the local spark queue.  Finally it sends an ACK in response + * which contains any newly allocated GAs. + */ +//@cindex processSchedule +static void +processSchedule(GlobalTaskId sender) +{ +  nat nelem, space_required, nGAs; +  rtsBool success; +  static rtsPackBuffer *packBuffer; +  StgClosure *newGraph; +  globalAddr *gagamap; +   +  packBuffer = gumPackBuffer;		/* HWL */ +  unpackSchedule(&nelem, packBuffer); + +  IF_PAR_DEBUG(schedule, +	       belch("-- [%x] Rcvd Schedule (%d elems)", mytid, nelem); +	       PrintPacket(packBuffer)); + +  /* +   * For now, the graph is a closure to be sparked as an advisory +   * spark, but in future it may be a complete spark with +   * required/advisory status, priority etc. 
+   */ + +  /* +  space_required = packBuffer[0]; +  if (SAVE_Hp + space_required >= SAVE_HpLim) { +    ReallyPerformThreadGC(space_required, rtsFalse); +    SAVE_Hp -= space_required; +  } +  */ +  // ToDo: check whether GC is necessary !!!!!!!!!!!!!!!!!!!!!1 +  newGraph = UnpackGraph(packBuffer, &gagamap, &nGAs); +  ASSERT(newGraph != NULL); +  success = add_to_spark_queue(newGraph, rtsFalse); + +  IF_PAR_DEBUG(pack, +	       if (success) +  	         belch("+* added spark to unpacked graph %p; %d sparks available on [%x]",  +		     newGraph, spark_queue_len(ADVISORY_POOL), mytid); +	       else +                 belch("+* received non-sparkable closure %p; nothing added to spark pool; %d sparks available on [%x]",  +		     newGraph, spark_queue_len(ADVISORY_POOL), mytid); +	       belch("-* Unpacked graph with root at %p (%s):",  +		     newGraph, info_type(newGraph)); +	       PrintGraph(newGraph, 0)); + +  IF_PAR_DEBUG(pack, +  	       DebugPrintGAGAMap(gagamap, nGAs)); + +  if (nGAs > 0) +    sendAck(sender, nGAs, gagamap); + +  //fishing = rtsFalse; +  ASSERT(outstandingFishes>0); +  outstandingFishes--; +} + +/* + * processAck unpacks an ACK, and uses the GAGA map to convert RBH's + * (which represent shared thunks that have been shipped) into fetch-mes + * to remote GAs. + */ +//@cindex processAck +static void +processAck(void) +{ +  nat nGAs; +  globalAddr *gaga; +  globalAddr gagamap[256]; // ToDo: elim magic constant!!   MAX_GAS * 2];?? + +  unpackAck(&nGAs, gagamap); + +  IF_PAR_DEBUG(ack, +	       belch(",, [%x] Rcvd Ack (%d pairs)", mytid, nGAs); +	       DebugPrintGAGAMap(gagamap, nGAs)); + +  /* +   * For each (oldGA, newGA) pair, set the GA of the corresponding +   * thunk to the newGA, convert the thunk to a FetchMe, and return +   * the weight from the oldGA. 
+   */ +  for (gaga = gagamap; gaga < gagamap + nGAs * 2; gaga += 2) { +    StgClosure *old_closure = GALAlookup(gaga); +    StgClosure *new_closure = GALAlookup(gaga + 1); + +    ASSERT(old_closure != NULL); +    if (new_closure == NULL) { +      /* We don't have this closure, so we make a fetchme for it */ +      globalAddr *ga = setRemoteGA(old_closure, gaga + 1, rtsTrue); +       +      /* convertToFetchMe should be done unconditionally here. +	 Currently, we assign GAs to CONSTRs, too, (a bit of a hack), +	 so we have to check whether it is an RBH before converting + +	 ASSERT(get_itbl(old_closure)==RBH); +      */ +      if (get_itbl(old_closure)->type==RBH) +	convertToFetchMe(old_closure, ga); +    } else { +      /*  +       * Oops...we've got this one already; update the RBH to +       * point to the object we already know about, whatever it +       * happens to be. +       */ +      CommonUp(old_closure, new_closure); +       +      /*  +       * Increase the weight of the object by the amount just +       * received in the second part of the ACK pair. +       */ +      (void) addWeight(gaga + 1); +    } +    (void) addWeight(gaga); +  } +} + +//@node GUM Message Processor, Miscellaneous Functions, Message-Processing Functions, High Level Communications Routines +//@subsection GUM Message Processor + +/* + * GUM Message Processor + + * processMessages processes any messages that have arrived, calling + * appropriate routines depending on the message tag + * (opcode). N.B. Unless profiling it assumes that there {\em ARE} messages + * present and performs a blocking receive! During profiling it + * busy-waits in order to record idle time. 
+ */ + +//@cindex processMessages +void +processMessages(void) +{ +  rtsPacket packet; +  OpCode opcode; +  GlobalTaskId task; +     +  do { +    packet = GetPacket();  /* Get next message; block until one available */ +    getOpcodeAndSender(packet, &opcode, &task); + +    switch (opcode) { +    case PP_FINISH: +      IF_PAR_DEBUG(verbose, +		   belch("== [%x] received FINISH", mytid)); +      /* setting this global variables eventually terminates the main +         scheduling loop for this PE and causes a shut-down, sending  +	 PP_FINISH to SysMan */ +      GlobalStopPending = rtsTrue; +      break; + +    case PP_FETCH: +      processFetch(); +      break; + +    case PP_RESUME: +      processResume(task); +      break; + +    case PP_ACK: +      processAck(); +      break; + +    case PP_FISH: +      processFish(); +      break; + +    case PP_FREE: +      processFree(); +      break; +       +    case PP_SCHEDULE: +      processSchedule(task); +      break; + +    default: +      /* Anything we're not prepared to deal with. */ +      barf("Task %x: Unexpected opcode %x from %x", +	   mytid, opcode, task); +    } /* switch */ + +  } while (PacketsWaiting());	/* While there are messages: process them */ +}				/* processMessages */ + +//@node Miscellaneous Functions, Index, GUM Message Processor, High Level Communications Routines +//@subsection Miscellaneous Functions + +/* + * ChoosePE selects a GlobalTaskId from the array of PEs 'at random'. 
+ * Important properties: + *   - it varies during execution, even if the PE is idle + *   - it's different for each PE + *   - we never send a fish to ourselves + */ +extern long lrand48 (void); + +//@cindex choosePE +GlobalTaskId +choosePE(void) +{ +  long temp; + +  temp = lrand48() % nPEs; +  if (allPEs[temp] == mytid) {	/* Never send a FISH to yourself */ +    temp = (temp + 1) % nPEs; +  } +  return allPEs[temp]; +} + +/*  + * allocate a BLOCKED_FETCH closure and fill it with the relevant fields + * of the ga argument; called from processFetch when the local closure is + * under evaluation + */ +//@cindex createBlockedFetch +StgClosure * +createBlockedFetch (globalAddr ga, globalAddr rga) +{ +  StgBlockedFetch *bf; +  StgClosure *closure; + +  closure = GALAlookup(&ga); +  if ((bf = (StgBlockedFetch *)allocate(FIXED_HS + sizeofW(StgBlockedFetch))) == NULL) { +    GarbageCollect(GetRoots);  +    closure = GALAlookup(&ga); +    bf = (StgBlockedFetch *)allocate(FIXED_HS + sizeofW(StgBlockedFetch)); +    // ToDo: check whether really guaranteed to succeed 2nd time around +  } + +  ASSERT(bf != (StgClosure *)NULL); +  SET_INFO((StgClosure *)bf, &BLOCKED_FETCH_info); +  // ToDo: check whether other header info is needed +  bf->node = closure; +  bf->ga.payload.gc.gtid = rga.payload.gc.gtid; +  bf->ga.payload.gc.slot = rga.payload.gc.slot; +  bf->ga.weight = rga.weight; +  // bf->link = NULL;  debugging + +  IF_PAR_DEBUG(fetch, +	       fprintf(stderr, "%% [%x] created BF: closure=%p (%s), GA: ", +		       mytid, closure, info_type(closure)); +	       printGA(&(bf->ga)); +	       fputc('\n',stderr)); +  return bf; +} + +/* + * waitForTermination enters a loop ignoring spurious messages while + * waiting for the termination sequence to be completed.   
+ */ +//@cindex waitForTermination +void +waitForTermination(void) +{ +  do { +    rtsPacket p = GetPacket(); +    processUnexpected(p); +  } while (rtsTrue); +} + +#ifdef DEBUG +//@cindex DebugPrintGAGAMap +void +DebugPrintGAGAMap(globalAddr *gagamap, int nGAs) +{ +  int i; +   +  for (i = 0; i < nGAs; ++i, gagamap += 2) +    fprintf(stderr, "gagamap[%d] = ((%x, %d, %x)) -> ((%x, %d, %x))\n", i, +	    gagamap[0].payload.gc.gtid, gagamap[0].payload.gc.slot, gagamap[0].weight, +	    gagamap[1].payload.gc.gtid, gagamap[1].payload.gc.slot, gagamap[1].weight); +} +#endif + +//@cindex freeMsgBuffer +static StgWord **freeMsgBuffer = NULL; +//@cindex freeMsgIndex +static int      *freeMsgIndex  = NULL; + +//@cindex prepareFreeMsgBuffers +void +prepareFreeMsgBuffers(void) +{ +  int i; +   +  /* Allocate the freeMsg buffers just once and then hang onto them. */ +  if (freeMsgIndex == NULL) { +    freeMsgIndex = (int *) stgMallocBytes(nPEs * sizeof(int),  +					  "prepareFreeMsgBuffers (Index)"); +    freeMsgBuffer = (StgWord **) stgMallocBytes(nPEs * sizeof(long *),  +					  "prepareFreeMsgBuffers (Buffer)"); +     +    for(i = 0; i < nPEs; i++)  +      if (i != thisPE)  +	freeMsgBuffer[i] = (StgPtr) stgMallocWords(RtsFlags.ParFlags.packBufferSize, +					       "prepareFreeMsgBuffers (Buffer #i)"); +  } +   +  /* Initialize the freeMsg buffer pointers to point to the start of their +     buffers */ +  for (i = 0; i < nPEs; i++) +    freeMsgIndex[i] = 0; +} + +//@cindex freeRemoteGA +void +freeRemoteGA(int pe, globalAddr *ga) +{ +  int i; +   +  ASSERT(GALAlookup(ga) == NULL); +   +  if ((i = freeMsgIndex[pe]) + 2 >= RtsFlags.ParFlags.packBufferSize) { +    IF_PAR_DEBUG(free, +		 belch("Filled a free message buffer (sending remaining messages indivisually)"));	 + +    sendFree(ga->payload.gc.gtid, i, freeMsgBuffer[pe]); +    i = 0; +  } +  freeMsgBuffer[pe][i++] = (StgWord) ga->weight; +  freeMsgBuffer[pe][i++] = (StgWord) ga->payload.gc.slot; +  freeMsgIndex[pe] = i; + 
+#ifdef DEBUG +  ga->weight = 0x0f0f0f0f; +  ga->payload.gc.gtid = 0x666; +  ga->payload.gc.slot = 0xdeaddead; +#endif +} + +//@cindex sendFreeMessages +void +sendFreeMessages(void) +{ +  int i; +   +  for (i = 0; i < nPEs; i++)  +    if (freeMsgIndex[i] > 0) +      sendFree(allPEs[i], freeMsgIndex[i], freeMsgBuffer[i]); +} + +#endif /* PAR -- whole file */ + +//@node Index,  , Miscellaneous Functions, High Level Communications Routines +//@subsection Index + +//@index +//* ACK::  @cindex\s-+ACK +//* DebugPrintGAGAMap::  @cindex\s-+DebugPrintGAGAMap +//* FETCH::  @cindex\s-+FETCH +//* FISH::  @cindex\s-+FISH +//* FREE::  @cindex\s-+FREE +//* RESUME::  @cindex\s-+RESUME +//* SCHEDULE::  @cindex\s-+SCHEDULE +//* blockFetch::  @cindex\s-+blockFetch +//* choosePE::  @cindex\s-+choosePE +//* freeMsgBuffer::  @cindex\s-+freeMsgBuffer +//* freeMsgIndex::  @cindex\s-+freeMsgIndex +//* freeRemoteGA::  @cindex\s-+freeRemoteGA +//* gumPackBuffer::  @cindex\s-+gumPackBuffer +//* initMoreBuffers::  @cindex\s-+initMoreBuffers +//* prepareFreeMsgBuffers::  @cindex\s-+prepareFreeMsgBuffers +//* processAck::  @cindex\s-+processAck +//* processFetch::  @cindex\s-+processFetch +//* processFetches::  @cindex\s-+processFetches +//* processFish::  @cindex\s-+processFish +//* processFree::  @cindex\s-+processFree +//* processMessages::  @cindex\s-+processMessages +//* processResume::  @cindex\s-+processResume +//* processSchedule::  @cindex\s-+processSchedule +//* sendAck::  @cindex\s-+sendAck +//* sendFetch::  @cindex\s-+sendFetch +//* sendFish::  @cindex\s-+sendFish +//* sendFree::  @cindex\s-+sendFree +//* sendFreeMessages::  @cindex\s-+sendFreeMessages +//* sendResume::  @cindex\s-+sendResume +//* sendSchedule::  @cindex\s-+sendSchedule +//* unpackAck::  @cindex\s-+unpackAck +//* unpackFetch::  @cindex\s-+unpackFetch +//* unpackFish::  @cindex\s-+unpackFish +//* unpackFree::  @cindex\s-+unpackFree +//* unpackResume::  @cindex\s-+unpackResume +//* unpackSchedule::  
@cindex\s-+unpackSchedule +//* waitForTermination::  @cindex\s-+waitForTermination +//@end index diff --git a/ghc/rts/parallel/LLC.h b/ghc/rts/parallel/LLC.h new file mode 100644 index 0000000000..eb6336600c --- /dev/null +++ b/ghc/rts/parallel/LLC.h @@ -0,0 +1,128 @@ +/* -------------------------------------------------------------------------- +   Time-stamp: <Wed Nov 17 1999 16:50:58 Stardate: [-30]3913.51 hwloidl> +   $Id: LLC.h,v 1.2 2000/01/13 14:34:07 hwloidl Exp $ + +   Low Level Communications Header (LLC.h) + +   Contains the definitions used by the Low-level Communications +   module of the GUM Haskell runtime environment. +   Based on the Graph for PVM implementation. + +   Phil Trinder, Glasgow University, 13th Dec 1994 +   Adapted for the 4.xx RTS +   H-W. Loidl, Heriot-Watt, November 1999 +   ----------------------------------------------------------------------- */ + +#ifndef __LLC_H +#define __LLC_H + +#ifdef PAR + +//@node Low Level Communications Header, , , +//@section Low Level Communications Header + +//@menu +//* Includes::			 +//* Macros and Constants::	 +//* PVM macros::		 +//* Externs::			 +//@end menu + +//@node Includes, Macros and Constants, Low Level Communications Header, Low Level Communications Header +//@subsection Includes + +#include "Rts.h" +#include "Parallel.h" + +#include "PEOpCodes.h" +#include "pvm3.h" + +//@node Macros and Constants, PVM macros, Includes, Low Level Communications Header +//@subsection Macros and Constants + +#define	ANY_TASK	(-1)	/* receive messages from any task */ +#define ANY_GLOBAL_TASK	ANY_TASK +#define ANY_OPCODE	(-1)	/* receive any opcode */ +#define	ALL_GROUP	(-1)	/* wait for barrier from every group member */ + +#define	PEGROUP		"PE" + +#define	MGRGROUP	"MGR" +#define	PECTLGROUP	"PECTL" + + +#define	PETASK		"PE" + +//@node PVM macros, Externs, Macros and Constants, Low Level Communications Header +//@subsection PVM macros + +#define	sync(gp,op)		do { \ +                                  
broadcast(gp,op); \ +                                  pvm_barrier(gp,ALL_GROUP); \ +                                } while(0) + +#define broadcast(gp,op)	do { \ +                                  pvm_initsend(PvmDataDefault); \ +                                  pvm_bcast(gp,op); \ +                                } while(0) + +#define checkComms(c,s)		do { \ +                                  if ((c)<0) { \ +                                    pvm_perror(s); \ +                                    stg_exit(EXIT_FAILURE); \ +                                }} while(0) + +#define _my_gtid		pvm_mytid() +#define GetPacket()             pvm_recv(ANY_TASK,ANY_OPCODE) +#define PacketsWaiting()	(pvm_probe(ANY_TASK,ANY_OPCODE) != 0) + +#define SPARK_THREAD_DESCRIPTOR		1 +#define GLOBAL_THREAD_DESCRIPTOR	2 + +#define _extract_jump_field(v)	(v) + +#define MAX_DATA_WORDS_IN_PACKET	1024 + +/* basic PVM packing */ +#define PutArg1(a)		pvm_pklong(&(a),1,1) +#define PutArg2(a)		pvm_pklong(&(a),1,1) +#define PutArgN(n,a)		pvm_pklong(&(a),1,1) +#define PutArgs(b,n)		pvm_pklong(b,n,1) + +#define PutLit(l)		{ int a = l; PutArgN(?,a); } + +/* basic PVM unpacking */ +#define GetArg1(a)		pvm_upklong(&(a),1,1) +#define GetArg2(a)		pvm_upklong(&(a),1,1) +#define GetArgN(n,a)		pvm_upklong(&(a),1,1) +#define GetArgs(b,n)		pvm_upklong(b,n,1) + +//@node Externs,  , PVM macros, Low Level Communications Header +//@subsection Externs + +/* basic message passing routines */ +extern void sendOp   (OpCode,GlobalTaskId), +            sendOp1  (OpCode,GlobalTaskId,StgWord), +            sendOp2  (OpCode,GlobalTaskId,StgWord,StgWord), +	    sendOpV  (OpCode,GlobalTaskId,int,...),  +            sendOpN  (OpCode,GlobalTaskId,int,StgPtr), +            sendOpNV (OpCode,GlobalTaskId,int,StgPtr,int,...); + +/* extracting data out of a packet */ +OpCode        getOpcode (rtsPacket p); +void          getOpcodeAndSender (rtsPacket p, OpCode *popcode,  +			          GlobalTaskId *psender_id); +GlobalTaskId  
senderTask (rtsPacket p); +rtsPacket     waitForPEOp (OpCode op, GlobalTaskId who); + +/* Init and shutdown routines */ +GlobalTaskId *startUpPE (unsigned nPEs); +void          shutDownPE(void); + +/* aux functions */ +char  *getOpName (unsigned op);  // returns string of opcode +void   processUnexpected (rtsPacket); +//void   NullException(void); + +#endif /*PAR */ +#endif /*defined __LLC_H */ diff --git a/ghc/rts/parallel/LLComms.c b/ghc/rts/parallel/LLComms.c new file mode 100644 index 0000000000..c40ae339b4 --- /dev/null +++ b/ghc/rts/parallel/LLComms.c @@ -0,0 +1,476 @@ +/* ---------------------------------------------------------------------------- + * Time-stamp: <Wed Jan 12 2000 12:29:53 Stardate: [-30]4193.64 hwloidl> + * $Id: LLComms.c,v 1.2 2000/01/13 14:34:07 hwloidl Exp $ + * + * GUM Low-Level Inter-Task Communication + * + * This module defines PVM Routines for PE-PE  communication. + * P. Trinder, December 5th. 1994. + * Adapted for the new RTS  + * P. Trinder, July 1998 + * H-W. Loidl, November 1999 + --------------------------------------------------------------------------- */ + +#ifdef PAR /* whole file */ + +//@node GUM Low-Level Inter-Task Communication, , , +//@section GUM Low-Level Inter-Task Communication + +/* + *This module defines the routines which communicate between PEs.  The + *code is based on Kevin Hammond's GRIP RTS. (OpCodes.h defines + *PEOp1 etc. in terms of sendOp1 etc.).   + * + *Routine	&	Arguments  + *		&		 + *sendOp	&	0			\\ + *sendOp1	&	1			\\ + *sendOp2	&	2			\\ + *sendOpN	&	vector			\\ + *sendOpV	&	variable		\\ + *sendOpNV	&	variable+ vector	\\ + * + *First the standard include files. 
+ */ + +//@menu +//* Macros etc::		 +//* Includes::			 +//* Auxiliary functions::	 +//* Index::			 +//@end menu + +//@node Macros etc, Includes, GUM Low-Level Inter-Task Communication, GUM Low-Level Inter-Task Communication +//@subsection Macros etc + +#define NON_POSIX_SOURCE /* so says Solaris */ +#define UNUSED           /* nothing */ + +//@node Includes, Auxiliary functions, Macros etc, GUM Low-Level Inter-Task Communication +//@subsection Includes + +#include "Rts.h" +#include "RtsFlags.h" +#include "RtsUtils.h" +#include "Parallel.h" +#include "ParallelRts.h" +#if defined(DEBUG) +# include "ParallelDebug.h" +#endif +#include "LLC.h" + +#ifdef __STDC__ +#include <stdarg.h> +#else +#include <varargs.h> +#endif + +/* Cannot use std macro when compiling for SysMan */ +/* debugging enabled */ +// #define IF_PAR_DEBUG(c,s)  { s; } +/* debugging disabled */ +#define IF_PAR_DEBUG(c,s)  /* nothing */ + +//@node Auxiliary functions, Index, Includes, GUM Low-Level Inter-Task Communication +//@subsection Auxiliary functions + +/* + * heapChkCounter tracks the number of heap checks since the last probe. + * Not currently used! We check for messages when a thread is resheduled. + */ +int heapChkCounter = 0; + +/* + * Then some miscellaneous functions.  + * getOpName returns the character-string name of any OpCode. + */ + +char *UserPEOpNames[] = { PEOP_NAMES }; + +//@cindex getOpName +char * +getOpName(nat op) +{ +    if (op >= MIN_PEOPS && op <= MAX_PEOPS) +	return (UserPEOpNames[op - MIN_PEOPS]); +    else +	return ("Unknown PE OpCode"); +} + +/* + * traceSendOp handles the tracing of messages.  
+ */ + +//@cindex traceSendOp +static void +traceSendOp(OpCode op, GlobalTaskId dest UNUSED, +	     unsigned int data1 UNUSED, unsigned int data2 UNUSED) +{ +    char *OpName; + +    OpName = getOpName(op); +    IF_PAR_DEBUG(trace, +		 fprintf(stderr," %s [%x,%x] sent from %x to %x",  +		       OpName, data1, data2, mytid, dest)); +} + +/* + * sendOp sends a 0-argument message with OpCode {\em op} to + * the global task {\em task}. + */ + +//@cindex sendOp +void +sendOp(OpCode op, GlobalTaskId task) +{ +    traceSendOp(op, task,0,0); + +    pvm_initsend(PvmDataRaw); +    pvm_send(task, op); +} + +/* + * sendOp1 sends a 1-argument message with OpCode {\em op} + * to the global task {\em task}. + */ + +//@cindex sendOp1 +void +sendOp1(OpCode op, GlobalTaskId task, StgWord arg1) +{ +    traceSendOp(op, task, arg1,0); + +    pvm_initsend(PvmDataRaw); +    PutArg1(arg1); +    pvm_send(task, op); +} + + +/* + * sendOp2 is used by the FP code only.  + */ + +//@cindex sendOp2 +void +sendOp2(OpCode op, GlobalTaskId task, StgWord arg1, StgWord arg2) +{ +    traceSendOp(op, task, arg1, arg2); + +    pvm_initsend(PvmDataRaw); +    PutArg1(arg1); +    PutArg2(arg2); +    pvm_send(task, op); +} + +/* + * + * sendOpV takes a variable number of arguments, as specified by {\em n}.   + * For example, + * + *    sendOpV( PP_STATS, StatsTask, 3, start_time, stop_time, sparkcount); + */ + +//@cindex sendOpV +void +sendOpV(OpCode op, GlobalTaskId task, int n, ...) +{ +    va_list ap; +    int i; +    StgWord arg; + +    va_start(ap, n); + +    traceSendOp(op, task, 0, 0); + +    pvm_initsend(PvmDataRaw); + +    for (i = 0; i < n; ++i) { +	arg = va_arg(ap, StgWord); +	PutArgN(i, arg); +    } +    va_end(ap); + +    pvm_send(task, op); +} + +/*     + * + * sendOpNV takes a variable-size datablock, as specified by {\em + * nelem} and a variable number of arguments, as specified by {\em + * narg}. N.B. 
The datablock and the additional arguments are contiguous + * and are copied over together.  For example, + * + *        sendOpNV(PP_RESUME, tsoga.pe, 6, nelem, data, + *	    (W_) ga.weight, (W_) ga.loc.gc.gtid, (W_) ga.loc.gc.slot,  + *	    (W_) tsoga.weight, (W_) tsoga.loc.gc.gtid, (W_) tsoga.loc.gc.slot); + * + * Important: The variable arguments must all be StgWords. + + sendOpNV(_, tid, m, n, data, x1, ..., xm): + +                         |   n elems +     +------------------------------ +     | x1 | ... | xm | n | data .... +     +------------------------------ + */ + +//@cindex sendOpNV +void +sendOpNV(OpCode op, GlobalTaskId task, int nelem,  +	 StgWord *datablock, int narg, ...) +{ +    va_list ap; +    int i; +    StgWord arg; + +    va_start(ap, narg); + +    traceSendOp(op, task, 0, 0); +    IF_PAR_DEBUG(trace, +		 fprintf(stderr,"sendOpNV: op = %x (%s), task = %x, narg = %d, nelem = %d", +		       op, getOpName(op), task, narg, nelem)); + +    pvm_initsend(PvmDataRaw); + +    for (i = 0; i < narg; ++i) { +	arg = va_arg(ap, StgWord); +        IF_PAR_DEBUG(trace, +		     fprintf(stderr,"sendOpNV: arg = %d\n",arg)); +	PutArgN(i, arg); +    } +    arg = (StgWord) nelem; +    PutArgN(narg, arg); + +/*  for (i=0; i < nelem; ++i) fprintf(stderr, "%d ",datablock[i]); */ +/*  fprintf(stderr," in sendOpNV\n");*/ + +    PutArgs(datablock, nelem); +    va_end(ap); + +    pvm_send(task, op); +} + +/*     + * sendOpN take a variable size array argument, whose size is given by + * {\em n}.  For example, + * + *    sendOpN( PP_STATS, StatsTask, 3, stats_array); + */ + +//@cindex sendOpN +void +sendOpN(OpCode op, GlobalTaskId task, int n, StgPtr args) +{ +    long arg; + +    traceSendOp(op, task, 0, 0); + +    pvm_initsend(PvmDataRaw); +    arg = (long) n; +    PutArgN(0, arg); +    PutArgs(args, n); +    pvm_send(task, op); +} + +/* + * waitForPEOp waits for a packet from global task {\em who} with the + * OpCode {\em op}.  
Other OpCodes are handled by processUnexpected. + */ +//@cindex waitForPEOp +rtsPacket  +waitForPEOp(OpCode op, GlobalTaskId who) +{ +  rtsPacket p; +  int nbytes; +  OpCode opCode; +  GlobalTaskId sender_id; +  rtsBool match; + +  do { +    IF_PAR_DEBUG(verbose, +		  fprintf(stderr,"waitForPEOp: op = %x (%s), who = %x\n",  +			  op, getOpName(op), who));  + +    while((p = pvm_recv(ANY_TASK,ANY_OPCODE)) < 0) +      pvm_perror("waitForPEOp: Waiting for PEOp"); +       +    pvm_bufinfo( p, &nbytes, &opCode, &sender_id ); +    IF_PAR_DEBUG(verbose, +		 fprintf(stderr,"waitForPEOp: received: OpCode = %x, sender_id = %x", +		       opCode, getOpName(opCode), sender_id));  + +    match = (op == ANY_OPCODE || op == opCode) &&  +            (who == ANY_TASK || who == sender_id); + +    if (match) +      return(p); + +    /* Handle the unexpected OpCodes */ +    processUnexpected(p); + +  } while(rtsTrue); +} + +/* + * processUnexpected processes unexpected messages. If the message is a + * FINISH it exits the prgram, and PVM gracefully + */ +//@cindex processUnexpected +void +processUnexpected(rtsPacket packet) +{ +    OpCode opCode = getOpcode(packet); + +    IF_PAR_DEBUG(verbose, +		 GlobalTaskId sender = senderTask(packet);  +		 fprintf(stderr,"== [%x] processUnexpected: Received %x (%s), sender %x\n", +		       mytid, opCode, getOpName(opCode), sender));  + +    switch (opCode) { +    case PP_FINISH: +        stg_exit(EXIT_SUCCESS); +	break; + +      /* Anything we're not prepared to deal with.  Note that ALL OpCodes +	 are discarded during termination -- this helps prevent bizarre +	 race conditions.  
*/ +      default: +	if (!GlobalStopPending) { +	  GlobalTaskId errorTask; +	  OpCode opCode; + +	  getOpcodeAndSender(packet,&opCode,&errorTask); +	  fprintf(stderr,"Task %x: Unexpected OpCode %x from %x in processUnexpected", +		mytid, opCode, errorTask ); +             +	  stg_exit(EXIT_FAILURE); +	} +    } +} + +//@cindex getOpcode +OpCode  +getOpcode(rtsPacket p) +{ +  int nbytes; +  OpCode OpCode; +  GlobalTaskId sender_id; +  pvm_bufinfo(p, &nbytes, &OpCode, &sender_id); +  return(OpCode); +} + +//@cindex getOpcodeAndSender +void +getOpcodeAndSender(rtsPacket p, OpCode *opCodep, GlobalTaskId *senderIdp) +{ +  int nbytes; +  pvm_bufinfo(p, &nbytes, opCodep, senderIdp); +} + +//@cindex senderTask +GlobalTaskId +senderTask(rtsPacket p) +{ +  int nbytes; +  OpCode opCode; +  GlobalTaskId sender_id; +  pvm_bufinfo(p, &nbytes, &opCode, &sender_id); +  return(sender_id); +} + +/* + * PEStartUp does the low-level comms specific startup stuff for a + * PE. It initialises the comms system, joins the appropriate groups, + * synchronises with the other PEs. Receives and records in a global + * variable the task-id of SysMan. If this is the main thread (discovered + * in main.lc), identifies itself to SysMan. Finally it receives + * from SysMan an array of the Global Task Ids of each PE, which is + * returned as the value of the function. + */ + +//@cindex startUpPE +GlobalTaskId * +startUpPE(nat nPEs) +{ +  int i; +  rtsPacket addr; +  long *buffer = (long *) stgMallocBytes(sizeof(long) * nPEs,  +					 "PEStartUp (buffer)"); +  GlobalTaskId *thePEs = (GlobalTaskId *)  +    stgMallocBytes(sizeof(GlobalTaskId) * nPEs,  +		   "PEStartUp (PEs)"); + +  mytid = _my_gtid;	/* Initialise PVM and get task id into global var.*/ + +  IF_PAR_DEBUG(verbose, +	       fprintf(stderr,"== [%x] PEStartup: Task id = [%x], No. 
PEs = %d \n",  +		       mytid, mytid, nPEs)); +  checkComms(pvm_joingroup(PEGROUP), "PEStartup"); +  IF_PAR_DEBUG(verbose, +	       fprintf(stderr,"== [%x] PEStartup: Joined PEGROUP\n", mytid)); +  checkComms(pvm_joingroup(PECTLGROUP), "PEStartup"); +  IF_PAR_DEBUG(verbose, +	       fprintf(stderr,"== [%x] PEStartup: Joined PECTLGROUP\n", mytid)); +  checkComms(pvm_barrier(PECTLGROUP, nPEs+1), "PEStartup"); +  IF_PAR_DEBUG(verbose, +	       fprintf(stderr,"== [%x] PEStartup, Passed PECTLGROUP barrier\n", mytid)); + +  addr = waitForPEOp(PP_SYSMAN_TID, ANY_GLOBAL_TASK); +  SysManTask = senderTask(addr); +  if (IAmMainThread) {         /* Main Thread Identifies itself to SysMan */ +    pvm_initsend(PvmDataDefault); +    pvm_send(SysManTask, PP_MAIN_TASK); +  }  +  IF_PAR_DEBUG(verbose, +	       fprintf(stderr,"== [%x] Thread waits for %s\n",  +		       mytid, getOpName(PP_PETIDS))); +  addr = waitForPEOp(PP_PETIDS, ANY_GLOBAL_TASK); +  GetArgs(buffer, nPEs); +  for (i = 0; i < nPEs; ++i) { +    thePEs[i] = (GlobalTaskId) buffer[i]; +    IF_PAR_DEBUG(verbose, +		 fprintf(stderr,"== [%x] PEStartup: PEs[%d] = %x \n",  +			 mytid, i, thePEs[i]));  +  } +  free(buffer); +  return thePEs; +} + +/* + * PEShutdown does the low-level comms-specific shutdown stuff for a + * single PE. It leaves the groups and then exits from pvm. 
+ */ +//@cindex shutDownPE +void +shutDownPE(void) +{     +  IF_PAR_DEBUG(verbose, +	       fprintf(stderr, "== [%x] PEshutdown\n", mytid)); + +  checkComms(pvm_lvgroup(PEGROUP),"PEShutDown"); +  checkComms(pvm_lvgroup(PECTLGROUP),"PEShutDown"); +  checkComms(pvm_exit(),"PEShutDown"); +} + +#endif /* PAR -- whole file */ + +//@node Index,  , Auxiliary functions, GUM Low-Level Inter-Task Communication +//@subsection Index + +//@index +//* getOpName::  @cindex\s-+getOpName +//* traceSendOp::  @cindex\s-+traceSendOp +//* sendOp::  @cindex\s-+sendOp +//* sendOp1::  @cindex\s-+sendOp1 +//* sendOp2::  @cindex\s-+sendOp2 +//* sendOpV::  @cindex\s-+sendOpV +//* sendOpNV::  @cindex\s-+sendOpNV +//* sendOpN::  @cindex\s-+sendOpN +//* waitForPEOp::  @cindex\s-+waitForPEOp +//* processUnexpected::  @cindex\s-+processUnexpected +//* getOpcode::  @cindex\s-+getOpcode +//* getOpcodeAndSender::  @cindex\s-+getOpcodeAndSender +//* senderTask::  @cindex\s-+senderTask +//* startUpPE::  @cindex\s-+startUpPE +//* shutDownPE::  @cindex\s-+shutDownPE +//@end index diff --git a/ghc/rts/parallel/PEOpCodes.h b/ghc/rts/parallel/PEOpCodes.h new file mode 100644 index 0000000000..8380f46dcf --- /dev/null +++ b/ghc/rts/parallel/PEOpCodes.h @@ -0,0 +1,52 @@ +#ifndef PEOPCODES_H +#define PEOPCODES_H + +/************************************************************************ +*                         PEOpCodes.h                                   * +*									* +*	This file contains definitions for all the GUM PE Opcodes       * +*       It's based on the GRAPH for PVM version                         * +*       Phil Trinder, Glasgow University 8th December 1994              * +*									* +************************************************************************/ + +#define REPLY_OK		0x00 + +/*Startup + Shutdown*/ +#define	PP_SYSMAN_TID		0x50 +#define	PP_MAIN_TASK		0x51 +#define	PP_FINISH		0x52 +#define	PP_PETIDS		0x53 + +/* Stats stuff */ +#define	PP_STATS		0x54 +#define PP_STATS_ON		0x55 
+#define PP_STATS_OFF		0x56 + +#define PP_FAIL			0x57 + +/*Garbage Collection*/ +#define PP_GC_INIT              0x58 +#define PP_FULL_SYSTEM          0x59 +#define PP_GC_POLL              0x5a + +/*GUM Messages*/ +#define PP_FETCH                0x5b +#define PP_RESUME               0x5c +#define PP_ACK                  0x5d +#define PP_FISH                 0x5e +#define PP_SCHEDULE             0x5f +#define PP_FREE			0x60 + +#define	MIN_PEOPS		0x50 +#define	MAX_PEOPS		0x60 + +#define	PEOP_NAMES		"Init", "IOInit", \ +				"Finish", "PETIDS", \ +                                "Stats", "Stats_On", "Stats_Off", \ +  				"Fail", \ +                                "GCInit", "FullSystem", "GCPoll", \ +                                "Fetch","Resume","ACK","Fish","Schedule", \ +				"Free" + +#endif /* PEOPCODES_H */ diff --git a/ghc/rts/parallel/Pack.c b/ghc/rts/parallel/Pack.c new file mode 100644 index 0000000000..b5484a1064 --- /dev/null +++ b/ghc/rts/parallel/Pack.c @@ -0,0 +1,2614 @@ +/*  +   Time-stamp: <Thu Dec 16 1999 18:21:17 Stardate: [-30]4058.61 software> +   $Id: Pack.c,v 1.2 2000/01/13 14:34:08 hwloidl Exp $ + +   Graph packing and unpacking code for sending it to another processor +   and retrieving the original graph structure from the packet. +   In the old RTS the code was split into Pack.c and Unpack.c (now deceased) +   Used in GUM and GrAnSim. + +   The GrAnSim version of the code defines routines for *simulating* the +   packing of closures in the same way it is done in the parallel runtime +   system. Basically GrAnSim only puts the addresses of the closures to be +   transferred into a buffer. This buffer will then be associated with the +   event of transferring the graph. When this event is scheduled, the +   @UnpackGraph@ routine is called and the buffer can be discarded +   afterwards. + +   Note that in GranSim we need many buffers, not just one per PE.  
*/ + +//@node Graph packing, , , +//@section Graph packing + +#if defined(PAR) || defined(GRAN)   /* whole file */ + +#define _HS (sizeofW(StgHeader)) + +//@menu +//* Includes::			 +//* Prototypes::		 +//* Global variables::		 +//* ADT of Closure Queues::	 +//* Initialisation for packing::   +//* Packing Functions::		 +//* Low level packing routines::   +//* Unpacking routines::	 +//* Aux fcts for packing::	 +//* Printing Packet Contents::	 +//* End of file::		 +//@end menu +//*/ + +//@node Includes, Prototypes, Graph packing, Graph packing +//@subsection Includes + +#include "Rts.h" +#include "RtsFlags.h" +#include "RtsUtils.h" +#include "ClosureTypes.h" +#include "Storage.h" +#include "Hash.h" +#include "Parallel.h" +#include "GranSimRts.h" +#include "ParallelRts.h" +# if defined(DEBUG) +# include "ParallelDebug.h" +# endif +#include "FetchMe.h" + +/* Which RTS flag should be used to get the size of the pack buffer ? */ +# if defined(PAR) +#  define RTS_PACK_BUFFER_SIZE   RtsFlags.ParFlags.packBufferSize +# else   /* GRAN */ +#  define RTS_PACK_BUFFER_SIZE   RtsFlags.GranFlags.packBufferSize +# endif + +//@node Prototypes, Global variables, Includes, Graph packing +//@subsection Prototypes +/*  +   Code declarations.  
+*/ + +//@node ADT of closure queues, Init for packing, Prototypes, Prototypes +//@subsubsection ADT of closure queues + +static inline void    	  AllocClosureQueue(nat size); +static inline void    	  InitClosureQueue(void); +static inline rtsBool 	  QueueEmpty(void); +static inline void    	  QueueClosure(StgClosure *closure); +static inline StgClosure *DeQueueClosure(void); + +//@node Init for packing, Packing routines, ADT of closure queues, Prototypes +//@subsubsection Init for packing + +static void     initPacking(void); +# if defined(PAR) +rtsBool         initPackBuffer(void); +# elif defined(GRAN) +rtsPackBuffer  *InstantiatePackBuffer (void); +static void     reallocPackBuffer (void); +# endif + +//@node Packing routines, Low level packing fcts, Init for packing, Prototypes +//@subsubsection Packing routines + +static void    PackClosure (StgClosure *closure); + +//@node Low level packing fcts, Unpacking routines, Packing routines, Prototypes +//@subsubsection Low level packing fcts + +# if defined(GRAN) +static inline void    Pack (StgClosure *data); +# else +static inline void    Pack (StgWord data); + +static void    PackPLC (StgPtr addr); +static void    PackOffset (int offset); +static void    GlobaliseAndPackGA (StgClosure *closure); +# endif + +//@node Unpacking routines, Aux fcts for packing, Low level packing fcts, Prototypes +//@subsubsection Unpacking routines + +# if defined(PAR) +void        InitPendingGABuffer(nat size);  +void        CommonUp(StgClosure *src, StgClosure *dst); +StgClosure *UnpackGraph(rtsPackBuffer *packBuffer, +			globalAddr **gamap, +			nat *nGAs); +# elif defined(GRAN) +void        CommonUp(StgClosure *src, StgClosure *dst); +StgClosure *UnpackGraph(rtsPackBuffer* buffer); +#endif + +//@node Aux fcts for packing,  , Unpacking routines, Prototypes +//@subsubsection Aux fcts for packing + +# if defined(PAR) +static void 	DonePacking(void); +static void 	AmPacking(StgClosure *closure); +static int  	OffsetFor(StgClosure 
*closure); +static rtsBool  NotYetPacking(int offset); +static rtsBool  RoomToPack (nat size, nat ptrs); +       rtsBool  isOffset(globalAddr *ga); +       rtsBool  isFixed(globalAddr *ga); +# elif defined(GRAN) +static void     DonePacking(void); +static rtsBool  NotYetPacking(StgClosure *closure); +# endif + +//@node Global variables, ADT of Closure Queues, Prototypes, Graph packing +//@subsection Global variables +/* +  Static data declarations +*/ + +static nat     pack_locn,           /* ptr to first free loc in pack buffer */ +               clq_size, clq_pos, +               buf_id = 1;          /* identifier for buffer */ +static nat     unpacked_size; +static nat     reservedPAsize;        /* Space reserved for primitive arrays */ +static rtsBool RoomInBuffer; + +# if defined(GRAN) +/*  +   The pack buffer +   To be pedantic: in GrAnSim we're packing *addresses* of closures, +   not the closures themselves. +*/ +static rtsPackBuffer *Bonzo = NULL;                /* size: can be set via option */ +# else +static rtsPackBuffer *Bonzo = NULL;                /* size: can be set via option */ +# endif + +/* +  Bit of a hack for testing if a closure is the root of the graph. This is +  set in @PackNearbyGraph@ and tested in @PackClosure@.   +*/ + +static nat          packed_thunks = 0; +static StgClosure  *graph_root; + +# if defined(PAR) +/* +  The offset hash table is used during packing to record the location in +  the pack buffer of each closure which is packed. 
+*/ +//@cindex offsetTable +static HashTable *offsetTable; + +//@cindex PendingGABuffer +static globalAddr *PendingGABuffer;   +/* is initialised in main; */ +# endif /* PAR */ + +//@node ADT of Closure Queues, Initialisation for packing, Global variables, Graph packing +//@subsection ADT of Closure Queues + +//@menu +//* Closure Queues::		 +//* Init routines::		 +//* Basic routines::		 +//@end menu + +//@node Closure Queues, Init routines, ADT of Closure Queues, ADT of Closure Queues +//@subsubsection Closure Queues +/* +  Closure Queues + +  These routines manage the closure queue. +*/ + +static nat clq_pos, clq_size; + +static StgClosure **ClosureQueue = NULL;   /* HWL: init in main */ + +//@node Init routines, Basic routines, Closure Queues, ADT of Closure Queues +//@subsubsection Init routines + +/* @InitClosureQueue@ initialises the closure queue. */ + +//@cindex AllocClosureQueue +static inline void +AllocClosureQueue(size) +nat size; +{ +  ASSERT(ClosureQueue == NULL); +  ClosureQueue = (StgClosure**) stgMallocWords(size, "AllocClosureQueue"); +} + +//@cindex InitClosureQueue +static inline void +InitClosureQueue(void) +{ +  clq_pos = clq_size = 0; + +  if ( ClosureQueue == NULL )  +     AllocClosureQueue(RTS_PACK_BUFFER_SIZE); +} + +//@node Basic routines,  , Init routines, ADT of Closure Queues +//@subsubsection Basic routines + +/* +  QueueEmpty returns rtsTrue if the closure queue is empty; rtsFalse otherwise. +*/ + +//@cindex QueueEmpty +static inline rtsBool +QueueEmpty(void) +{ +  return(clq_pos >= clq_size); +} + +/* QueueClosure adds its argument to the closure queue. */ + +//@cindex QueueClosure +static inline void +QueueClosure(closure) +StgClosure *closure; +{ +  if(clq_size < RTS_PACK_BUFFER_SIZE ) +    ClosureQueue[clq_size++] = closure; +  else +    barf("Closure Queue Overflow (EnQueueing %p (%s))",  +	 closure, info_type(closure)); +} + +/* DeQueueClosure returns the head of the closure queue. 
*/ + +//@cindex DeQueueClosure +static inline StgClosure *  +DeQueueClosure(void) +{ +  if(!QueueEmpty()) +    return(ClosureQueue[clq_pos++]); +  else +    return((StgClosure*)NULL); +} + +//@node Initialisation for packing, Packing Functions, ADT of Closure Queues, Graph packing +//@subsection Initialisation for packing +/* +  Simple Packing Routines + +  About packet sizes in GrAnSim: In GrAnSim we use a malloced block of +  gransim_pack_buffer_size words to simulate a packet of pack_buffer_size +  words.  In the simulated PackBuffer we only keep the addresses of the +  closures that would be packed in the parallel system (see Pack). To +  decide if a packet overflow occurs pack_buffer_size must be compared +  versus unpacked_size (see RoomToPack).  Currently, there is no multi +  packet strategy implemented, so in the case of an overflow we just stop +  adding closures to the closure queue.  If an overflow of the simulated +  packet occurs, we just realloc some more space for it and carry on as +  usual.  -- HWL */ + +# if defined(GRAN) +rtsPackBuffer * +InstantiatePackBuffer (void) { +  extern rtsPackBuffer *Bonzo; + +  Bonzo = (rtsPackBuffer *) stgMallocWords(sizeofW(rtsPackBuffer),  +			 "InstantiatePackBuffer: failed to alloc packBuffer"); +  Bonzo->size = RtsFlags.GranFlags.packBufferSize_internal; +  Bonzo->buffer = (StgWord **) stgMallocWords(RtsFlags.GranFlags.packBufferSize_internal, +				 "InstantiatePackBuffer: failed to alloc GranSim internal packBuffer"); +  /* NB: gransim_pack_buffer_size instead of pack_buffer_size -- HWL */ +  /* stgMallocWords is now simple allocate in Storage.c */ + +  return (Bonzo); +} + +/*  +   Reallocate the GranSim internal pack buffer to make room for more closure +   pointers. 
This is independent of the check for packet overflow as in GUM +*/ +static void +reallocPackBuffer (void) { + +  ASSERT(pack_locn >= (int)Bonzo->size+sizeofW(rtsPackBuffer)); + +  IF_GRAN_DEBUG(packBuffer, +		belch("** Increasing size of PackBuffer %p to %d words (PE %u @ %d)\n", +		      Bonzo, Bonzo->size+REALLOC_SZ, +		      CurrentProc, CurrentTime[CurrentProc])); +   +  Bonzo = (rtsPackBuffer*)realloc(Bonzo,  +				  sizeof(StgClosure*)*(REALLOC_SZ + +						       (int)Bonzo->size + +						       sizeofW(rtsPackBuffer))) ; +  if (Bonzo==(rtsPackBuffer*)NULL)  +    barf("Failing to realloc %d more words for PackBuffer %p (PE %u @ %d)\n",  +	 REALLOC_SZ, Bonzo, CurrentProc, CurrentTime[CurrentProc]); +   +  Bonzo->size += REALLOC_SZ; + +  ASSERT(pack_locn < Bonzo->size+sizeofW(rtsPackBuffer)); +} +# endif + +# if defined(PAR) +/* @initPacking@ initialises the packing buffer etc. */ +//@cindex initPackBuffer +rtsBool +initPackBuffer(void) +{ +  if (Bonzo == NULL) { /* not yet allocated */ + +      if ((Bonzo = (rtsPackBuffer *)  +	             stgMallocWords(sizeofW(rtsPackBuffer)+RtsFlags.ParFlags.packBufferSize, +					       "initPackBuffer")) == NULL) +	return rtsFalse; +       +      InitPendingGABuffer(RtsFlags.ParFlags.packBufferSize); +      AllocClosureQueue(RtsFlags.ParFlags.packBufferSize); +  } +  return rtsTrue; +} +# endif  + +static void +initPacking(void) +{ +# if defined(GRAN) +  Bonzo = InstantiatePackBuffer();     /* for GrAnSim only -- HWL */ +                                       /* NB: free in UnpackGraph */ +# endif + +  Bonzo->id = buf_id++;  /* buffer id are only used for debugging! 
*/ +  pack_locn = 0;         /* the index into the actual pack buffer */ +  unpacked_size = 0;     /* the size of the whole graph when unpacked */ +  reservedPAsize = 0; +  RoomInBuffer = rtsTrue; +  InitClosureQueue(); +  packed_thunks = 0;   /* total number of thunks packed so far */ +# if defined(PAR) +  offsetTable = allocHashTable(); +# endif +} + +//@node Packing Functions, Low level packing routines, Initialisation for packing, Graph packing +//@subsection Packing Functions + +//@menu +//* Packing Sections of Nearby Graph::	 +//* Packing Closures::		 +//@end menu + +//@node Packing Sections of Nearby Graph, Packing Closures, Packing Functions, Packing Functions +//@subsubsection Packing Sections of Nearby Graph +/* +  Packing Sections of Nearby Graph + +  @PackNearbyGraph@ packs a closure and associated graph into a static +  buffer (@PackBuffer@).  It returns the address of this buffer and the +  size of the data packed into the buffer (in its second parameter, +  @packBufferSize@).  The associated graph is packed in a depth first +  manner, hence it uses an explicit queue of closures to be packed rather +  than simply using a recursive algorithm.  Once the packet is full, +  closures (other than primitive arrays) are packed as FetchMes, and their +  children are not queued for packing.  
*/ + +//@cindex PackNearbyGraph + +/* NB: this code is shared between GranSim and GUM; +       tso only used in GranSim */ +rtsPackBuffer * +PackNearbyGraph(closure, tso, packBufferSize) +StgClosure* closure; +StgTSO* tso; +nat *packBufferSize; +{ +  extern rtsPackBuffer *Bonzo; +  /* Ensure enough heap for all possible RBH_Save closures */ + +  ASSERT(RTS_PACK_BUFFER_SIZE > 0); + +  /* ToDo: check that we have enough heap for the packet +     ngoq ngo' +     if (Hp + PACK_HEAP_REQUIRED > HpLim)  +     return NULL; +  */ + +  initPacking(); +# if defined(GRAN) +  graph_root = closure; +# endif + +  IF_GRAN_DEBUG(pack, +		belch(">>> Packing <<%d>> (buffer @ %p); graph root @ %p [PE %d]\n    demanded by TSO %d (%p) [PE %u]", +		      Bonzo->id, Bonzo, closure, where_is(closure),  +		      tso->id, tso, where_is((StgClosure*)tso))); + +  IF_GRAN_DEBUG(pack, +		belch("** PrintGraph of %p is:", closure);  +		PrintGraph(closure,0)); + +  IF_PAR_DEBUG(pack, +	       belch(">>> Packing <<%d>> (buffer @ %p); graph root @ %p [%x]\n    demanded by TSO %d (%p)", +		     Bonzo->id, Bonzo, closure, mytid, +		     tso->id, tso));  + +  IF_PAR_DEBUG(pack, +	       belch("** PrintGraph of %p is:", closure);  +	       belch("** pack_locn=%d", pack_locn); +	       PrintGraph(closure,0)); + +  QueueClosure(closure); +  do { +    PackClosure(DeQueueClosure()); +  } while (!QueueEmpty()); +   +# if defined(PAR) + +  /* Record how much space is needed to unpack the graph */ +  Bonzo->tso = tso; // ToDo: check: used in GUM or only for debugging? 
+  Bonzo->unpacked_size = unpacked_size; +  Bonzo->size = pack_locn; + +  /* Set the size parameter */ +  ASSERT(pack_locn <= RtsFlags.ParFlags.packBufferSize); +  *packBufferSize = pack_locn; + +# else  /* GRAN */ + +  /* Record how much space is needed to unpack the graph */ +  // PackBuffer[PACK_FLAG_LOCN] = (P_) MAGIC_PACK_FLAG;  for testing +  Bonzo->tso = tso; +  Bonzo->unpacked_size = unpacked_size; + +  // ASSERT(pack_locn <= PackBuffer[PACK_SIZE_LOCN]+PACK_HDR_SIZE); +  /* ToDo: Print an earlier, more meaningful message */ +  if (pack_locn==0)   /* i.e. packet is empty */ +    barf("EMPTY PACKET! Can't transfer closure %p at all!!\n", +	 closure); +  Bonzo->size = pack_locn; +  *packBufferSize = pack_locn; + +# endif + +  DonePacking();                               /* {GrAnSim}vaD 'ut'Ha' */ + +# if defined(GRAN) +  IF_GRAN_DEBUG(pack , +		belch("** Finished <<%d>> packing graph %p; closures packed: %d; thunks packed: %d; size of graph: %d", +		      Bonzo->id, closure, Bonzo->size, packed_thunks, Bonzo->unpacked_size)); +  if (RtsFlags.GranFlags.GranSimStats.Global) { +    globalGranStats.tot_packets++;  +    globalGranStats.tot_packet_size += pack_locn;  +  } +   +  IF_GRAN_DEBUG(pack, PrintPacket(Bonzo)); +# elif defined(PAR) +  IF_GRAN_DEBUG(pack , +		belch("** Finished <<%d>> packing graph %p; closures packed: %d; thunks packed: %d; size of graph: %d", +		      Bonzo->id, closure, Bonzo->size, packed_thunks, Bonzo->unpacked_size); +		PrintPacket(Bonzo)); +# endif   /* GRAN */ + +  return (Bonzo); +} + +//@cindex PackOneNode + +# if defined(GRAN) +/* This version is used when the node is already local */ + +rtsPackBuffer * +PackOneNode(closure, tso, packBufferSize) +StgClosure* closure; +StgTSO* tso; +nat *packBufferSize; +{ +  extern rtsPackBuffer *Bonzo; +  int i, clpack_locn; + +  initPacking(); + +  IF_GRAN_DEBUG(pack, +		belch("** PackOneNode: %p (%s)[PE %d] requested by TSO %d (%p) [PE %d]", +		      closure, info_type(closure), +		      
where_is(closure), tso->id, tso, where_is((StgClosure *)tso))); + +  Pack(closure); + +  /* Record how much space is needed to unpack the graph */ +  Bonzo->tso = tso; +  Bonzo->unpacked_size = unpacked_size; + +  /* Set the size parameter */ +  ASSERT(pack_locn <= RTS_PACK_BUFFER_SIZE); +  Bonzo->size =  pack_locn; +  *packBufferSize = pack_locn; + +  if (RtsFlags.GranFlags.GranSimStats.Global) { +    globalGranStats.tot_packets++;  +    globalGranStats.tot_packet_size += pack_locn;  +  } +  IF_GRAN_DEBUG(pack, +    PrintPacket(Bonzo)); + +  return (Bonzo); +} +# endif  /* GRAN */ + +#if defined(GRAN) + +/* +   PackTSO and PackStkO are entry points for two special kinds of closure +   which are used in the parallel RTS.  Compared with other closures they +   are rather awkward to pack because they don't follow the normal closure +   layout (where all pointers occur before all non-pointers).  Luckily, +   they're only needed when migrating threads between processors.  */ + +//@cindex PackTSO +rtsPackBuffer* +PackTSO(tso, packBufferSize) +StgTSO *tso; +nat *packBufferSize; +{ +  extern rtsPackBuffer *Bonzo; +  IF_GRAN_DEBUG(pack, +		belch("** Packing TSO %d (%p)", tso->id, tso)); +  *packBufferSize = 0; +  // PackBuffer[0] = PackBuffer[1] = 0; ??? 
+  return(Bonzo); +} + +//@cindex PackStkO +rtsPackBuffer* +PackStkO(stko, packBufferSize) +StgPtr stko; +nat *packBufferSize; +{ +  extern rtsPackBuffer *Bonzo; +  IF_GRAN_DEBUG(pack, +		belch("** Packing STKO %p", stko)); +  *packBufferSize = 0; +  // PackBuffer[0] = PackBuffer[1] = 0; +  return(Bonzo); +} + +void +PackFetchMe(StgClosure *closure) +{ +  barf("{PackFetchMe}Daq Qagh: no FetchMe closures in GRAN!"); +} + +#elif defined(PAR) + +rtsPackBuffer* +PackTSO(tso, packBufferSize) +StgTSO *tso; +nat *packBufferSize; +{ +  barf("{PackTSO}Daq Qagh: trying to pack a TSO; thread migrations not supported, yet"); +} + +rtsPackBuffer* +PackStkO(stko, packBufferSize) +StgPtr stko; +nat *packBufferSize; +{ +  barf("{PackStkO}Daq Qagh: trying to pack a STKO; thread migrations not supported, yet"); +} + +//@cindex PackFetchMe +void +PackFetchMe(StgClosure *closure) +{ +  StgInfoTable *ip; +  nat i; + +#if defined(GRAN) +  barf("{PackFetchMe}Daq Qagh: no FetchMe closures in GRAN!"); +#else +  /* Pack a FetchMe closure instead of closure */ +  ip = &FETCH_ME_info; +  /* this assumes that the info ptr is always the first word in a closure*/ +  Pack((StgWord)ip); +  for (i = 1; i < _HS; ++i)               // pack rest of fixed header +    Pack((StgWord)*(((StgPtr)closure)+i)); +   +  unpacked_size += _HS; // ToDo: check +#endif +} + +#endif + +//@node Packing Closures,  , Packing Sections of Nearby Graph, Packing Functions +//@subsubsection Packing Closures +/* +  Packing Closures + +  @PackClosure@ is the heart of the normal packing code.  It packs a single +  closure into the pack buffer, skipping over any indirections and +  globalising it as necessary, queues any child pointers for further +  packing, and turns it into a @FetchMe@ or revertible black hole (@RBH@) +  locally if it was a thunk.  Before the actual closure is packed, a +  suitable global address (GA) is inserted in the pack buffer.  
There is +  always room to pack a fetch-me to the closure (guaranteed by the +  RoomToPack calculation), and this is packed if there is no room for the +  entire closure. + +  Space is allocated for any primitive array children of a closure, and +  hence a primitive array can always be packed along with it's parent +  closure.  */ + +//@cindex PackClosure + +# if defined(PAR) + +void +PackClosure(closure) +StgClosure *closure; +{ +  StgInfoTable *info; +  StgClosure *indirectee, *rbh; +  nat size, ptrs, nonptrs, vhs, i, clpack_locn; +  rtsBool is_CONSTR = rtsFalse; +  char str[80]; + +  ASSERT(closure!=NULL); +  indirectee = closure; +  do { +    /* Don't pack indirection closures */ +    closure =  indirectee; +    indirectee = IS_INDIRECTION(closure); +    IF_PAR_DEBUG(pack, +		 if (indirectee)  +		   belch("** Shorted an indirection (%s) at %p (-> %p)",  +			 info_type(closure), closure, indirectee)); +  } while (indirectee); + +  clpack_locn = OffsetFor(closure); + +  /* If the closure has been packed already, just pack an indirection to it +     to guarantee that the graph doesn't become a tree when unpacked */ +  if (!NotYetPacking(clpack_locn)) { +    StgInfoTable *info; + +    PackOffset(clpack_locn); +    return; +  } + +  /* +   * PLCs reside on all of the PEs already. Just pack the +   * address as a GA (a bit of a kludge, since an address may +   * not fit in *any* of the individual GA fields). Const, +   * charlike and small intlike closures are converted into +   * PLCs. 
+   */ +  switch (get_itbl(closure)->type) { + +#  ifdef DEBUG +    // check error cases only in a debugging setup +  case RET_BCO: +  case RET_SMALL: +  case RET_VEC_SMALL: +  case RET_BIG: +  case RET_VEC_BIG: +  case RET_DYN: +    barf("** {Pack}Daq Qagh: found return vector %p (%s) when packing (thread migration not implemented)",  +	 closure, info_type(closure)); +    /* never reached */ +     +  case UPDATE_FRAME: +  case STOP_FRAME: +  case CATCH_FRAME: +  case SEQ_FRAME: +    barf("** {Pack}Daq Qagh: found stack frame %p (%s) when packing (thread migration not implemented)",  +	 closure, info_type(closure)); +    /* never reached */ + +  case TSO: +  case BLOCKED_FETCH: +  case EVACUATED: +    /* something's very wrong */ +    barf("** {Pack}Daq Qagh: found %s (%p) when packing",  +	 info_type(closure), closure); +    /* never reached */ +#  endif + +  case CONSTR_CHARLIKE: +    IF_PAR_DEBUG(pack, +		 belch("** Packing a charlike closure %d",  +		       ((StgIntCharlikeClosure*)closure)->data)); +     +    PackPLC(CHARLIKE_CLOSURE(((StgIntCharlikeClosure*)closure)->data)); +    return; +       +  case CONSTR_INTLIKE: +    { +      StgInt val = ((StgIntCharlikeClosure*)closure)->data; +       +      if ((val <= MAX_INTLIKE) && (val >= MIN_INTLIKE)) { +	IF_PAR_DEBUG(pack, +		     belch("** Packing a small intlike %d as a PLC", val)); +	PackPLC(INTLIKE_CLOSURE(val)); +	return; +      } else { +	IF_PAR_DEBUG(pack, +		     belch("** Packing a big intlike %d as a normal closure",  +			   val)); +	break; +      } +    } + +  case CONSTR: +  case CONSTR_1_0: +  case CONSTR_0_1: +  case CONSTR_2_0: +  case CONSTR_1_1: +  case CONSTR_0_2: +    /* it's a constructor (i.e. 
plain data) but we don't know  +       how many ptrs, non-ptrs there are => use generic code */ +    IF_PAR_DEBUG(pack, +		 belch("** Packing a CONSTR %p (%s) using generic packing with GA",  +		       closure, info_type(closure))); +    // is_CONSTR = rtsTrue; +    break; +    /* fall through to generic packing code */ + +  case CONSTR_STATIC: +  case CONSTR_NOCAF_STATIC:// For now we ship indirections to CAFs: They are +			   // evaluated on each PE if needed +    IF_PAR_DEBUG(pack, +      belch("** Packing a %p (%s) as a PLC",  +	    closure, info_type(closure))); + +    PackPLC(closure); +    return; + +  case MVAR: +    /* MVARs may not be copied; they are sticky objects in the new RTS */ +    /* therefore we treat them just as RBHs etc (what a great system!) */ +    IF_PAR_DEBUG(pack, +		 belch("** Found an MVar at %p (%s)",  +		       closure, info_type(closure))); +    /* fall through !! */ + +  case THUNK_SELECTOR: // ToDo: fix packing of this strange beast +    IF_PAR_DEBUG(pack, +		 belch("** Found an THUNK_SELECTORE at %p (%s)",  +		       closure, info_type(closure))); +    /* fall through !! */ + +  case CAF_BLACKHOLE: +  case SE_CAF_BLACKHOLE: +  case SE_BLACKHOLE: +  case BLACKHOLE: +  case RBH: +  case FETCH_ME: +  case FETCH_ME_BQ: + +    /* If it's a (revertible) black-hole, pack a FetchMe closure to it */ +    //ASSERT(pack_locn > PACK_HDR_SIZE); +     +    IF_PAR_DEBUG(pack, +		 belch("** Packing a BH or FM at %p (%s) of (fixed size %d)",  +		       closure, info_type(closure), _HS)); + +    /* Need a GA even when packing a constructed FETCH_ME (cruel world!) */ +    GlobaliseAndPackGA(closure); + +    PackFetchMe(closure); +    return; + +  default: +/*      IF_PAR_DEBUG(pack, */ +/*  		 belch("** Not a PLC or BH ... 
")); */ +  } /* switch */ + +  /* get info about basic layout of the closure */ +  info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str); + +  ASSERT(!IS_BLACK_HOLE(closure)); + +  IF_PAR_DEBUG(pack, +	       fprintf(stderr, "** packing %p (%s) (size=%d, ptrs=%d, nonptrs=%d)\n", +		       closure, info_type(closure), size, ptrs, nonptrs)); + +  /* +   * Now peek ahead to see whether the closure has any primitive array +   * children +   */ +  /* +      ToDo: fix this code -- HWL +    for (i = 0; i < ptrs; ++i) { +      StgInfoTable * childInfo; +      nat childSize, childPtrs, childNonPtrs, childVhs; +       +      // extract i-th pointer out of closure  +      childInfo = get_closure_info(((PP_) (closure))[i + FIXED_HS + vhs], +				   &childSize, &childPtrs, &childNonPtrs, &childVhs, str); +      if (IS_BIG_MOTHER(childInfo)) { +	reservedPAsize += PACK_GA_SIZE + FIXED_HS + childVhs + childNonPtrs +	  + childPtrs * PACK_FETCHME_SIZE; +      } +    } +    */ +  /* Record the location of the GA */ +  AmPacking(closure); + +  /* Pack the global address */ +  if (!is_CONSTR) { +    GlobaliseAndPackGA(closure); +  } else { +    IF_PAR_DEBUG(pack, +		 belch("** No GA allocated for CONSTR %p (%s)", +		       closure, info_type(closure))); +  } + +  /* +   * Pack a fetchme to the closure if it's a black hole, or the buffer is full +   * and it isn't a primitive array. N.B. 
Primitive arrays are always packed +   * (because their parents index into them directly) +   */ + +  // ToDo: pack FMs if no more room available in packet (see below) +  if (!(RoomToPack(PACK_GA_SIZE + _HS + vhs + nonptrs, ptrs))) +    barf("** Qagh: Pack: not enough room in packet to pack closure %p (%s)", +	 closure, info_type(closure)); + +  /* +    Has been moved into the switch statement +     +    if (IS_BLACK_HOLE(closure))  +    !(RoomToPack(PACK_GA_SIZE + FIXED_HS + vhs + nonptrs, ptrs) +    || IS_BIG_MOTHER(info)))  +    { +       +      ASSERT(pack_locn > PACK_HDR_SIZE); +       +      info = FetchMe_info; +      for (i = 0; i < FIXED_HS; ++i) { +	if (i == INFO_HDR_POSN) +	  Pack((StgWord) FetchMe_info); +	else +	  Pack(closure[i]); +      } + +      unpacked_size += FIXED_HS + FETCHME_CLOSURE_SIZE(dummy); + +    } else { +  */ +  if (info->type == ARR_WORDS || info->type == MUT_ARR_PTRS || +      info->type == MUT_ARR_PTRS_FROZEN || info->type == MUT_VAR) +    belch("** ghuH: found %s; packing of primitive arrays not yet implemented", +	  info_type(closure)); + +  /* At last! A closure we can actually pack! */ +  if (ip_MUTABLE(info) && (info->type != FETCH_ME)) +    fprintf(stderr, "** ghuH: Replicated a Mutable closure!\n"); +       +  /*  +     Remember, the generic closure layout is as follows: +        +-------------------------------------------------+ +	| FIXED HEADER | VARIABLE HEADER | PTRS | NON-PRS | +        +-------------------------------------------------+ +  */ +  /* pack fixed and variable header */ +  for (i = 0; i < _HS + vhs; ++i) +    Pack((StgWord)*(((StgPtr)closure)+i)); +       +  /* register all ptrs for further packing */ +  for (i = 0; i < ptrs; ++i) +    QueueClosure(((StgClosure *) *(((StgPtr)closure)+(i+_HS+vhs)))); + +  /* pack non-ptrs */ +  for (i = 0; i < nonptrs; ++i) +    Pack((StgWord)*(((StgPtr)closure)+(i+_HS+vhs+ptrs))); +       +  unpacked_size += _HS + (size < MIN_UPD_SIZE ? 
MIN_UPD_SIZE : size); + +  /* +   * Record that this is a revertable black hole so that we can fill in +   * its address from the fetch reply.  Problem: unshared thunks may cause +   * space leaks this way, their GAs should be deallocated following an +   * ACK. +   */ +       +  // IS_UPDATABLE(closure) == !closure_UNPOINTED(closure) !? HWL +  if (closure_THUNK(closure) && !closure_UNPOINTED(closure)) {  +    rbh = convertToRBH(closure); +    ASSERT(rbh == closure); // rbh at the same position (minced version) +    packed_thunks++; +  } +} + +# else  /* GRAN */ + +/* Fake the packing of a closure */ + +void +PackClosure(closure) +StgClosure *closure; +{ +  StgInfoTable *info, *childInfo; +  nat size, ptrs, nonptrs, vhs; +  char info_hdr_ty[80]; +  nat i; +  StgClosure *indirectee, *rbh; +  char str[80]; +  rtsBool is_mutable, will_be_rbh, no_more_thunks_please; + +  is_mutable = rtsFalse; + +  /* In GranSim we don't pack and unpack closures -- we just simulate +     packing by updating the bitmask. So, the graph structure is unchanged +     i.e. we don't short out indirections here. 
-- HWL */ + +  /* Nothing to do with packing but good place to (sanity) check closure; +     if the closure is a thunk, it must be unique; otherwise we have copied +     work at some point before that which violates one of our main global +     assertions in GranSim/GUM */ +  ASSERT(!closure_THUNK(closure) || is_unique(closure)); + +  IF_GRAN_DEBUG(pack, +		belch("**  Packing closure %p (%s)", +		      closure, info_type(closure))); + +  if (where_is(closure) != where_is(graph_root)) { +    IF_GRAN_DEBUG(pack, +		  belch("**   faking a FETCHME [current PE: %d, closure's PE: %d]", +			where_is(graph_root), where_is(closure))); + +    /* GUM would pack a FETCHME here; simulate that by increasing the */ +    /* unpacked size accordingly but don't pack anything -- HWL */ +    unpacked_size += _HS + 2 ; // sizeofW(StgFetchMe); +    return;  +  } + +  /* If the closure's not already being packed */ +  if (!NotYetPacking(closure))  +    /* Don't have to do anything in GrAnSim if closure is already */ +    /* packed -- HWL */ +    { +      IF_GRAN_DEBUG(pack, +		    belch("**    Closure %p is already packed and omitted now!", +			    closure)); +      return; +    } + +  switch (get_itbl(closure)->type) { +    /* ToDo: check for sticky bit here? */ +    /* BH-like closures which must not be moved to another PE */ +    case CAF_BLACKHOLE:       /* # of ptrs, nptrs: 0,2 */ +    case SE_BLACKHOLE:        /* # of ptrs, nptrs: 0,2 */ +    case SE_CAF_BLACKHOLE:    /* # of ptrs, nptrs: 0,2 */ +    case BLACKHOLE:           /* # of ptrs, nptrs: 0,2 */ +    case BLACKHOLE_BQ:        /* # of ptrs, nptrs: 1,1 */ +    case RBH:                 /* # of ptrs, nptrs: 1,1 */ +      /* same for these parallel specific closures */ +    case BLOCKED_FETCH: +    case FETCH_ME: +    case FETCH_ME_BQ: +      IF_GRAN_DEBUG(pack, +	belch("**    Avoid packing BH-like closures (%p, %s)!",  +	      closure, info_type(closure))); +      /* Just ignore RBHs i.e. 
they stay where they are */ +      return; + +    case THUNK_SELECTOR: +      { +	StgClosure *sel = ((StgSelector *)closure)->selectee; + +	IF_GRAN_DEBUG(pack, +		      belch("**    Avoid packing THUNK_SELECTOR (%p, %s) but queuing %p (%s)!",  +			    closure, info_type(closure), sel, info_type(sel))); +	QueueClosure(sel); +	IF_GRAN_DEBUG(pack, +		      belch("**    [%p (%s) (Queueing closure) ....]", +			    sel, info_type(sel))); +      } +      return; + +    case CONSTR_STATIC: +    case CONSTR_NOCAF_STATIC: +                                  /* For now we ship indirections to CAFs: +				   * They are evaluated on each PE if needed */ +      IF_GRAN_DEBUG(pack, +	belch("**    Nothing to pack for %p (%s)!",  +	      closure, info_type(closure))); +      // Pack(closure); GUM only +      return; + +    case CONSTR_CHARLIKE: +    case CONSTR_INTLIKE: +      IF_GRAN_DEBUG(pack, +	belch("**    Nothing to pack for %s (%p)!",  +	      closure, info_type(closure))); +      // PackPLC(((StgIntCharlikeClosure *)closure)->data); GUM only +      return; + +    case AP_UPD:    +    case PAP: +      /* partial applications; special treatment necessary? */ +      break; + +    case CAF_UNENTERED:    /* # of ptrs, nptrs: 1,3 */ +    case CAF_ENTERED:      /* # of ptrs, nptrs: 0,4  (allegedly bogus!!) */ +      /* CAFs; special treatment necessary? 
*/ +      break; + +    case MVAR: +      barf("{PackClosure}Daq Qagh: found an MVAR (%p, %s); ToDo: implement proper treatment of MVARs", +	   closure, info_type(closure)); + +    case ARR_WORDS: +    case MUT_VAR: +    case MUT_ARR_PTRS: +    case MUT_ARR_PTRS_FROZEN: +      /* Mutable objects; require special treatment to ship all data */ +      is_mutable = rtsTrue; +      break;	   + +    case WEAK: +    case FOREIGN: +    case STABLE_NAME: +	  /* weak pointers and other FFI objects */ +      barf("{PackClosure}Daq Qagh: found an FFI object (%p, %s); FFI not yet supported by GranSim, sorry", +	   closure, info_type(closure)); + +    case TSO: +      /* parallel objects */ +      barf("{PackClosure}Daq Qagh: found a TSO when packing (%p, %s); thread migration not yet implemented, sorry", +	   closure, info_type(closure)); + +    case BCO: +      /* Hugs objects (i.e. closures used by the interpreter) */ +      barf("{PackClosure}Daq Qagh: found a Hugs closure when packing (%p, %s); GranSim not yet integrated with Hugs, sorry", +	   closure, info_type(closure)); +       +    case IND:              /* # of ptrs, nptrs: 1,0 */ +    case IND_STATIC:       /* # of ptrs, nptrs: 1,0 */ +    case IND_PERM:         /* # of ptrs, nptrs: 1,1 */ +    case IND_OLDGEN:       /* # of ptrs, nptrs: 1,1 */ +    case IND_OLDGEN_PERM:  /* # of ptrs, nptrs: 1,1 */ +      /* we shouldn't find an indirection here, because we have shorted them +	 out at the beginning of this functions already. 
+      */ +      break; +      /* should be: +      barf("{PackClosure}Daq Qagh: found indirection when packing (%p, %s)", +	   closure, info_type(closure)); +      */ + +    case UPDATE_FRAME: +    case CATCH_FRAME: +    case SEQ_FRAME: +    case STOP_FRAME: +      /* stack frames; should never be found when packing for now; +	 once we support thread migration these have to be covered properly +      */ +      barf("{PackClosure}Daq Qagh: found stack frame when packing (%p, %s)", +	   closure, info_type(closure)); + +    case RET_BCO: +    case RET_SMALL: +    case RET_VEC_SMALL: +    case RET_BIG: +    case RET_VEC_BIG: +    case RET_DYN: +      /* vectored returns; should never be found when packing; */ +      barf("{PackClosure}Daq Qagh: found vectored return (%p, %s)", +	   closure, info_type(closure)); + +    case INVALID_OBJECT: +      barf("{PackClosure}Daq Qagh: found Invalid object (%p, %s)", +	   closure, info_type(closure)); + +    default: +      /*  +	 Here we know that the closure is a CONSTR, FUN or THUNK (maybe +	 a specialised version with wired in #ptr/#nptr info; currently +	 we treat these specialised versions like the generic version) +      */ +    }     /* switch */ + +    /* Otherwise it's not Fixed */ + +    info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str); +    will_be_rbh = closure_THUNK(closure) && !closure_UNPOINTED(closure); + +    IF_GRAN_DEBUG(pack, +		belch("**    Info on closure %p (%s): size=%d; ptrs=%d", +		      closure, info_type(closure), +		      size, ptrs,  +		      (will_be_rbh) ? 
"will become RBH" : "will NOT become RBH")); +     +    // check whether IS_UPDATABLE(closure) == !closure_UNPOINTED(closure) -- HWL +    no_more_thunks_please =  +      (RtsFlags.GranFlags.ThunksToPack>0) &&  +      (packed_thunks>=RtsFlags.GranFlags.ThunksToPack); + +    /* +      should be covered by get_closure_info +    if (info->type == FETCH_ME || info->type == FETCH_ME_BQ ||  +	info->type == BLACKHOLE || info->type == RBH ) +      size = ptrs = nonptrs = vhs = 0; +    */ +    /* Now peek ahead to see whether the closure has any primitive */ +    /* array children */  +    /*  +       ToDo: fix this code +       for (i = 0; i < ptrs; ++i) { +       P_ childInfo; +       W_ childSize, childPtrs, childNonPtrs, childVhs; +        +       childInfo = get_closure_info(((StgPtrPtr) (closure))[i + FIXED_HS + vhs], +       &childSize, &childPtrs, &childNonPtrs, +       &childVhs, junk_str); +       if (IS_BIG_MOTHER(childInfo)) { +       reservedPAsize += PACK_GA_SIZE + FIXED_HS +  +       childVhs + childNonPtrs + +       childPtrs * PACK_FETCHME_SIZE; +       PAsize += PACK_GA_SIZE + FIXED_HS + childSize; +       PAptrs += childPtrs; +       } +       } +    */ +    /* Don't pack anything (GrAnSim) if it's a black hole, or the buffer +     * is full and it isn't a primitive array. N.B. Primitive arrays are +     * always packed (because their parents index into them directly) */ + +    if (IS_BLACK_HOLE(closure)) +	/* +	  ToDo: fix this code +	  ||  +	  !(RoomToPack(PACK_GA_SIZE + FIXED_HS + vhs + nonptrs, ptrs)  +	  || IS_BIG_MOTHER(info)))  +	  */ +      return; + +    /* At last! A closure we can actually pack! */ + +    if (closure_MUTABLE(closure)) // not nec. 
&& (info->type != FETCHME)) +      belch("ghuH: Replicated a Mutable closure!"); + +    if (RtsFlags.GranFlags.GranSimStats.Global &&   +	no_more_thunks_please && will_be_rbh) { +      globalGranStats.tot_cuts++; +      if ( RtsFlags.GranFlags.Debug.pack )  +	belch("**    PackClosure (w/ ThunksToPack=%d): Cutting tree with root at %#x\n", +		RtsFlags.GranFlags.ThunksToPack, closure); +    } else if (will_be_rbh || (closure==graph_root) ) { +      packed_thunks++; +      globalGranStats.tot_thunks++; +    } + +    if (no_more_thunks_please && will_be_rbh)  +      return; /* don't pack anything */ + +    /* actual PACKING done here --  HWL */ +    Pack(closure);          +    for (i = 0; i < ptrs; ++i) { +      /* extract i-th pointer from closure */ +      QueueClosure((StgClosure *)payloadPtr(closure,i)); +      IF_GRAN_DEBUG(pack, +		    belch("**    [%p (%s) (Queueing closure) ....]", +			  payloadPtr(closure,i), info_type(payloadPtr(closure,i)))); +    } + +    /*  +       for packing words (GUM only) do something like this: + +       for (i = 0; i < ptrs; ++i) { +         Pack(payloadWord(obj,i+j)); +       } +    */ +    /* Turn thunk into a revertible black hole. */ +    if (will_be_rbh) {  +	rbh = convertToRBH(closure); +	ASSERT(rbh != NULL); +    } +} +# endif  /* PAR */ + +//@node Low level packing routines, Unpacking routines, Packing Functions, Graph packing +//@subsection Low level packing routines + +/* +   @Pack@ is the basic packing routine.  It just writes a word of data into +   the pack buffer and increments the pack location.  */ + +//@cindex Pack + +# if defined(PAR) +static inline void +Pack(data) +StgWord data; +{ +  ASSERT(pack_locn < RtsFlags.ParFlags.packBufferSize); +  Bonzo->buffer[pack_locn++] = data; +} +#endif + +#if defined(GRAN) +static inline void +Pack(closure) +StgClosure *closure; +{ +  StgInfoTable *info; +  nat size, ptrs, nonptrs, vhs; +  char str[80]; + +  /* This checks the size of the GrAnSim internal pack buffer. 
The simulated +     pack buffer is checked via RoomToPack (as in GUM) */ +  if (pack_locn >= (int)Bonzo->size+sizeofW(rtsPackBuffer))  +    reallocPackBuffer(); + +  if (closure==(StgClosure*)NULL)  +    belch("Qagh {Pack}Daq: Trying to pack 0"); +  Bonzo->buffer[pack_locn++] = closure; +  /* ASSERT: Data is a closure in GrAnSim here */ +  info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str); +  unpacked_size += _HS + (size < MIN_UPD_SIZE ?  +				        MIN_UPD_SIZE :  +				        size); +} +# endif  /* GRAN */ + +/* +   If a closure is local, make it global.  Then, divide its weight for +   export.  The GA is then packed into the pack buffer.  */ + +# if defined(PAR) + +static void +GlobaliseAndPackGA(closure) +StgClosure *closure; +{ +  globalAddr *ga; +  globalAddr packGA; + +  if ((ga = LAGAlookup(closure)) == NULL) +    ga = makeGlobal(closure, rtsTrue); +  splitWeight(&packGA, ga); +  ASSERT(packGA.weight > 0); + +  IF_PAR_DEBUG(pack, +	       fprintf(stderr, "** Globalising closure %p (%s) with GA",  +		       closure, info_type(closure)); +	       printGA(&packGA); +	       fputc('\n', stderr)); + + +  Pack((StgWord) packGA.weight); +  Pack((StgWord) packGA.payload.gc.gtid); +  Pack((StgWord) packGA.payload.gc.slot); +} + +/* +   @PackPLC@ makes up a bogus GA for a PLC. Weight 0 implies that a PLC +   address follows instead of PE, slot.  */ + +//@cindex PackPLC + +static void +PackPLC(addr) +StgPtr addr; +{ +  Pack(0L);			/* weight */ +  Pack((StgWord) addr);		/* address */ +} + +/* +   @PackOffset@ packs a special GA value that will be interpreted as an +   offset to a closure in the pack buffer.  This is used to avoid unfolding +   the graph structure into a tree.  
*/ + +static void +PackOffset(offset) +int offset; +{ +  IF_PAR_DEBUG(pack, +	       belch("** Packing Offset %d at pack location %u", +		     offset, pack_locn)); +  Pack(1L);			/* weight */ +  Pack(0L);			/* pe */ +  Pack(offset);		        /* slot/offset */ +} +# endif  /* PAR */ + +//@node Unpacking routines, Aux fcts for packing, Low level packing routines, Graph packing +//@subsection Unpacking routines + +/* +  This was formerly in the (now deceased) module Unpack.c + +  Unpacking closures which have been exported to remote processors + +  This module defines routines for unpacking closures in the parallel +  runtime system (GUM). + +  In the case of GrAnSim, this module defines routines for *simulating* the +  unpacking of closures as it is done in the parallel runtime system. +*/ + +//@node GUM code, Local Definitions, Unpacking routines, Unpacking routines +//@subsubsection GUM code + +#if defined(PAR)  + +//@cindex InitPendingGABuffer +void +InitPendingGABuffer(size) +nat size;  +{ +  PendingGABuffer = (globalAddr *)  +                      stgMallocBytes(size*2*sizeof(globalAddr), +				     "InitPendingGABuffer"); +} + +/* +  @CommonUp@ commons up two closures which we have discovered to be +  variants of the same object.  One is made an indirection to the other.  
*/ + +//@cindex CommonUp +void +CommonUp(StgClosure *src, StgClosure *dst) +{ +  StgBlockingQueueElement *bqe; + +  ASSERT(src != (StgClosure *)NULL && dst != (StgClosure *)NULL); +  ASSERT(src != dst); + +  IF_PAR_DEBUG(verbose, +	       belch("__ CommonUp %p (%s) with %p (%s)", +		     src, info_type(src), dst, info_type(dst))); +   +  switch (get_itbl(src)->type) { +  case BLACKHOLE_BQ: +    bqe = ((StgBlockingQueue *)src)->blocking_queue; +    break; + +  case FETCH_ME_BQ: +    bqe = ((StgFetchMeBlockingQueue *)src)->blocking_queue; +    break; +     +  case RBH: +    bqe = ((StgRBH *)src)->blocking_queue; +    break; +     +  case BLACKHOLE: +  case FETCH_ME: +    bqe = END_BQ_QUEUE; +    break; + +  default: +    /* Don't common up anything else */ +    return; +  } +  /* NB: this also awakens the blocking queue for src */ +  UPD_IND(src, dst); +  // updateWithIndirection(src, dst); +  /* +    ASSERT(!IS_BIG_MOTHER(INFO_PTR(dst))); +    if (bqe != END_BQ_QUEUE) +    awaken_blocked_queue(bqe, src); +  */ +} + +/* +  @UnpackGraph@ unpacks the graph contained in a message buffer.  It +  returns a pointer to the new graph.  The @gamap@ parameter is set to +  point to an array of (oldGA,newGA) pairs which were created as a result +  of unpacking the buffer; @nGAs@ is set to the number of GA pairs which +  were created. + +  The format of graph in the pack buffer is as defined in @Pack.lc@.  
*/ + +//@cindex UnpackGraph +StgClosure * +UnpackGraph(packBuffer, gamap, nGAs) +rtsPackBuffer *packBuffer; +globalAddr **gamap; +nat *nGAs; +{ +  nat size, ptrs, nonptrs, vhs; +  StgWord **buffer, **bufptr, **slotptr; +  globalAddr ga, *gaga; +  StgClosure *closure, *existing, +             *graphroot, *graph, *parent; +  StgInfoTable *ip, *oldip; +  nat bufsize, i, +      pptr = 0, pptrs = 0, pvhs; +  rtsBool hasGA; +  char str[80]; + +  initPackBuffer();                  /* in case it isn't already init'd */ +  graphroot = (StgClosure *)NULL; + +  gaga = PendingGABuffer; + +  InitClosureQueue(); + +  /* Unpack the header */ +  bufsize = packBuffer->size; +  buffer = packBuffer->buffer; +  bufptr = buffer; + +  /* allocate heap */ +  if (bufsize > 0) { +    graph = allocate(bufsize); +    ASSERT(graph != NULL); +  } + +  parent = (StgClosure *)NULL; + +  do { +    /* This is where we will ultimately save the closure's address */ +    slotptr = bufptr; + +    /* First, unpack the next GA or PLC */ +    ga.weight = (rtsWeight) *bufptr++; + +    if (ga.weight > 0) { +      ga.payload.gc.gtid = (GlobalTaskId) *bufptr++; +      ga.payload.gc.slot = (int) *bufptr++; +    } else { +      ga.payload.plc = (StgPtr) *bufptr++; +    } + +    /* Now unpack the closure body, if there is one */ +    if (isFixed(&ga)) { +      /* No more to unpack; just set closure to local address */ +      IF_PAR_DEBUG(pack, +		   belch("_* Unpacked PLC at %x", ga.payload.plc));  +      hasGA = rtsFalse; +      closure = ga.payload.plc; +    } else if (isOffset(&ga)) { +      /* No more to unpack; just set closure to cached address */ +      IF_PAR_DEBUG(pack, +		   belch("_* Unpacked indirection to %p (was offset %x)",  +			 (StgClosure *) buffer[ga.payload.gc.slot], +			 ga.payload.gc.slot));  +      ASSERT(parent != (StgClosure *)NULL); +      hasGA = rtsFalse; +      closure = (StgClosure *) buffer[ga.payload.gc.slot]; +    } else { +      /* Now we have to build something. 
*/ +      hasGA = rtsTrue; + +      ASSERT(bufsize > 0); + +      /* +       * Close your eyes.  You don't want to see where we're looking. You +       * can't get closure info until you've unpacked the variable header, +       * but you don't know how big it is until you've got closure info. +       * So...we trust that the closure in the buffer is organized the +       * same way as they will be in the heap...at least up through the +       * end of the variable header. +       */ +      ip = get_closure_info(bufptr, &size, &ptrs, &nonptrs, &vhs, str); +	   +      /*  +	 Remember, the generic closure layout is as follows: +	 +-------------------------------------------------+ +	 | FIXED HEADER | VARIABLE HEADER | PTRS | NON-PRS | +	 +-------------------------------------------------+ +      */ +      /* Fill in the fixed header */ +      for (i = 0; i < _HS; i++) +	((StgPtr)graph)[i] = (StgWord)*bufptr++; + +      if (ip->type == FETCH_ME) +	size = ptrs = nonptrs = vhs = 0; + +      /* Fill in the packed variable header */ +      for (i = 0; i < vhs; i++) +	((StgPtr)graph)[_HS + i] = (StgWord)*bufptr++; + +      /* Pointers will be filled in later */ + +      /* Fill in the packed non-pointers */ +      for (i = 0; i < nonptrs; i++) +	((StgPtr)graph)[_HS + i + vhs + ptrs] = (StgWord)*bufptr++; +                 +      /* Indirections are never packed */ +      // ASSERT(INFO_PTR(graph) != (W_) Ind_info_TO_USE); + +      /* Add to queue for processing */ +      QueueClosure(graph); +	 +      /* +       * Common up the new closure with any existing closure having the same +       * GA +       */ + +      if ((existing = GALAlookup(&ga)) == NULL) { +	globalAddr *newGA; +	/* Just keep the new object */ +	IF_PAR_DEBUG(pack, +		     belch("_* Unpacking new GA ((%x, %d, %x))",  +			   ga.payload.gc.gtid, ga.payload.gc.slot, ga.weight)); + +	closure = graph; +	newGA = setRemoteGA(graph, &ga, rtsTrue); +	if (ip->type == FETCH_ME) +	  // FETCHME_GA(closure) = newGA; +	  
((StgFetchMe *)closure)->ga = newGA; +      } else { +	/* Two closures, one global name.  Someone loses */ +	oldip = get_itbl(existing); + +	if ((oldip->type == FETCH_ME ||  +	     // ToDo: don't pack a GA for these in the first place +             oldip->type == CONSTR || +             oldip->type == CONSTR_1_0 || +             oldip->type == CONSTR_0_1 || +             oldip->type == CONSTR_2_0 || +             oldip->type == CONSTR_1_1 || +             oldip->type == CONSTR_0_2 || +	     IS_BLACK_HOLE(existing)) && +	    ip->type != FETCH_ME) { + +	  /* What we had wasn't worth keeping */ +	  closure = graph; +	  CommonUp(existing, graph); +	} else { +	  StgWord ty; + +	  /* +	   * Either we already had something worthwhile by this name or +	   * the new thing is just another FetchMe.  However, the thing we +	   * just unpacked has to be left as-is, or the child unpacking +	   * code will fail.  Remember that the way pointer words are +	   * filled in depends on the info pointers of the parents being +	   * the same as when they were packed. +	   */ +	  IF_PAR_DEBUG(pack, +		       belch("_* Unpacking old GA ((%x, %d, %x)), keeping %#lx",  +			     ga.payload.gc.gtid, ga.payload.gc.slot, ga.weight, +			     existing)); + +	  closure = existing; +	  // HACK +	  ty = get_itbl(closure)->type; +	  if (ty == CONSTR || +	      ty == CONSTR_1_0 || +	      ty == CONSTR_0_1 || +	      ty == CONSTR_2_0 || +	      ty == CONSTR_1_1 || +	      ty == CONSTR_0_2) +	    CommonUp(closure, graph); +	   +	} +	/* Pool the total weight in the stored ga */ +	(void) addWeight(&ga); +      } + +      /* Sort out the global address mapping */ +      if (hasGA || // (ip_THUNK(ip) && !ip_UNPOINTED(ip)) ||  +	  (ip_MUTABLE(ip) && ip->type != FETCH_ME)) { +	/* Make up new GAs for single-copy closures */ +	globalAddr *newGA = makeGlobal(closure, rtsTrue); +	 +	// keep this assertion! 
+	// ASSERT(closure == graph); + +	/* Create an old GA to new GA mapping */ +	*gaga++ = ga; +	splitWeight(gaga, newGA); +	ASSERT(gaga->weight == 1L << (BITS_IN(unsigned) - 1)); +	gaga++; +      } +      graph += _HS + (size < MIN_UPD_SIZE ? MIN_UPD_SIZE : size); +    } + +    /* +     * Set parent pointer to point to chosen closure.  If we're at the top of +     * the graph (our parent is NULL), then we want to arrange to return the +     * chosen closure to our caller (possibly in place of the allocated graph +     * root.) +     */ +    if (parent == NULL) +      graphroot = closure; +    else +      ((StgPtr)parent)[_HS + pvhs + pptr] = (StgWord) closure; + +    /* Save closure pointer for resolving offsets */ +    *slotptr = (StgWord) closure; + +    /* Locate next parent pointer */ +    pptr++; +    while (pptr + 1 > pptrs) { +      parent = DeQueueClosure(); + +      if (parent == NULL) +	break; +      else { +	(void) get_closure_info(parent, &size, &pptrs, &nonptrs, +					&pvhs, str); +	pptr = 0; +      } +    } +  } while (parent != NULL); + +  //ASSERT(bufsize == 0 || graph - 1 <= SAVE_Hp); + +  *gamap = PendingGABuffer; +  *nGAs = (gaga - PendingGABuffer) / 2; + +  /* ToDo: are we *certain* graphroot has been set??? WDP 95/07 */ +  ASSERT(graphroot!=NULL); +  return (graphroot); +} +#endif  /* PAR */ + +//@node GranSim Code,  , Local Definitions, Unpacking routines +//@subsubsection GranSim Code + +/* +   For GrAnSim: No actual unpacking should be necessary. We just +   have to walk over the graph and set the bitmasks appropriately. +   Since we use RBHs similarly to GUM but without an ACK message/event +   we have to revert the RBH from within the UnpackGraph routine (good luck!) 
+   -- HWL  +*/ + +#if defined(GRAN) +void +CommonUp(StgClosure *src, StgClosure *dst) +{ +  barf("CommonUp: should never be entered in a GranSim setup"); +} + +StgClosure* +UnpackGraph(buffer) +rtsPackBuffer* buffer; +{ +  nat size, ptrs, nonptrs, vhs, +      bufptr = 0; +  StgClosure *closure, *graphroot, *graph; +  StgInfoTable *ip; +  StgWord bufsize, unpackedsize, +          pptr = 0, pptrs = 0, pvhs; +  StgTSO* tso; +  char str[240], str1[80]; +  int i; + +  bufptr = 0; +  graphroot = buffer->buffer[0]; + +  tso = buffer->tso; + +  /* Unpack the header */ +  unpackedsize = buffer->unpacked_size; +  bufsize = buffer->size; + +  IF_GRAN_DEBUG(pack, +		belch("<<< Unpacking <<%d>> (buffer @ %p):\n    (root @ %p, PE %d,size=%d), demanded by TSO %d (%p)[PE %d]", +		      buffer->id, buffer, graphroot, where_is(graphroot),  +		      bufsize, tso->id, tso,  +		      where_is((StgClosure *)tso))); + +  do { +    closure = buffer->buffer[bufptr++]; /* that's all we need for GrAnSim -- HWL */ +       +    /* Actually only ip is needed; rest is useful for TESTING -- HWL */ +    ip = get_closure_info(closure,  +			  &size, &ptrs, &nonptrs, &vhs, str); +       +    IF_GRAN_DEBUG(pack, +		  sprintf(str, "**    (%p): Changing bitmask[%s]: 0x%x ", +			  closure, (closure_HNF(closure) ? 
"NF" : "__"), +			  PROCS(closure))); + +    if (get_itbl(closure)->type == RBH) { +      /* if it's an RBH, we have to revert it into a normal closure, thereby +	 awakening the blocking queue; not that this is code currently not +	 needed in GUM, but it should be added with the new features in +	 GdH (and the implementation of an NACK message) +      */ +      // closure->header.gran.procs = PE_NUMBER(CurrentProc); +      SET_GRAN_HDR(closure, PE_NUMBER(CurrentProc));    /* Move node */ + +      IF_GRAN_DEBUG(pack, +		    strcat(str, " (converting RBH) "));  + +      convertFromRBH(closure);   /* In GUM that's done by convertToFetchMe */ + +      IF_GRAN_DEBUG(pack, +		    belch("::  closure %p (%s) is a RBH; after reverting: IP=%p", +			  closure, info_type(closure), get_itbl(closure))); +    } else if (IS_BLACK_HOLE(closure)) { +      IF_GRAN_DEBUG(pack, +		    belch("::  closure %p (%s) is a BH; copying node to %d", +			  closure, info_type(closure), CurrentProc)); +      closure->header.gran.procs |= PE_NUMBER(CurrentProc); /* Copy node */ +    } else if ( (closure->header.gran.procs & PE_NUMBER(CurrentProc)) == 0 ) { +      if (closure_HNF(closure)) { +	IF_GRAN_DEBUG(pack, +		      belch("::  closure %p (%s) is a HNF; copying node to %d", +			    closure, info_type(closure), CurrentProc)); +	closure->header.gran.procs |= PE_NUMBER(CurrentProc); /* Copy node */ +      } else {  +	IF_GRAN_DEBUG(pack, +		      belch("::  closure %p (%s) is no (R)BH or HNF; moving node to %d", +			    closure, info_type(closure), CurrentProc)); +	closure->header.gran.procs = PE_NUMBER(CurrentProc);  /* Move node */ +      } +    } + +    IF_GRAN_DEBUG(pack, +		  sprintf(str1, "0x%x",   PROCS(closure)); strcat(str, str1)); +    IF_GRAN_DEBUG(pack, belch(str)); +     +  } while (bufptr<buffer->size) ;   /*  (parent != NULL);  */ + +  /* In GrAnSim we allocate pack buffers dynamically! 
-- HWL */ +  free(buffer->buffer); +  free(buffer); + +  IF_GRAN_DEBUG(pack, +		belch("PrintGraph of %p is:", graphroot); PrintGraph(graphroot,0)); + +  return (graphroot); +} +#endif  /* GRAN */ + +//@node Aux fcts for packing, Printing Packet Contents, Unpacking routines, Graph packing +//@subsection Aux fcts for packing + +//@menu +//* Offset table::		 +//* Packet size::		 +//* Types of Global Addresses::	  +//* Closure Info::		 +//@end menu + +//@node Offset table, Packet size, Aux fcts for packing, Aux fcts for packing +//@subsubsection Offset table + +/* +   DonePacking is called when we've finished packing.  It releases memory +   etc.  */ + +//@cindex DonePacking + +# if defined(PAR) + +static void +DonePacking(void) +{ +  freeHashTable(offsetTable, NULL); +  offsetTable = NULL; +} + +/* +   AmPacking records that the closure is being packed.  Note the abuse of +   the data field in the hash table -- this saves calling @malloc@!  */ + +//@cindex AmPacking + +static void +AmPacking(closure) +StgClosure *closure; +{ +/*    IF_PAR_DEBUG(pack, */ +/*  	       fprintf(stderr, "** AmPacking %p (%s)(IP %p) at %u\n",  */ +/*  		       closure, info_type(closure), get_itbl(closure), pack_locn)); */ + +  insertHashTable(offsetTable, (StgWord) closure, (void *) (StgWord) pack_locn); +} + +/* +   OffsetFor returns an offset for a closure which is already being packed.  */ + +//@cindex OffsetFor + +static int +OffsetFor(closure) +StgClosure *closure; +{ +  return (int) (StgWord) lookupHashTable(offsetTable, (StgWord) closure); +} + +/* +   NotYetPacking determines whether the closure's already being packed. +   Offsets $<$ @PACK_HDR_SIZE@ (e.g. 0) mean no.  */ + +//@cindex NotYetPacking + +static rtsBool +NotYetPacking(offset) +int offset; +{ +  return(offset == 0); // ToDo: what if root is found again?? FIX  +} + +# else  /* GRAN */ + +static void +DonePacking(void) +{ +  /* nothing */ +} + +/*  +   NotYetPacking searches through the whole pack buffer for closure.  
*/ + +static rtsBool +NotYetPacking(closure) +StgClosure *closure; +{ nat i; +  rtsBool found = rtsFalse; + +  for (i=0; (i<pack_locn) && !found; i++) +    found = Bonzo->buffer[i]==closure; + +  return (!found); +} +# endif + +//@node Packet size, Types of Global Addresses, Offset table, Aux fcts for packing +//@subsubsection Packet size + +/* +  RoomToPack determines whether there's room to pack the closure into +  the pack buffer based on  + +  o how full the buffer is already, +  o the closures' size and number of pointers (which must be packed as GAs), +  o the size and number of pointers held by any primitive arrays that it  +    points to +   +    It has a *side-effect* (naughty, naughty) in assigning RoomInBuffer  +    to rtsFalse. +*/ + +//@cindex RoomToPack +static rtsBool +RoomToPack(size, ptrs) +nat size, ptrs; +{ +# if defined(PAR) +  if (RoomInBuffer && +      (pack_locn + reservedPAsize + size + +       ((clq_size - clq_pos) + ptrs) * PACK_FETCHME_SIZE >= RTS_PACK_BUFFER_SIZE)) +    { +      IF_PAR_DEBUG(pack, +		   fprintf(stderr, "Buffer full\n")); + +      RoomInBuffer = rtsFalse; +    } +# else   /* GRAN */ +  if (RoomInBuffer && +      (unpacked_size + reservedPAsize + size + +       ((clq_size - clq_pos) + ptrs) * PACK_FETCHME_SIZE >= RTS_PACK_BUFFER_SIZE)) +    { +      IF_GRAN_DEBUG(packBuffer, +		    fprintf(stderr, "Buffer full\n")); +      RoomInBuffer = rtsFalse; +    } +# endif +  return (RoomInBuffer); +} + +//@node Types of Global Addresses, Closure Info, Packet size, Aux fcts for packing +//@subsubsection Types of Global Addresses + +/* +  Types of Global Addresses + +  These routines determine whether a GA is one of a number of special types +  of GA. 
+*/ + +# if defined(PAR) +//@cindex isOffset +rtsBool +isOffset(ga) +globalAddr *ga; +{ +    return (ga->weight == 1 && ga->payload.gc.gtid == 0); +} + +//@cindex isFixed +rtsBool +isFixed(ga) +globalAddr *ga; +{ +    return (ga->weight == 0); +} +# endif + +//@node Closure Info,  , Types of Global Addresses, Aux fcts for packing +//@subsubsection Closure Info + +/* +   Closure Info + +   @get_closure_info@ determines the size, number of pointers etc. for this +   type of closure -- see @SMInfoTables.lh@ for the legal info. types etc. + +[Can someone please keep this function up to date.  I keep needing it + (or something similar) for interpretive code, and it keeps + bit-rotting.  {\em It really belongs somewhere else too}.  KH @@ 17/2/95] */ + +#if 0 + +// {Parallel.h}Daq ngoqvam vIroQpu' + +# if defined(GRAN) || defined(PAR) +/* extracting specific info out of closure; currently only used in GRAN -- HWL */ +//@cindex get_closure_info +StgInfoTable* +get_closure_info(node, size, ptrs, nonptrs, vhs, info_hdr_ty) +StgClosure* node; +nat *size, *ptrs, *nonptrs, *vhs; +char *info_hdr_ty; +{ +  StgInfoTable *info; + +  info = get_itbl(node); +  /* the switch shouldn't be necessary, really; just use default case */ +  switch (info->type) { +#if 0 +   case CONSTR_1_0: +   case THUNK_1_0: +   case FUN_1_0: +     *size = sizeW_fromITBL(info); +     *ptrs = (nat) 1; // (info->layout.payload.ptrs); +     *nonptrs = (nat) 0; // (info->layout.payload.nptrs); +     *vhs = (nat) 0; // unknown +     info_hdr_type(node, info_hdr_ty); +     return info; +      +  case CONSTR_0_1: +  case THUNK_0_1: +  case FUN_0_1: +     *size = sizeW_fromITBL(info); +     *ptrs = (nat) 0; // (info->layout.payload.ptrs); +     *nonptrs = (nat) 1; // (info->layout.payload.nptrs); +     *vhs = (nat) 0; // unknown +     info_hdr_type(node, info_hdr_ty); +     return info; + +  case CONSTR_2_0: +  case THUNK_2_0: +  case FUN_2_0: +     *size = sizeW_fromITBL(info); +     *ptrs = (nat) 2; // 
(info->layout.payload.ptrs); +     *nonptrs = (nat) 0; // (info->layout.payload.nptrs); +     *vhs = (nat) 0; // unknown +     info_hdr_type(node, info_hdr_ty); +     return info; + +  case CONSTR_1_1: +  case THUNK_1_1: +  case FUN_1_1: +     *size = sizeW_fromITBL(info); +     *ptrs = (nat) 1; // (info->layout.payload.ptrs); +     *nonptrs = (nat) 1; // (info->layout.payload.nptrs); +     *vhs = (nat) 0; // unknown +     info_hdr_type(node, info_hdr_ty); +     return info; + +  case CONSTR_0_2: +  case THUNK_0_2: +  case FUN_0_2: +     *size = sizeW_fromITBL(info); +     *ptrs = (nat) 0; // (info->layout.payload.ptrs); +     *nonptrs = (nat) 2; // (info->layout.payload.nptrs); +     *vhs = (nat) 0; // unknown +     info_hdr_type(node, info_hdr_ty); +     return info; +#endif +  case RBH: +    { +      StgInfoTable *rip = REVERT_INFOPTR(info); // closure to revert to +      *size = sizeW_fromITBL(rip); +      *ptrs = (nat) (rip->layout.payload.ptrs); +      *nonptrs = (nat) (rip->layout.payload.nptrs); +      *vhs = (nat) 0; // unknown +      info_hdr_type(node, info_hdr_ty); +      return rip;  // NB: we return the reverted info ptr for a RBH!!!!!! +    } + +  default: +    *size = sizeW_fromITBL(info); +    *ptrs = (nat) (info->layout.payload.ptrs); +    *nonptrs = (nat) (info->layout.payload.nptrs); +    *vhs = (nat) 0; // unknown +    info_hdr_type(node, info_hdr_ty); +    return info; +  } +}  + +//@cindex IS_BLACK_HOLE +rtsBool +IS_BLACK_HOLE(StgClosure* node)           +{  +  StgInfoTable *info; +  info = get_itbl(node); +  return ((info->type == BLACKHOLE || info->type == RBH) ? 
rtsTrue : rtsFalse); +} + +//@cindex IS_INDIRECTION +StgClosure * +IS_INDIRECTION(StgClosure* node)           +{  +  StgInfoTable *info; +  info = get_itbl(node); +  switch (info->type) { +    case IND: +    case IND_OLDGEN: +    case IND_PERM: +    case IND_OLDGEN_PERM: +    case IND_STATIC: +      /* relies on indirectee being at same place for all these closure types */ +      return (((StgInd*)node) -> indirectee); +    default: +      return NULL; +  } +} + +/* +rtsBool +IS_THUNK(StgClosure* node) +{ +  StgInfoTable *info; +  info = get_itbl(node); +  return ((info->type == THUNK || +	   info->type == THUNK_STATIC || +	   info->type == THUNK_SELECTOR) ? rtsTrue : rtsFalse); +} +*/ + +# endif /* GRAN */ +#endif /* 0 */ + +# if 0 +/* ngoq ngo' */ + +P_ +get_closure_info(closure, size, ptrs, nonptrs, vhs, type) +P_ closure; +W_ *size, *ptrs, *nonptrs, *vhs; +char *type; +{ +   P_ ip = (P_) INFO_PTR(closure); + +   if (closure==NULL) { +     fprintf(stderr, "Qagh {get_closure_info}Daq: NULL closure\n"); +     *size = *ptrs = *nonptrs = *vhs = 0;  +     strcpy(type,"ERROR in get_closure_info"); +     return; +   } else if (closure==PrelBase_Z91Z93_closure) { +     /* fprintf(stderr, "Qagh {get_closure_info}Daq: PrelBase_Z91Z93_closure closure\n"); */ +     *size = *ptrs = *nonptrs = *vhs = 0;  +     strcpy(type,"PrelBase_Z91Z93_closure"); +     return; +   }; + +    ip = (P_) INFO_PTR(closure); + +    switch (INFO_TYPE(ip)) { +    case INFO_SPEC_U_TYPE: +    case INFO_SPEC_S_TYPE: +    case INFO_SPEC_N_TYPE: +	*size = SPEC_CLOSURE_SIZE(closure); +	*ptrs = SPEC_CLOSURE_NoPTRS(closure); +	*nonptrs = SPEC_CLOSURE_NoNONPTRS(closure); +	*vhs = 0 /*SPEC_VHS*/; +	strcpy(type,"SPEC"); +	break; + +    case INFO_GEN_U_TYPE: +    case INFO_GEN_S_TYPE: +    case INFO_GEN_N_TYPE: +	*size = GEN_CLOSURE_SIZE(closure); +	*ptrs = GEN_CLOSURE_NoPTRS(closure); +	*nonptrs = GEN_CLOSURE_NoNONPTRS(closure); +	*vhs = GEN_VHS; +	strcpy(type,"GEN"); +	break; + +    case INFO_DYN_TYPE: +	
*size = DYN_CLOSURE_SIZE(closure); +	*ptrs = DYN_CLOSURE_NoPTRS(closure); +	*nonptrs = DYN_CLOSURE_NoNONPTRS(closure); +	*vhs = DYN_VHS; +	strcpy(type,"DYN"); +	break; + +    case INFO_TUPLE_TYPE: +	*size = TUPLE_CLOSURE_SIZE(closure); +	*ptrs = TUPLE_CLOSURE_NoPTRS(closure); +	*nonptrs = TUPLE_CLOSURE_NoNONPTRS(closure); +	*vhs = TUPLE_VHS; +	strcpy(type,"TUPLE"); +	break; + +    case INFO_DATA_TYPE: +	*size = DATA_CLOSURE_SIZE(closure); +	*ptrs = DATA_CLOSURE_NoPTRS(closure); +	*nonptrs = DATA_CLOSURE_NoNONPTRS(closure); +	*vhs = DATA_VHS; +	strcpy(type,"DATA"); +	break; + +    case INFO_IMMUTUPLE_TYPE: +    case INFO_MUTUPLE_TYPE: +	*size = MUTUPLE_CLOSURE_SIZE(closure); +	*ptrs = MUTUPLE_CLOSURE_NoPTRS(closure); +	*nonptrs = MUTUPLE_CLOSURE_NoNONPTRS(closure); +	*vhs = MUTUPLE_VHS; +	strcpy(type,"(IM)MUTUPLE"); +	break; + +    case INFO_STATIC_TYPE: +	*size = STATIC_CLOSURE_SIZE(closure); +	*ptrs = STATIC_CLOSURE_NoPTRS(closure); +	*nonptrs = STATIC_CLOSURE_NoNONPTRS(closure); +	*vhs = STATIC_VHS; +	strcpy(type,"STATIC"); +	break; + +    case INFO_CAF_TYPE: +    case INFO_IND_TYPE: +	*size = IND_CLOSURE_SIZE(closure); +	*ptrs = IND_CLOSURE_NoPTRS(closure); +	*nonptrs = IND_CLOSURE_NoNONPTRS(closure); +	*vhs = IND_VHS; +	strcpy(type,"CAF|IND"); +	break; + +    case INFO_CONST_TYPE: +	*size = CONST_CLOSURE_SIZE(closure); +	*ptrs = CONST_CLOSURE_NoPTRS(closure); +	*nonptrs = CONST_CLOSURE_NoNONPTRS(closure); +	*vhs = CONST_VHS; +	strcpy(type,"CONST"); +	break; + +    case INFO_SPEC_RBH_TYPE: +	*size = SPEC_RBH_CLOSURE_SIZE(closure); +	*ptrs = SPEC_RBH_CLOSURE_NoPTRS(closure); +	*nonptrs = SPEC_RBH_CLOSURE_NoNONPTRS(closure); +	if (*ptrs <= 2) { +	    *nonptrs -= (2 - *ptrs); +	    *ptrs = 1; +	} else +	    *ptrs -= 1; +	*vhs = SPEC_RBH_VHS; +	strcpy(type,"SPEC_RBH"); +	break; + +    case INFO_GEN_RBH_TYPE: +	*size = GEN_RBH_CLOSURE_SIZE(closure); +	*ptrs = GEN_RBH_CLOSURE_NoPTRS(closure); +	*nonptrs = GEN_RBH_CLOSURE_NoNONPTRS(closure); +	if (*ptrs <= 2) { +	    
*nonptrs -= (2 - *ptrs); +	    *ptrs = 1; +	} else +	    *ptrs -= 1; +	*vhs = GEN_RBH_VHS; +	strcpy(type,"GEN_RBH"); +	break; + +    case INFO_CHARLIKE_TYPE: +	*size = CHARLIKE_CLOSURE_SIZE(closure); +	*ptrs = CHARLIKE_CLOSURE_NoPTRS(closure); +	*nonptrs = CHARLIKE_CLOSURE_NoNONPTRS(closure); +	*vhs = CHARLIKE_VHS; +	strcpy(type,"CHARLIKE"); +	break; + +    case INFO_INTLIKE_TYPE: +	*size = INTLIKE_CLOSURE_SIZE(closure); +	*ptrs = INTLIKE_CLOSURE_NoPTRS(closure); +	*nonptrs = INTLIKE_CLOSURE_NoNONPTRS(closure); +	*vhs = INTLIKE_VHS; +	strcpy(type,"INTLIKE"); +	break; + +#  if !defined(GRAN) +    case INFO_FETCHME_TYPE: +	*size = FETCHME_CLOSURE_SIZE(closure); +        *ptrs = FETCHME_CLOSURE_NoPTRS(closure); +        *nonptrs = FETCHME_CLOSURE_NoNONPTRS(closure); +        *vhs = FETCHME_VHS; +	strcpy(type,"FETCHME"); +	break; + +    case INFO_FMBQ_TYPE: +	*size = FMBQ_CLOSURE_SIZE(closure); +        *ptrs = FMBQ_CLOSURE_NoPTRS(closure); +        *nonptrs = FMBQ_CLOSURE_NoNONPTRS(closure); +        *vhs = FMBQ_VHS; +	strcpy(type,"FMBQ"); +	break; +#  endif + +    case INFO_BQ_TYPE: +	*size = BQ_CLOSURE_SIZE(closure); +        *ptrs = BQ_CLOSURE_NoPTRS(closure); +        *nonptrs = BQ_CLOSURE_NoNONPTRS(closure); +        *vhs = BQ_VHS; +	strcpy(type,"BQ"); +	break; + +    case INFO_BH_TYPE: +	*size = BH_CLOSURE_SIZE(closure); +        *ptrs = BH_CLOSURE_NoPTRS(closure); +        *nonptrs = BH_CLOSURE_NoNONPTRS(closure); +        *vhs = BH_VHS; +	strcpy(type,"BH"); +	break; + +    case INFO_TSO_TYPE: +	*size = 0; /* TSO_CLOSURE_SIZE(closure); */ +        *ptrs = 0; /* TSO_CLOSURE_NoPTRS(closure); */ +        *nonptrs = 0; /* TSO_CLOSURE_NoNONPTRS(closure); */ +        *vhs = TSO_VHS; +	strcpy(type,"TSO"); +	break; + +    case INFO_STKO_TYPE: +        *size = 0; +    	*ptrs = 0; +        *nonptrs = 0; +    	*vhs = STKO_VHS; +    	strcpy(type,"STKO"); +        break; + +    default: +	fprintf(stderr, "get_closure_info:  Unexpected closure type (%lu), closure %lx\n", +	  
INFO_TYPE(ip), (StgWord) closure); +	EXIT(EXIT_FAILURE); +    } + +    return ip; +} +# endif + +# if 0 +// Use allocate in Storage.c instead +/* +   @AllocateHeap@ will bump the heap pointer by @size@ words if the space +   is available, but it will not perform garbage collection. +   ToDo: check whether we can use an existing STG allocation routine -- HWL +*/ + + +//@cindex AllocateHeap +StgPtr +AllocateHeap(size) +nat size; +{ +  StgPtr newClosure; +   +  /* Allocate a new closure */ +  if (Hp + size > HpLim) +    return NULL; +   +  newClosure = Hp + 1; +  Hp += size; +   +  return newClosure; +} +# endif + +# if defined(PAR) + +//@cindex doGlobalGC +void +doGlobalGC(void) +{ +  fprintf(stderr,"Splat -- we just hit global GC!\n"); +  stg_exit(EXIT_FAILURE); +  //fishing = rtsFalse; +  outstandingFishes--; +} + +# endif /* PAR */ + +//@node Printing Packet Contents, End of file, Aux fcts for packing, Graph packing +//@subsection Printing Packet Contents +/* +  Printing Packet Contents +  */ + +#if defined(DEBUG) || defined(GRAN_CHECK) + +//@cindex PrintPacket + +#if defined(PAR) +void +PrintPacket(packBuffer) +rtsPackBuffer *packBuffer; +{ +  StgClosure *parent, *graphroot, *closure_start; +  StgInfoTable *ip, *oldip; +  globalAddr ga; +  StgWord **buffer, **bufptr, **slotptr; + +  nat bufsize; +  nat pptr = 0, pptrs = 0, pvhs; +  nat unpack_locn = 0; +  nat gastart = unpack_locn; +  nat closurestart = unpack_locn; +  nat i; +  nat size, ptrs, nonptrs, vhs; +  char str[80]; + +  /* NB: this whole routine is more or less a copy of UnpackGraph with all +     unpacking components replaced by printing fcts +     Long live higher-order fcts! 
+  */ +  initPackBuffer();                  /* in case it isn't already init'd */ +  graphroot = (StgClosure *)NULL; + +  // gaga = PendingGABuffer; + +  InitClosureQueue(); + +  /* Unpack the header */ +  bufsize = packBuffer->size; +  buffer = packBuffer->buffer; +  bufptr = buffer; + +  /* allocate heap  +  if (bufsize > 0) { +    graph = allocate(bufsize); +    ASSERT(graph != NULL); +  } +  */ + +  fprintf(stderr, ".* Printing <<%d>> (buffer @ %p):\n",  +	  packBuffer->id, packBuffer); +  fprintf(stderr, ".*   size: %d; unpacked_size: %d; tso: %p; buffer: %p\n", +	  packBuffer->size, packBuffer->unpacked_size,  +	  packBuffer->tso, packBuffer->buffer); + +  parent = (StgClosure *)NULL; + +  do { +    /* This is where we will ultimately save the closure's address */ +    slotptr = bufptr; + +    /* First, unpack the next GA or PLC */ +    ga.weight = (rtsWeight) *bufptr++; + +    if (ga.weight > 0) { +      ga.payload.gc.gtid = (GlobalTaskId) *bufptr++; +      ga.payload.gc.slot = (int) *bufptr++; +    } else +      ga.payload.plc = (StgPtr) *bufptr++; +     +    /* Now unpack the closure body, if there is one */ +    if (isFixed(&ga)) { +      fprintf(stderr, ".* [%u]: PLC @ %p\n", gastart, ga.payload.plc); +      // closure = ga.payload.plc; +    } else if (isOffset(&ga)) { +      fprintf(stderr, ".* [%u]: OFFSET TO [%d]\n", gastart, ga.payload.gc.slot); +      // closure = (StgClosure *) buffer[ga.payload.gc.slot]; +    } else { +      /* Print normal closures */ + +      ASSERT(bufsize > 0); + +      fprintf(stderr, ".* [%u]: ((%x, %d, %x)) ", gastart,  +              ga.payload.gc.gtid, ga.payload.gc.slot, ga.weight); + +      closure_start = bufptr; +      ip = get_closure_info(bufptr, &size, &ptrs, &nonptrs, &vhs, str); +	   +      /*  +	 Remember, the generic closure layout is as follows: +	 +-------------------------------------------------+ +	 | FIXED HEADER | VARIABLE HEADER | PTRS | NON-PRS | +	 +-------------------------------------------------+ +  
    */ +      /* Print fixed header */ +      fprintf(stderr, "FH [");  +      for (i = 0; i < _HS; i++) +	fprintf(stderr, " %p", *bufptr++); + +      if (ip->type == FETCH_ME) +	size = ptrs = nonptrs = vhs = 0; + +      /* Print variable header */ +      fprintf(stderr, "] VH [");  +      for (i = 0; i < vhs; i++) +	fprintf(stderr, " %p", *bufptr++); + +      fprintf(stderr, "] %d PTRS [", ptrs);  + +      /* Pointers will be filled in later */ + +      fprintf(stderr, " ] %d NON-PTRS [", nonptrs);  +      /* Print non-pointers */ +      for (i = 0; i < nonptrs; i++) +	fprintf(stderr, " %p", *bufptr++); + +      fprintf(stderr, "] (%s)\n", str); + +      /* Indirections are never packed */ +      // ASSERT(INFO_PTR(graph) != (W_) Ind_info_TO_USE); + +      /* Add to queue for processing  +	 When just printing the packet we do not have an unpacked closure +	 in hand, so we feed it the packet entry;  +	 again, this assumes that at least the fixed header of the closure +	 has the same layout in the packet; also we may not overwrite entries +	 in the packet (done in Unpack), but for printing that's a bad idea +	 anyway */ +      QueueClosure((StgClosure *)closure_start); +	 +      /* No Common up needed for printing */ + +      /* No Sort out the global address mapping for printing */ + +    } /* normal closure case */ + +    /* Locate next parent pointer */ +    pptr++; +    while (pptr + 1 > pptrs) { +      parent = DeQueueClosure(); + +      if (parent == NULL) +	break; +      else { +	(void) get_closure_info(parent, &size, &pptrs, &nonptrs, +					&pvhs, str); +	pptr = 0; +      } +    } +  } while (parent != NULL); +  fprintf(stderr, ".* --- End packet <<%d>> ---\n", packBuffer->id); +} +#else  /* GRAN */ +void +PrintPacket(buffer) +rtsPackBuffer *buffer; +{ +    // extern char *info_hdr_type(P_ infoptr);  /* defined in Threads.lc */ +    // extern char *display_info_type(P_ infoptr);      /* defined in Threads.lc */ + +    StgInfoTable *info; +    nat size, ptrs, 
nonptrs, vhs; +    char info_hdr_ty[80]; +    char str1[80], str2[80], junk_str[80]; + +    /* globalAddr ga; */ + +    nat bufsize, unpacked_size ; +    StgClosure *parent; +    nat pptr = 0, pptrs = 0, pvhs; + +    nat unpack_locn = 0; +    nat gastart = unpack_locn; +    nat closurestart = unpack_locn; + +    StgTSO *tso; +    StgClosure *closure, *p; + +    nat i; + +    fprintf(stderr, "*** Printing <<%d>> (buffer @ %p):\n", buffer->id, buffer); +    fprintf(stderr, "  size: %d; unpacked_size: %d; tso: %d (%p); buffer: %p\n", +	    buffer->size, buffer->unpacked_size, buffer->tso, buffer->buffer); +    fputs("  contents: ", stderr); +    for (unpack_locn=0; unpack_locn<buffer->size; unpack_locn++) { +      closure = buffer->buffer[unpack_locn]; +      fprintf(stderr, ", %p (%s)",  +	      closure, info_type(closure));  +    } +    fputc('\n', stderr); + +#if 0 +    /* traverse all elements of the graph; omitted for now, but might be usefule */ +    InitClosureQueue(); + +    tso = buffer->tso; + +    /* Unpack the header */ +    unpacked_size = buffer->unpacked_size; +    bufsize = buffer->size; + +    fprintf(stderr, "Packet %p, size %u (unpacked size is %u); demanded by TSO %d (%p)[PE %d]\n--- Begin ---\n",  +	            buffer, bufsize, unpacked_size,   +	            tso->id, tso, where_is((StgClosure*)tso)); + +    do { +	closurestart = unpack_locn; +	closure = buffer->buffer[unpack_locn++]; +	 +	fprintf(stderr, "[%u]: (%p) ", closurestart, closure); + +	info = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str1); +	strcpy(str2, str1); +	fprintf(stderr, "(%s|%s) ", str1, str2); +	 +        if (info->type == FETCH_ME || info->type == FETCH_ME_BQ ||  +	    IS_BLACK_HOLE(closure)) +	  size = ptrs = nonptrs = vhs = 0; +	 +	if (closure_THUNK(closure)) { +		if (closure_UNPOINTED(closure)) +		    fputs("UNPOINTED ", stderr); +		else +		    fputs("POINTED ", stderr); +	}  +        if (IS_BLACK_HOLE(closure)) { +		fputs("BLACK HOLE\n", stderr); +	} else { 
+		/* Fixed header */ +		fprintf(stderr, "FH [");  +		for (i = 0, p = (StgClosure*)&(closure->header); i < _HS; i++, p++) +		    fprintf(stderr, " %p", *p); +	 +		/* Variable header  +		if (vhs > 0) { +		    fprintf(stderr, "] VH [%p", closure->payload[_HS]); +	 +		    for (i = 1; i < vhs; i++) +			fprintf(stderr, " %p", closure->payload[_HS+i]); +		} +		*/ +		fprintf(stderr, "] PTRS %u", ptrs); +	 +		/* Non-pointers */ +		if (nonptrs > 0) { +		    fprintf(stderr, " NPTRS [%p", closure->payload[_HS+vhs]); +		 +		    for (i = 1; i < nonptrs; i++) +			fprintf(stderr, " %p", closure->payload[_HS+vhs+i]); +	 +		    putc(']', stderr); +		} +		putc('\n', stderr); +	} +    } while (unpack_locn<bufsize) ;  /* (parent != NULL); */ + +    fprintf(stderr, "--- End ---\n\n"); +#endif /* 0 */ +} +#endif /* PAR */ +#endif /* DEBUG || GRAN_CHECK */ + +#endif /* PAR  || GRAN  -- whole file */ + +//@node End of file,  , Printing Packet Contents, Graph packing +//@subsection End of file +//@index +//* AllocClosureQueue::  @cindex\s-+AllocClosureQueue +//* AllocateHeap::  @cindex\s-+AllocateHeap +//* AmPacking::  @cindex\s-+AmPacking +//* CommonUp::  @cindex\s-+CommonUp +//* DeQueueClosure::  @cindex\s-+DeQueueClosure +//* DonePacking::  @cindex\s-+DonePacking +//* IS_BLACK_HOLE::  @cindex\s-+IS_BLACK_HOLE +//* IS_INDIRECTION::  @cindex\s-+IS_INDIRECTION +//* InitClosureQueue::  @cindex\s-+InitClosureQueue +//* InitPendingGABuffer::  @cindex\s-+InitPendingGABuffer +//* NotYetPacking::  @cindex\s-+NotYetPacking +//* OffsetFor::  @cindex\s-+OffsetFor +//* Pack::  @cindex\s-+Pack +//* PackClosure::  @cindex\s-+PackClosure +//* PackNearbyGraph::  @cindex\s-+PackNearbyGraph +//* PackOneNode::  @cindex\s-+PackOneNode +//* PackPLC::  @cindex\s-+PackPLC +//* PackStkO::  @cindex\s-+PackStkO +//* PackTSO::  @cindex\s-+PackTSO +//* PendingGABuffer::  @cindex\s-+PendingGABuffer +//* PrintPacket::  @cindex\s-+PrintPacket +//* QueueClosure::  @cindex\s-+QueueClosure +//* QueueEmpty::  
@cindex\s-+QueueEmpty +//* RoomToPack::  @cindex\s-+RoomToPack +//* UnpackGraph::  @cindex\s-+UnpackGraph +//* doGlobalGC::  @cindex\s-+doGlobalGC +//* get_closure_info::  @cindex\s-+get_closure_info +//* get_closure_info::  @cindex\s-+get_closure_info +//* initPackBuffer::  @cindex\s-+initPackBuffer +//* isFixed::  @cindex\s-+isFixed +//* isOffset::  @cindex\s-+isOffset +//* offsetTable::  @cindex\s-+offsetTable +//@end index diff --git a/ghc/rts/parallel/ParInit.c b/ghc/rts/parallel/ParInit.c new file mode 100644 index 0000000000..d54ff000f2 --- /dev/null +++ b/ghc/rts/parallel/ParInit.c @@ -0,0 +1,227 @@ +/* -------------------------------------------------------------------------- +   Time-stamp: <Sat Dec 04 1999 18:26:22 Stardate: [-30]3998.84 hwloidl> +   $Id: ParInit.c,v 1.2 2000/01/13 14:34:08 hwloidl Exp $ + +   Initialising the parallel RTS + +   An extension based on Kevin Hammond's GRAPH for PVM version +   P. Trinder, January 17th 1995. +   Adapted for the new RTS +   P. Trinder, July 1997. +   H-W. Loidl, November 1999. + +   ------------------------------------------------------------------------ */ + +#ifdef PAR /* whole file */ + +#define NON_POSIX_SOURCE /* so says Solaris */ + +//@menu +//* Includes::			 +//* Global variables::		 +//* Initialisation Routines::	 +//@end menu + +//@node Includes, Global variables +//@subsection Includes + +#include "Rts.h" +#include "RtsFlags.h" +#include "RtsUtils.h" +#include "ParallelRts.h" +#include <setjmp.h> +#include "LLC.h" +#include "HLC.h" + +//@node Global variables, Initialisation Routines, Includes +//@subsection Global variables + +/* Global conditions defined here. */ + +rtsBool	IAmMainThread = rtsFalse,	/* Set for the main thread	*/ +	GlobalStopPending = rtsFalse;	/* Terminating			*/ + +/* Task identifiers for various interesting global tasks. 
*/ + +GlobalTaskId IOTask = 0,                /* The IO Task Id		*/ +             SysManTask = 0,            /* The System Manager Task Id	*/ +             mytid = 0;                 /* This PE's Task Id		*/ + +rtsTime 	main_start_time;	/* When the program started	*/ +rtsTime   	main_stop_time;	    	/* When the program finished    */ +jmp_buf		exit_parallel_system;	/* How to abort from the RTS	*/ + + +//rtsBool fishing = rtsFalse;             /* We have no fish out in the stream */ +rtsTime last_fish_arrived_at = 0;       /* Time of arrival of most recent fish*/ +nat     outstandingFishes = 0;          /* Number of active fishes */  + +//@cindex spark queue +/* GranSim: a globally visible array of spark queues */ +rtsSpark *pending_sparks_hd[SPARK_POOLS],  /* ptr to start of a spark pool */  +         *pending_sparks_tl[SPARK_POOLS],  /* ptr to end of a spark pool */  +         *pending_sparks_lim[SPARK_POOLS], +         *pending_sparks_base[SPARK_POOLS];  + +//@cindex spark_limit +/* max number of sparks permitted on the PE;  +   see RtsFlags.ParFlags.maxLocalSparks */ +nat spark_limit[SPARK_POOLS]; + +globalAddr theGlobalFromGA, theGlobalToGA; +/* +  HAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACK !! see FETCH_ME_entry +  Only used within FETCH_ME_entry as local vars, but they shouldn't +  be defined locally in there -- that would move %esp and you'll never +  return from STG land. 
+  -- HWL +*/ +globalAddr *rga_GLOBAL; +globalAddr *lga_GLOBAL; +globalAddr fmbqga_GLOBAL; +StgClosure *p_GLOBAL; + +//@cindex PendingFetches +/* A list of fetch reply messages not yet processed; this list is filled +   by awaken_blocked_queue and processed by processFetches */ +StgBlockedFetch *PendingFetches = END_BF_QUEUE; + +//@cindex allPEs +GlobalTaskId *allPEs; + +//@cindex nPEs +nat nPEs = 0; + +//@cindex sparksIgnored +nat sparksIgnored = 0, sparksCreated = 0,  +    threadsIgnored = 0, threadsCreated = 0; + +//@cindex advisory_thread_count +nat advisory_thread_count = 0; + +/* Where to write the log file  +   This is now in Parallel.c  +FILE *gr_file = NULL; +char gr_filename[STATS_FILENAME_MAXLEN]; +*/ + +/* Flag handling. */ + +#if 0 +/* that's now all done via RtsFlags.ParFlags... */ +rtsBool TraceSparks =    rtsFalse;		/* Enable the spark trace mode 		*/ +rtsBool SparkLocally =   rtsFalse;		/* Use local threads if possible 	*/ +rtsBool DelaySparks =    rtsFalse;		/* Use delayed sparking 		*/ +rtsBool LocalSparkStrategy =   rtsFalse;	/* Either delayed threads or local threads*/ +rtsBool GlobalSparkStrategy =  rtsFalse;	/* Export all threads	    	     	*/ + +rtsBool DeferGlobalUpdates =   rtsFalse;	/* Defer updating of global nodes	*/ +#endif + +//@node Initialisation Routines,  , Global variables +//@subsection Initialisation Routines + +/* +  par_exit defines how to terminate the program.  If the exit code is +  non-zero (i.e. an error has occurred), the PE should not halt until +  outstanding error messages have been processed.  Otherwise, messages +  might be sent to non-existent Task Ids.  The infinite loop will actually +  terminate, since STG_Exception will call myexit\tr{(0)} when +  it received a PP_FINISH from the system manager task. 
+*/ +//@cindex par_exit +void +shutdownParallelSystem(StgInt n) +{ +  belch("   entered shutdownParallelSystem ..."); +  ASSERT(GlobalStopPending = rtsTrue); +  sendOp(PP_FINISH, SysManTask); +  if (n != 0)  +    waitForTermination(); +  else +    waitForPEOp(PP_FINISH, SysManTask); +  shutDownPE(); +  IF_PAR_DEBUG(verbose, +	       belch("--++ shutting down PE %lx, %ld sparks created, %ld sparks Ignored, %ld threads created, %ld threads Ignored",  +		     (W_) mytid, sparksCreated, sparksIgnored, +		     threadsCreated, threadsIgnored)); +  exit(n); +} + +/* Some prototypes */ +void srand48 (long); +time_t time (time_t *); + +//@cindex initParallelSystem +void +initParallelSystem(void) +{ +  belch("entered initParallelSystem ..."); + +  /* Don't buffer standard channels... */ +  setbuf(stdout,NULL); +  setbuf(stderr,NULL); + +  srand48(time(NULL) * getpid());  /*Initialise Random-number generator seed*/ +                                   /* Used to select target of FISH message*/ + +  theGlobalFromGA.payload.gc.gtid = 0; +  theGlobalToGA.payload.gc.gtid = 0; + +  //IF_PAR_DEBUG(verbose, +	       belch("initPackBuffer ..."); +  if (!initPackBuffer()) +    barf("initPackBuffer"); + +  // IF_PAR_DEBUG(verbose, +	       belch("initMoreBuffers ..."); +  if (!initMoreBuffers()) +    barf("initMoreBuffers"); + +  // IF_PAR_DEBUG(verbose, +	       belch("initSparkPools ..."); +  if (!initSparkPools()) +    barf("initSparkPools"); +} + +/*  + * SynchroniseSystem synchronises the reduction task with the system + * manager, and initialises the Global address tables (LAGA & GALA) + */ + +//@cindex SynchroniseSystem +void +SynchroniseSystem(void) +{ +  int i; + +  fprintf(stderr, "SynchroniseSystem: nPEs=%d\n", nPEs);  + +  initEachPEHook();                  /* HWL: hook to be execed on each PE */ + +  fprintf(stderr, "SynchroniseSystem: initParallelSystem\n"); +  initParallelSystem(); +  allPEs = startUpPE(nPEs); + +  /* Initialize global address tables */ +  initGAtables(); 
+ +  /* Record the shortened the PE identifiers for LAGA etc. tables */ +  for (i = 0; i < nPEs; ++i) { +    fprintf(stderr, "[%x] registering %d-th PE as %x\n", mytid, i, allPEs[i]); +    registerTask(allPEs[i]); +  } +} + +#endif /* PAR -- whole file */ + +//@index +//* PendingFetches::  @cindex\s-+PendingFetches +//* SynchroniseSystem::  @cindex\s-+SynchroniseSystem +//* allPEs::  @cindex\s-+allPEs +//* initParallelSystem::  @cindex\s-+initParallelSystem +//* nPEs::  @cindex\s-+nPEs +//* par_exit::  @cindex\s-+par_exit +//* spark queue::  @cindex\s-+spark queue +//* sparksIgnored::  @cindex\s-+sparksIgnored +//@end index diff --git a/ghc/rts/parallel/ParInit.h b/ghc/rts/parallel/ParInit.h new file mode 100644 index 0000000000..add7ad9426 --- /dev/null +++ b/ghc/rts/parallel/ParInit.h @@ -0,0 +1,19 @@ +/* ----------------------------------------------------------------------------- + * ParInit.h,1 + *  + * Phil Trinder + * July 1998 + * + * External Parallel Initialisation Interface + * + * ---------------------------------------------------------------------------*/ + +#ifndef PARINIT_H +#define PARINIT_H + +extern void RunParallelSystem (P_); +extern void initParallelSystem(void); +extern void SynchroniseSystem(void); +extern void par_exit(I_); + +#endif PARINIT_H diff --git a/ghc/rts/parallel/ParTypes.h b/ghc/rts/parallel/ParTypes.h new file mode 100644 index 0000000000..b280eaee8b --- /dev/null +++ b/ghc/rts/parallel/ParTypes.h @@ -0,0 +1,39 @@ +/* --------------------------------------------------------------------------- + * Time-stamp: <Tue Nov 09 1999 16:31:38 Stardate: [-30]3873.44 hwloidl> + * $Id: ParTypes.h,v 1.2 2000/01/13 14:34:08 hwloidl Exp $   + * + * Runtime system types for GUM + * + * ------------------------------------------------------------------------- */ + +#ifndef PARTYPES_H +#define PARTYPES_H + +#ifdef PAR /* all of it */ + +// now in Parallel.h  +//typedef struct hashtable  HashTable; +//typedef struct hashlist   HashList; + +/* 
Global addresses now live in Parallel.h (needed in Closures.h) */ +// gaddr + +// now in Parallel.h  +/* (GA, LA) pairs  +typedef struct gala { +    globalAddr   ga; +    StgPtr       la; +    struct gala *next; +    rtsBool      preferred; +} rtsGaLa; +*/ + +#if defined(GRAN) +typedef unsigned long TIME; +typedef unsigned char Proc; +typedef unsigned char EVTTYPE; +#endif + +#endif /* PAR */ + +#endif /* ! PARTYPES_H */ diff --git a/ghc/rts/parallel/Parallel.c b/ghc/rts/parallel/Parallel.c new file mode 100644 index 0000000000..8feb516a82 --- /dev/null +++ b/ghc/rts/parallel/Parallel.c @@ -0,0 +1,776 @@ +/* +  Time-stamp: <Sat Dec 04 1999 19:43:39 Stardate: [-30]3999.10 hwloidl> + +  Basic functions for use in either GranSim or GUM. +*/ + +#if defined(GRAN) || defined(PAR)                              /* whole file */ + +//@menu +//* Includes::			 +//* Variables and constants::	 +//* Writing to the log-file::	 +//* Dumping routines::		 +//@end menu + +//@node Includes, Variables and constants +//@subsection Includes + +#include "Rts.h" +#include "RtsFlags.h" +#include "RtsUtils.h" +#include "GranSimRts.h" +#include "ParallelRts.h" + + +//@node Variables and constants, Writing to the log-file, Includes +//@subsection Variables and constants + +/* Where to write the log file */ +FILE *gr_file = NULL; +char gr_filename[STATS_FILENAME_MAXLEN]; + +//@node Writing to the log-file, Dumping routines, Variables and constants +//@subsection Writing to the log-file +/* +  Writing to the log-file + +  These routines dump event-based info to the main log-file. +  The code for writing log files is shared between GranSim and GUM. +*/ + +/*  + * If you're not using GNUC and you're on a 32-bit machine, you're  + * probably out of luck here.  However, since CONCURRENT currently + * requires GNUC, I'm not too worried about it.  
--JSM + */ + +//@cindex init_gr_simulation +#if defined(GRAN) +void +init_gr_simulation(rts_argc, rts_argv, prog_argc, prog_argv) +char *prog_argv[], *rts_argv[]; +int prog_argc, rts_argc; +{ +  nat i; +  char *extension = RtsFlags.GranFlags.GranSimStats.Binary ? "gb" : "gr"; + +  if (RtsFlags.GranFlags.GranSimStats.Global) +    init_gr_stats(); + +  /* init global constants for costs of basic operations */ +  gran_arith_cost = RtsFlags.GranFlags.Costs.arith_cost; +  gran_branch_cost = RtsFlags.GranFlags.Costs.branch_cost; +  gran_load_cost = RtsFlags.GranFlags.Costs.load_cost; +  gran_store_cost = RtsFlags.GranFlags.Costs.store_cost; +  gran_float_cost = RtsFlags.GranFlags.Costs.float_cost; + +  if (RtsFlags.GranFlags.GranSimStats.Suppressed) +    return; + +  if (!RtsFlags.GranFlags.GranSimStats.Full)  +    return; + +  sprintf(gr_filename, GR_FILENAME_FMT, prog_argv[0], extension); + +  if ((gr_file = fopen(gr_filename, "w")) == NULL) { +    barf("Can't open granularity simulation report file %s\n",  +	 gr_filename); +  } + +  setbuf(gr_file, NULL); // for debugging turn buffering off + +  /* write header with program name, options and setup to gr_file */ +  fputs("Granularity Simulation for ", gr_file); +  for (i = 0; i < prog_argc; ++i) { +    fputs(prog_argv[i], gr_file); +    fputc(' ', gr_file); +  } + +  if (rts_argc > 0) { +    fputs("+RTS ", gr_file); +     +    for (i = 0; i < rts_argc; ++i) { +      fputs(rts_argv[i], gr_file); +      fputc(' ', gr_file); +    } +  } + +  fputs("\nStart time: ", gr_file); +  fputs(time_str(), gr_file);               /* defined in RtsUtils.c */ +  fputc('\n', gr_file); +     +  fputs("\n\n--------------------\n\n", gr_file); + +  fputs("General Parameters:\n\n", gr_file); + +  if (RtsFlags.GranFlags.Light)  +    fprintf(gr_file, "GrAnSim-Light\nPEs infinite, %s Scheduler, %sMigrate Threads %s, %s\n", +	    RtsFlags.GranFlags.DoFairSchedule?"Fair":"Unfair", +	    RtsFlags.GranFlags.DoThreadMigration?"":"Don't ", +	    
RtsFlags.GranFlags.DoThreadMigration && RtsFlags.GranFlags.DoStealThreadsFirst?" Before Sparks":"", +	    RtsFlags.GranFlags.DoAsyncFetch ? "Asynchronous Fetch" : +	    "Block on Fetch"); +  else  +    fprintf(gr_file, "PEs %u, %s Scheduler, %sMigrate Threads %s, %s\n", +	    RtsFlags.GranFlags.proc,RtsFlags.GranFlags.DoFairSchedule?"Fair":"Unfair", +	    RtsFlags.GranFlags.DoThreadMigration?"":"Don't ", +	    RtsFlags.GranFlags.DoThreadMigration && RtsFlags.GranFlags.DoStealThreadsFirst?" Before Sparks":"", +	    RtsFlags.GranFlags.DoAsyncFetch ? "Asynchronous Fetch" : +	    "Block on Fetch"); +   +  if (RtsFlags.GranFlags.DoBulkFetching)  +    if (RtsFlags.GranFlags.ThunksToPack) +      fprintf(gr_file, "Bulk Fetching: Fetch %d Thunks in Each Packet (Packet Size = %d closures)\n", +	      RtsFlags.GranFlags.ThunksToPack,  +	      RtsFlags.GranFlags.packBufferSize); +    else +      fprintf(gr_file, "Bulk Fetching: Fetch as many closures as possible (Packet Size = %d closures)\n", +	      RtsFlags.GranFlags.packBufferSize); +  else +    fprintf(gr_file, "Incremental Fetching: Fetch Exactly One Closure in Each Packet\n"); +   +  fprintf(gr_file, "Fetch Strategy(%u):If outstanding fetches %s\n", +	  RtsFlags.GranFlags.FetchStrategy, +	  RtsFlags.GranFlags.FetchStrategy==0 ? +	    " block (block-on-fetch)": +	  RtsFlags.GranFlags.FetchStrategy==1 ? +	    "only run runnable threads": +	  RtsFlags.GranFlags.FetchStrategy==2 ?  +	    "create threads only from local sparks": +	  RtsFlags.GranFlags.FetchStrategy==3 ?  +	    "create threads from local or global sparks": +	  RtsFlags.GranFlags.FetchStrategy==4 ? +	    "create sparks and steal threads if necessary": +	  "unknown"); + +  if (RtsFlags.GranFlags.DoPrioritySparking) +    fprintf(gr_file, "Priority Sparking (i.e. keep sparks ordered by priority)\n"); + +  if (RtsFlags.GranFlags.DoPriorityScheduling) +    fprintf(gr_file, "Priority Scheduling (i.e. 
keep threads ordered by priority)\n"); + +  fprintf(gr_file, "Thread Creation Time %u, Thread Queue Time %u\n", +	  RtsFlags.GranFlags.Costs.threadcreatetime,  +	  RtsFlags.GranFlags.Costs.threadqueuetime); +  fprintf(gr_file, "Thread DeSchedule Time %u, Thread Schedule Time %u\n", +	  RtsFlags.GranFlags.Costs.threaddescheduletime,  +	  RtsFlags.GranFlags.Costs.threadscheduletime); +  fprintf(gr_file, "Thread Context-Switch Time %u\n", +	  RtsFlags.GranFlags.Costs.threadcontextswitchtime); +  fputs("\n\n--------------------\n\n", gr_file); + +  fputs("Communication Metrics:\n\n", gr_file); +  fprintf(gr_file, +	  "Latency %u (1st) %u (rest), Fetch %u, Notify %u (Global) %u (Local)\n", +	  RtsFlags.GranFlags.Costs.latency,  +	  RtsFlags.GranFlags.Costs.additional_latency,  +	  RtsFlags.GranFlags.Costs.fetchtime, +	  RtsFlags.GranFlags.Costs.gunblocktime,  +	  RtsFlags.GranFlags.Costs.lunblocktime); +  fprintf(gr_file, +	  "Message Creation %u (+ %u after send), Message Read %u\n", +	  RtsFlags.GranFlags.Costs.mpacktime,  +	  RtsFlags.GranFlags.Costs.mtidytime,  +	  RtsFlags.GranFlags.Costs.munpacktime); +  fputs("\n\n--------------------\n\n", gr_file); + +  fputs("Instruction Metrics:\n\n", gr_file); +  fprintf(gr_file, "Arith %u, Branch %u, Load %u, Store %u, Float %u, Alloc %u\n", +	  RtsFlags.GranFlags.Costs.arith_cost,  +	  RtsFlags.GranFlags.Costs.branch_cost, +	  RtsFlags.GranFlags.Costs.load_cost,  +	  RtsFlags.GranFlags.Costs.store_cost,  +	  RtsFlags.GranFlags.Costs.float_cost,  +	  RtsFlags.GranFlags.Costs.heapalloc_cost); +  fputs("\n\n++++++++++++++++++++\n\n", gr_file); + +# if 0 +  /* binary log files are currently not supported */ +  if (RtsFlags.GranFlags.GranSimStats.Binary) +    grputw(sizeof(rtsTime)); +# endif + +  return (0); +} + +#elif defined(PAR) + +void +init_gr_simulation(rts_argc, rts_argv, prog_argc, prog_argv) +char *prog_argv[], *rts_argv[]; +int prog_argc, rts_argc; +{ +  nat i; +  char time_string[TIME_STR_LEN], 
node_str[NODE_STR_LEN]; +  char *extension = RtsFlags.ParFlags.ParStats.Binary ? "gb" : "gr"; + +  sprintf(gr_filename, GR_FILENAME_FMT_GUM, prog_argv[0], thisPE, extension); + +  if (!RtsFlags.ParFlags.ParStats.Full)  +    return; + +  if ((gr_file = fopen(gr_filename, "w")) == NULL) +    barf("Can't open activity report file %s\n", gr_filename); + +  setbuf(gr_file, NULL); // for debugging turn buffering off + +  /* write header with program name, options and setup to gr_file */ +  for (i = 0; i < prog_argc; ++i) { +    fputs(prog_argv[i], gr_file); +    fputc(' ', gr_file); +  } + +  if (rts_argc > 0) { +    fputs("+RTS ", gr_file); +     +    for (i = 0; i < rts_argc; ++i) { +      fputs(rts_argv[i], gr_file); +      fputc(' ', gr_file); +    } +  } +  fputc('\n', gr_file); + +  /* record the absolute start time to allow synchronisation of log-files */ +  fputs("Start-Time: ", gr_file); +  fputs(time_str(), gr_file); +  fputc('\n', gr_file); +     +  startTime = CURRENT_TIME; +  ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/); +  fprintf(gr_file, "PE %2u [%s]: TIME\n", thisPE, time_string); + +  /* +  IF_PAR_DEBUG(verbose, +	       belch("== Start-time: %ld (%s)", +		     startTime, time_string)); +  */ +# if 0 +    ngoq Dogh'q' vImuS + +    if (startTime > LL(1000000000)) { +      fprintf(gr_file, "PE %2u [%lu%lu]: TIME\n", thisPE,  +	    (rtsTime) (startTime / LL(1000000000)), +	    (rtsTime) (startTime % LL(1000000000))); +    } else { +      fprintf(gr_file, "PE %2u [%lu]: TIME\n", thisPE, (TIME) startTime); +    }  +    /* binary log files are currently not supported */ +    if (RtsFlags.GranFlags.GranSimStats.Binary) +	grputw(sizeof(rtsTime)); +# endif + +    return; +} +#endif /* PAR */ + +//@cindex end_gr_simulation +#if defined(GRAN) +void +end_gr_simulation(void) +{ +   char time_string[TIME_STR_LEN]; + +   ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/); + +   if 
(RtsFlags.GranFlags.GranSimStats.Suppressed) +     return; + +   /* Print event stats */ +   if (RtsFlags.GranFlags.GranSimStats.Global) { +     nat i; +    +     fprintf(stderr,"Total yields: %d\n", +             globalGranStats.tot_yields); + +     fprintf(stderr,"Total number of threads created: %d ; per PE:\n", +             globalGranStats.tot_threads_created); +     for (i=0; i<RtsFlags.GranFlags.proc; i++) { +       fprintf(stderr,"  PE %d: %d\t",  +	       i, globalGranStats.threads_created_on_PE[i]); +       if (i+1 % 4 == 0) fprintf(stderr,"\n"); +     } +     if (RtsFlags.GranFlags.proc+1 % 4 != 0) fprintf(stderr,"\n"); +     fprintf(stderr,"Total number of threads migrated: %d\n", +             globalGranStats.tot_TSOs_migrated); + +     fprintf(stderr,"Total number of sparks created: %d ; per PE:\n", +             globalGranStats.tot_sparks_created); +     for (i=0; i<RtsFlags.GranFlags.proc; i++) { +       fprintf(stderr,"  PE %d: %d\t",  +	       i, globalGranStats.sparks_created_on_PE[i]); +       if (i+1 % 4 == 0) fprintf(stderr,"\n"); +     } +     if (RtsFlags.GranFlags.proc+1 % 4 != 0) fprintf(stderr,"\n"); + +     fprintf(stderr,"Event statistics (number of events: %d):\n", +             globalGranStats.noOfEvents); +     for (i=0; i<=MAX_EVENT; i++) { +       fprintf(stderr,"  %s (%d): \t%d \t%f%%\t%f%%\n", +               event_names[i],i,globalGranStats.event_counts[i], +               (float)(100*globalGranStats.event_counts[i])/(float)(globalGranStats.noOfEvents), +               (i==ContinueThread ? 
0.0 : +   		   (float)(100*(globalGranStats.event_counts[i])/(float)(globalGranStats.noOfEvents-globalGranStats.event_counts[ContinueThread])) )); +     } +     fprintf(stderr,"Randomized steals: %ld sparks, %ld threads \n \t(Sparks: #%u (avg ntimes=%f; avg fl=%f)\n\t(Threads: %ld)",  +   	             globalGranStats.rs_sp_count,  +	             globalGranStats.rs_t_count,  +	             globalGranStats.no_of_steals,  +   	             (float)globalGranStats.ntimes_total/(float)stg_max(globalGranStats.no_of_steals,1), +   	             (float)globalGranStats.fl_total/(float)stg_max(globalGranStats.no_of_steals,1), +	             globalGranStats.no_of_migrates); +     fprintf(stderr,"Moved sparks: %d  Withered sparks: %d (%.2f %%)\n", +   	      globalGranStats.tot_sparks, globalGranStats.withered_sparks, +             ( globalGranStats.tot_sparks == 0 ? 0 : +                  (float)(100*globalGranStats.withered_sparks)/(float)(globalGranStats.tot_sparks)) ); +     /* Print statistics about priority sparking */ +     if (RtsFlags.GranFlags.DoPrioritySparking) { +   	fprintf(stderr,"About Priority Sparking:\n"); +   	fprintf(stderr,"  Total no. NewThreads: %d   Avg. spark queue len: %.2f \n", globalGranStats.tot_sq_probes, (float)globalGranStats.tot_sq_len/(float)globalGranStats.tot_sq_probes); +     } +     /* Print statistics about priority sparking */ +     if (RtsFlags.GranFlags.DoPriorityScheduling) { +   	fprintf(stderr,"About Priority Scheduling:\n"); +   	fprintf(stderr,"  Total no. of StartThreads: %d (non-end: %d) Avg. thread queue len: %.2f\n",  +   		globalGranStats.tot_add_threads, globalGranStats.non_end_add_threads,  +   		(float)globalGranStats.tot_tq_len/(float)globalGranStats.tot_add_threads); +     } +     /* Blocking queue statistics */ +     if (1) { +   	fprintf(stderr,"Blocking queue statistcs:\n"); +   	fprintf(stderr,"  Total no. of FMBQs generated: %d\n", +		globalGranStats.tot_FMBQs); +   	fprintf(stderr,"  Total no. 
of bqs awakened: %d\n", +		globalGranStats.tot_awbq); +   	fprintf(stderr,"  Total length of all bqs: %d\tAvg length of bqs: %.2f\n", +		globalGranStats.tot_bq_len, (float)globalGranStats.tot_bq_len/(float)globalGranStats.tot_awbq); +   	fprintf(stderr,"  Percentage of local TSOs in BQs: %.2f\n", +		(float)globalGranStats.tot_bq_len*100.0/(float)globalGranStats.tot_bq_len); +   	fprintf(stderr,"  Total time spent processing BQs: %lx\n", +		globalGranStats.tot_bq_processing_time); +     } + +     /* Fetch misses and thunk stealing */ +     fprintf(stderr,"Number of fetch misses: %d\n",  +	     globalGranStats.fetch_misses); + +     /* Print packet statistics if GUMM fetching is turned on */ +     if (RtsFlags.GranFlags.DoBulkFetching) { +   	fprintf(stderr,"Packet statistcs:\n"); +   	fprintf(stderr,"  Total no. of packets: %d   Avg. packet size: %.2f \n", globalGranStats.tot_packets, (float)globalGranStats.tot_packet_size/(float)globalGranStats.tot_packets); +   	fprintf(stderr,"  Total no. of thunks: %d   Avg. thunks/packet: %.2f \n", globalGranStats.tot_thunks, (float)globalGranStats.tot_thunks/(float)globalGranStats.tot_packets); +   	fprintf(stderr,"  Total no. of cuts: %d   Avg. 
cuts/packet: %.2f\n", globalGranStats.tot_cuts, (float)globalGranStats.tot_cuts/(float)globalGranStats.tot_packets); +        /*  +   	if (closure_queue_overflows>0)  +   	  fprintf(stderr,"  Number of closure queue overflows: %u\n", +   		  closure_queue_overflows); +	*/ +     } +   } /* RtsFlags.GranFlags.GranSimStats.Global */ + +#  if defined(GRAN_COUNT) +#  error "GRAN_COUNT not supported; should be parallel ticky profiling, really" +    fprintf(stderr,"Update count statistics:\n"); +    fprintf(stderr,"  Total number of updates: %u\n",nUPDs); +    fprintf(stderr,"  Needed to awaken BQ: %u with avg BQ len of: %f\n", +	    nUPDs_BQ,(float)BQ_lens/(float)nUPDs_BQ); +    fprintf(stderr,"  Number of PAPs: %u\n",nPAPs); +#  endif + +    fprintf(stderr, "Simulation finished after @ %s @ cycles. %d sparks created, %d sparks ignored. Check %s for details.\n", +	    time_string, sparksCreated, sparksIgnored, gr_filename); + +    if (RtsFlags.GranFlags.GranSimStats.Full)  +      fclose(gr_file); +} + +#elif defined(PAR) + +/* +  Under GUM we print only one line.  +*/ +void +end_gr_simulation(void) +{ +  char time_string[TIME_STR_LEN]; + +  ullong_format_string(CURRENT_TIME-startTime, time_string, rtsFalse/*no commas!*/); + +  fprintf(stderr, "Computation finished after @ %s @ ms. %d sparks created, %d sparks ignored. Check %s for details.\n", +	    time_string, sparksCreated, sparksIgnored, gr_filename); + +  if (RtsFlags.ParFlags.ParStats.Full)  +    fclose(gr_file); +} +#endif /* PAR */ + +//@node Dumping routines,  , Writing to the log-file +//@subsection Dumping routines + +//@cindex DumpGranEvent +void +DumpGranEvent(name, tso) +GranEventType name; +StgTSO *tso; +{ +    DumpRawGranEvent(CURRENT_PROC, (PEs)0, name, tso, END_TSO_QUEUE, (StgInt)0, (StgInt)0); +} + +//@cindex DumpRawGranEvent +void +DumpRawGranEvent(proc, p, name, tso, node, sparkname, len) +PEs proc, p;         /* proc ... where it happens; p ... 
where node lives */ +GranEventType name; +StgTSO *tso; +StgClosure *node; +StgInt sparkname, len; +{ +  FILE *output_file; // DEBUGGING ONLY !!!!!!!!!!!!!!!!!!!!!!!!!1 +  StgWord id; +  char time_string[TIME_STR_LEN], node_str[NODE_STR_LEN]; +  ullong_format_string(TIME_ON_PROC(proc), time_string, rtsFalse/*no commas!*/); + +  output_file = gr_file; +  ASSERT(output_file!=NULL); +# if defined(GRAN) +  IF_DEBUG(gran, +	   fprintf(stderr, "GRAN: Dumping info to file with handle %#x\n", output_file)) +		    +  if (RtsFlags.GranFlags.GranSimStats.Suppressed) +    return; +# endif + +  id = tso == NULL ? -1 : tso->id; +  if (node==stgCast(StgClosure*,&END_TSO_QUEUE_closure)) +      strcpy(node_str,"________");  /* "END_TSO_QUEUE"); */ +  else +      sprintf(node_str,"0x%-6lx",node); + +  if (name > GR_EVENT_MAX) +	name = GR_EVENT_MAX; + +  if (BINARY_STATS) +    barf("binary log files not yet supported"); +#if 0 +    /* ToDo: fix code for writing binary GrAnSim statistics */ +    switch (name) {  +      case GR_START: +      case GR_STARTQ: +                      grputw(name); +		      grputw(proc); +		      abort();        /* die please: a single word */ +				      /* doesn't represent long long times */ +		      grputw(TIME_ON_PROC(proc)); +		      grputw((StgWord)node); +		      break; +      case GR_FETCH: +      case GR_REPLY: +      case GR_BLOCK: +		      grputw(name); +		      grputw(proc); +		      abort();        /* die please: a single word */ +				      /* doesn't represent long long times */ +		      grputw(TIME_ON_PROC(proc));  /* this line is bound to */ +		      grputw(id);                  /*   do the wrong thing */ +		      break; +      default:  +                      grputw(name); +		      grputw(proc); +		      abort();        /* die please: a single word */ +				      /* doesn't represent long long times */ +		      grputw(TIME_ON_PROC(proc)); +		      grputw((StgWord)node); +    } +#endif +  else /* !BINARY_STATS */ +    switch (name) {  +     
case GR_START: +     case GR_STARTQ: +        fprintf(output_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[SN %u]\t[sparks %u]\n",  +	        proc,time_string,gran_event_names[name], +	        id,node_str,sparkname,len); +        break; +     case GR_FETCH: +     case GR_REPLY: +     case GR_BLOCK: +     case GR_STOLEN: +     case GR_STOLENQ: +	fprintf(output_file, "PE %2u [%s]: %-9s\t%lx \t%s\t(from %2u)\n", +	        proc, time_string, gran_event_names[name],  +		id,node_str,p); +	break; +     case GR_RESUME: +     case GR_RESUMEQ: +     case GR_SCHEDULE: +     case GR_DESCHEDULE: +        fprintf(output_file,"PE %2u [%s]: %-9s\t%lx \n", +	        proc,time_string,gran_event_names[name],id); +        break; +     case GR_STEALING: +        fprintf(output_file,"PE %2u [%s]: %-9s\t%lx\t        \t(by %2u)\n", +	        proc,time_string,gran_event_names[name],id,p); +        break; +     case GR_ALLOC: +        fprintf(output_file,"PE %2u [%s]: %-9s\t%lx\t        \tallocating %u words\n", +	        proc,time_string,gran_event_names[name],id,len); +        break; +     default: +        fprintf(output_file,"PE %2u [%s]: %-9s\t%lx\t%s\t[sparks %u]\n", +	        proc,time_string,gran_event_names[name],id,node_str,len); +    } +} + +//@cindex DumpGranInfo +void +DumpEndEvent(proc, tso, mandatory_thread) +PEs proc; +StgTSO *tso; +rtsBool mandatory_thread; +{ +  FILE *output_file; // DEBUGGING ONLY !!!!!!!!!!!!!!!!!!!!!!!!!1 +    char time_string[TIME_STR_LEN]; +    ullong_format_string(CURRENT_TIME, time_string, rtsFalse/*no commas!*/); + +  output_file = gr_file; +  ASSERT(output_file!=NULL); +#if defined(GRAN) +    if (RtsFlags.GranFlags.GranSimStats.Suppressed) +      return; +#endif + +    if (BINARY_STATS) { +    barf("binary log files not yet supported"); +#if 0 +	grputw(GR_END); +	grputw(proc); +	abort(); /* die please: a single word doesn't represent long long times */ +	grputw(CURRENT_TIME); /* this line is bound to fail */ +	grputw(tso->id); +#ifdef PAR +	grputw(0); +	
grputw(0); +	grputw(0); +	grputw(0); +	grputw(0); +	grputw(0); +	grputw(0); +	grputw(0); +	grputw(0); +	grputw(0); +	grputw(0); +	grputw(0); +#else +	grputw(tso->gran.sparkname); +	grputw(tso->gran.startedat); +	grputw(tso->gran.exported); +	grputw(tso->gran.basicblocks); +	grputw(tso->gran.allocs); +	grputw(tso->gran.exectime); +	grputw(tso->gran.blocktime); +	grputw(tso->gran.blockcount); +	grputw(tso->gran.fetchtime); +	grputw(tso->gran.fetchcount); +	grputw(tso->gran.localsparks); +	grputw(tso->gran.globalsparks); +#endif +	grputw(mandatory_thread); +#endif /* 0 */ +    } else { + +	/* +	 * NB: DumpGranEvent cannot be used because PE may be wrong  +	 * (as well as the extra info) +	 */ +	fprintf(output_file, "PE %2u [%s]: END %lx, SN %u, ST %lu, EXP %c, BB %u, HA %u, RT %u, BT %u (%u), FT %u (%u), LS %u, GS %u, MY %c\n" +	  ,proc +	  ,time_string +	  ,tso->id +#if defined(GRAN)		 +	  ,tso->gran.sparkname +	  ,tso->gran.startedat +	  ,tso->gran.exported ? 'T' : 'F' +	  ,tso->gran.basicblocks +	  ,tso->gran.allocs +	  ,tso->gran.exectime +	  ,tso->gran.blocktime +	  ,tso->gran.blockcount +	  ,tso->gran.fetchtime +	  ,tso->gran.fetchcount +	  ,tso->gran.localsparks +	  ,tso->gran.globalsparks +#elif defined(PAR) +	  ,tso->par.sparkname +	  ,tso->par.startedat +	  ,tso->par.exported ? 'T' : 'F' +	  ,tso->par.basicblocks +	  ,tso->par.allocs +	  ,tso->par.exectime +	  ,tso->par.blocktime +	  ,tso->par.blockcount +	  ,tso->par.fetchtime +	  ,tso->par.fetchcount +	  ,tso->par.localsparks +	  ,tso->par.globalsparks +#endif +	  ,mandatory_thread ? 
'T' : 'F' +	  ); +    } +} + +//@cindex DumpTSO +void +DumpTSO(tso) +StgTSO *tso; +{ +  FILE *output_file; // DEBUGGING ONLY !!!!!!!!!!!!!!!!!!!!!!!!!1 + +  output_file = gr_file; +  ASSERT(output_file!=NULL); +  fprintf(stderr,"TSO 0x%lx, NAME 0x%lx, ID %u, LINK 0x%lx, TYPE %s\n" +          ,tso +#if defined(GRAN) +          ,tso->gran.sparkname +#elif defined(PAR) +          ,tso->par.sparkname +#endif +          ,tso->id +          ,tso->link +          ,/*tso->state==T_MAIN?"MAIN": +           TSO_TYPE(tso)==T_FAIL?"FAIL": +           TSO_TYPE(tso)==T_REQUIRED?"REQUIRED": +           TSO_TYPE(tso)==T_ADVISORY?"ADVISORY": +	   */ +           "???" +          ); +           +  fprintf(output_file,"TSO %lx: SN %u, ST %u, GBL %c, BB %u, HA %u, RT %u, BT %u (%u), FT %u (%u) LS %u, GS %u\n" +	  ,tso->id +#if defined(GRAN) +          ,tso->gran.sparkname +          ,tso->gran.startedat +          ,tso->gran.exported?'T':'F' +          ,tso->gran.basicblocks +          ,tso->gran.allocs +          ,tso->gran.exectime +          ,tso->gran.blocktime +          ,tso->gran.blockcount +          ,tso->gran.fetchtime +          ,tso->gran.fetchcount +          ,tso->gran.localsparks +          ,tso->gran.globalsparks +#elif defined(PAR) +          ,tso->par.sparkname +          ,tso->par.startedat +          ,tso->par.exported?'T':'F' +          ,tso->par.basicblocks +          ,tso->par.allocs +          ,tso->par.exectime +          ,tso->par.blocktime +          ,tso->par.blockcount +          ,tso->par.fetchtime +          ,tso->par.fetchcount +          ,tso->par.localsparks +          ,tso->par.globalsparks +#endif +          ); +} + +#if 0 +/* +  ToDo: fix binary output of log files, and support new log file format. +*/ +/* +   Output a terminate event and an 8-byte time. 
+*/ + +//@cindex grterminate +void +grterminate(v) +rtsTime v; +{ +  if (!BINARY_STATS)  +    barf("grterminate: binary statistics not enabled\n"); + +# if defined(GRAN) +    if (RtsFlags.GranFlags.GranSimStats.Suppressed) +      return; +# endif + +    DumpGranEvent(GR_TERMINATE, stgCast(StgTSO*,&END_TSO_QUEUE_closure)); + +    if (sizeof(rtsTime) == 4) { +      putc('\0', gr_file); +      putc('\0', gr_file); +      putc('\0', gr_file); +      putc('\0', gr_file); +    } else { +      putc(v >> 56l, gr_file); +      putc((v >> 48l) & 0xffl, gr_file); +      putc((v >> 40l) & 0xffl, gr_file); +      putc((v >> 32l) & 0xffl, gr_file); +    } +    putc((v >> 24l) & 0xffl, gr_file); +    putc((v >> 16l) & 0xffl, gr_file); +    putc((v >> 8l) & 0xffl, gr_file); +    putc(v & 0xffl, gr_file); +} + +/* +   Length-coded output: first 3 bits contain length coding + +     00x        1 byte +     01x        2 bytes +     10x        4 bytes +     110        8 bytes +     111        5 or 9 bytes +*/ + +//@cindex grputw +void +grputw(v) +rtsTime v; +{ +  if (!BINARY_STATS)  +    barf("grputw: binary statistics not enabled\n"); + +# if defined(GRAN) +    if (RtsFlags.GranFlags.GranSimStats.Suppressed) +      return; +# endif + +    if (v <= 0x3fl) {                           /* length v = 1 byte */  +	fputc(v & 0x3f, gr_file); +    } else if (v <= 0x3fffl) {                  /* length v = 2 byte */  +	fputc((v >> 8l) | 0x40l, gr_file); +	fputc(v & 0xffl, gr_file); +    } else if (v <= 0x3fffffffl) {              /* length v = 4 byte */  +	fputc((v >> 24l) | 0x80l, gr_file); +	fputc((v >> 16l) & 0xffl, gr_file); +	fputc((v >> 8l) & 0xffl, gr_file); +	fputc(v & 0xffl, gr_file); +    } else if (sizeof(TIME) == 4) { +	fputc(0x70, gr_file); +	fputc((v >> 24l) & 0xffl, gr_file); +	fputc((v >> 16l) & 0xffl, gr_file); +	fputc((v >> 8l) & 0xffl, gr_file); +	fputc(v & 0xffl, gr_file); +    } else { +	if (v <= 0x3fffffffffffffl) +	    putc((v >> 56l) | 0x60l, gr_file); +	else { +	    
putc(0x70, gr_file); +	    putc((v >> 56l) & 0xffl, gr_file); +	} + +	putc((v >> 48l) & 0xffl, gr_file); +	putc((v >> 40l) & 0xffl, gr_file); +	putc((v >> 32l) & 0xffl, gr_file); +	putc((v >> 24l) & 0xffl, gr_file); +	putc((v >> 16l) & 0xffl, gr_file); +	putc((v >> 8l) & 0xffl, gr_file); +	putc(v & 0xffl, gr_file); +    } +} +#endif /* 0 */ + +#endif /* GRAN || PAR   whole file */ diff --git a/ghc/rts/parallel/ParallelDebug.c b/ghc/rts/parallel/ParallelDebug.c new file mode 100644 index 0000000000..2924b513d9 --- /dev/null +++ b/ghc/rts/parallel/ParallelDebug.c @@ -0,0 +1,1390 @@ +/* +  Time-stamp: <Sun Dec 12 1999 20:37:00 Stardate: [-30]4039.08 software> + +Various debugging routines for GranSim and GUM +*/ + +#if defined(GRAN) || defined(PAR)                             /* whole file */ + +//@node Debugging routines for GranSim and GUM, , , +//@section Debugging routines for GranSim and GUM + +//@menu +//* Includes::			 +//* Constants and Variables::	 +//* Closures::			 +//* Threads::			 +//* Events::			 +//* Sparks::			 +//* Processors::		 +//* Shortcuts::			 +//* Printing info type::	 +//* Printing Pack:et Contents::	 +//* End of File::		 +//@end menu +//*/ + +//@node Includes, Prototypes, Debugging routines for GranSim and GUM, Debugging routines for GranSim and GUM +//@subsection Includes + +#include "Rts.h" +#include "RtsFlags.h" +#include "GranSimRts.h" +#include "ParallelRts.h" +#include "StgMiscClosures.h" +# if defined(DEBUG) +# include "ParallelDebug.h" +# endif + +//@node Prototypes, Constants and Variables, Includes, Debugging routines for GranSim and GUM +//@subsection Prototypes +/* +rtsBool  isOffset(globalAddr *ga); +rtsBool  isFixed(globalAddr *ga); +*/ +//@node Constants and Variables, Closures, Prototypes, Debugging routines for GranSim and GUM +//@subsection Constants and Variables + +/* Names as strings; needed by get_closure_info in ClosureMacros.h -- HWL */ +static char *closure_type_names[] = { +  "INVALID_OBJECT",          /* 0 */ +  
"CONSTR",                  /* 1 */ +  "CONSTR_1_0",		/* 2 */ +  "CONSTR_0_1",		/* 3 */ +  "CONSTR_2_0",		/* 4 */ +  "CONSTR_1_1",		/* 5 */ +  "CONSTR_0_2",		/* 6 */ +  "CONSTR_INTLIKE",	        /* 7  */ +  "CONSTR_CHARLIKE",	        /* 8  */ +  "CONSTR_STATIC",	        /* 9  */ +  "CONSTR_NOCAF_STATIC",     /* 10 */ +  "FUN",		        /* 11 */ +  "FUN_1_0",		  	/* 12 */ +  "FUN_0_1",		  	/* 13 */ +  "FUN_2_0",		  	/* 14 */ +  "FUN_1_1",		  	/* 15 */ +  "FUN_0_2",			/* 16 */ +  "FUN_STATIC",	        /* 17 */ +  "THUNK",		        /* 18 */ +  "THUNK_1_0",	  	/* 19 */ +  "THUNK_0_1",	  	/* 20 */ +  "THUNK_2_0",	  	/* 21 */ +  "THUNK_1_1",	  	/* 22 */ +  "THUNK_0_2",		/* 23 */ +  "THUNK_STATIC",	        /* 24 */ +  "THUNK_SELECTOR",	        /* 25 */ +  "BCO",		        /* 26 */ +  "AP_UPD",		        /* 27 */ +  "PAP",			/* 28 */ +  "IND",		        /* 29 */ +  "IND_OLDGEN",	        /* 30 */ +  "IND_PERM",	        /* 31 */ +  "IND_OLDGEN_PERM",	        /* 32 */ +  "IND_STATIC",	        /* 33 */ +  "CAF_UNENTERED",           /* 34 */ +  "CAF_ENTERED",		/* 35 */ +  "CAF_BLACKHOLE",		/* 36 */ +  "RET_BCO",                 /* 37 */ +  "RET_SMALL",	        /* 38 */ +  "RET_VEC_SMALL",	        /* 39 */ +  "RET_BIG",		        /* 40 */ +  "RET_VEC_BIG",	        /* 41 */ +  "RET_DYN",		        /* 42 */ +  "UPDATE_FRAME",	        /* 43 */ +  "CATCH_FRAME",	        /* 44 */ +  "STOP_FRAME",	        /* 45 */ +  "SEQ_FRAME",	        /* 46 */ +  "BLACKHOLE",	        /* 47 */ +  "BLACKHOLE_BQ",	        /* 48 */ +  "SE_BLACKHOLE",		/* 49 */ +  "SE_CAF_BLACKHOLE",	/* 50 */ +  "MVAR",		        /* 51 */ +  "ARR_WORDS",	        /* 52 */ +  "MUT_ARR_PTRS",	        /* 53 */ +  "MUT_ARR_PTRS_FROZEN",     /* 54 */ +  "MUT_VAR",		        /* 55 */ +  "WEAK",		        /* 56 */ +  "FOREIGN",		        /* 57 */ +  "STABLE_NAME",	        /* 58 */ +  "TSO",		        /* 59 */ +  "BLOCKED_FETCH",	        /* 60 */ +  "FETCH_ME",                /* 61 */ +  "EVACUATED",               /* 62 */ +  
"N_CLOSURE_TYPES",         /* 63 */ +  "FETCH_ME_BQ",             /* 64 */ +  "RBH"                     /* 65 */ +}; + + +#if defined(GRAN) && defined(GRAN_CHECK) +//@node Closures, Threads, Constants and Variables, Debugging routines for GranSim and GUM +//@subsection Closures + +void +G_PRINT_NODE(node) +StgClosure* node; +{ +   StgInfoTable *info_ptr; +   StgTSO* bqe; +   nat size = 0, ptrs = 0, nonptrs = 0, i, vhs = 0; +   char info_hdr_ty[80], info_ty[80]; + +   if (node==NULL) { +     fprintf(stderr,"NULL\n"); +     return; +   } else if (node==END_TSO_QUEUE) { +     fprintf(stderr,"END_TSO_QUEUE\n"); +     return; +   } +   /* size_and_ptrs(node,&size,&ptrs); */ +   info_ptr = get_closure_info(node, &size, &ptrs, &nonptrs, &vhs, info_hdr_ty); + +   /* vhs = var_hdr_size(node); */ +   display_info_type(info_ptr,info_ty); + +   fprintf(stderr,"Node: 0x%lx", node); + +#if defined(PAR) +   fprintf(stderr," [GA: 0x%lx]",GA(node)); +#endif + +#if defined(USE_COST_CENTRES) +   fprintf(stderr," [CC: 0x%lx]",CC_HDR(node)); +#endif + +#if defined(GRAN) +   fprintf(stderr," [Bitmask: 0%lo]",PROCS(node)); +#endif + +   if (info_ptr->type==TSO)  +     fprintf(stderr," TSO: 0x%lx (%x) IP: 0x%lx (%s), type %s \n     ", +	     (StgTSO*)node, ((StgTSO*)node)->id, info_ptr, info_hdr_ty, info_ty); +   else +     fprintf(stderr," IP: 0x%lx (%s), type %s \n       VHS: %d, size: %ld, ptrs:%ld, nonptrs:  %ld\n     ", +	     info_ptr,info_hdr_ty,info_ty,vhs,size,ptrs,nonptrs); + +   /* For now, we ignore the variable header */ + +   fprintf(stderr," Ptrs: "); +   for(i=0; i < ptrs; ++i) +     { +     if ( (i+1) % 6 == 0) +       fprintf(stderr,"\n      "); +     fprintf(stderr," 0x%lx[P]",node->payload[i]); +     }; + +   fprintf(stderr," Data: "); +   for(i=0; i < nonptrs; ++i) +     { +       if( (i+1) % 6 == 0) +         fprintf(stderr,"\n      "); +       fprintf(stderr," %lu[D]",node->payload[ptrs+i]); +     } +   fprintf(stderr, "\n"); + + +   switch (info_ptr->type) +    { + 
    case TSO:  +      fprintf(stderr,"\n TSO_LINK: %#lx",  +	      ((StgTSO*)node)->link); +      break; + +    case BLACKHOLE: +    case RBH: +      bqe = ((StgBlockingQueue*)node)->blocking_queue; +      fprintf(stderr," BQ of %#lx: ", node); +      G_PRINT_BQ(bqe); +      break; +    case FETCH_ME: +    case FETCH_ME_BQ: +      printf("Panic: found FETCH_ME or FETCH_ME_BQ Infotable in GrAnSim system.\n"); +      break; +    default: +      /* do nothing */ +    } +} + +void +G_PPN(node)  /* Extracted from PrintPacket in Pack.lc */ +StgClosure* node; +{ +   StgInfoTable *info ; +   nat size = 0, ptrs = 0, nonptrs = 0, i, vhs = 0, locn = 0; +   char info_type[80]; + +   /* size_and_ptrs(node,&size,&ptrs); */ +   info = get_closure_info(node, &size, &ptrs, &nonptrs, &vhs, info_type); + +   if (info->type == FETCH_ME || info->type == FETCH_ME_BQ ||  +       info->type == BLACKHOLE || info->type == RBH ) +     size = ptrs = nonptrs = vhs = 0; + +   if (closure_THUNK(node)) { +     if (!closure_UNPOINTED(node)) +       fputs("SHARED ", stderr); +     else +       fputs("UNSHARED ", stderr); +   }  +   if (info->type==BLACKHOLE) { +     fputs("BLACK HOLE\n", stderr); +   } else { +     /* Fixed header */ +     fprintf(stderr, "(%s) FH [%#lx", info_type, node[locn++]); +     for (i = 1; i < FIXED_HS; i++) +       fprintf(stderr, " %#lx", node[locn++]); +      +     /* Variable header */ +     if (vhs > 0) { +       fprintf(stderr, "] VH [%#lx", node->payload[0]); +        +       for (i = 1; i < vhs; i++) +	 fprintf(stderr, " %#lx", node->payload[i]); +     } +      +     fprintf(stderr, "] PTRS %u", ptrs); +      +     /* Non-pointers */ +     if (nonptrs > 0) { +       fprintf(stderr, " NPTRS [%#lx", node->payload[ptrs]); +        +       for (i = 1; i < nonptrs; i++) +	 fprintf(stderr, " %#lx", node->payload[ptrs+i]); +        +       putc(']', stderr); +     } +     putc('\n', stderr); +   } +    +} + +#if 0 +// ToDo: fix this!! 
-- HWL +void +G_INFO_TABLE(node) +StgClosure *node; +{ +  StgInfoTable *info_ptr; +  nat size = 0, ptrs = 0, nonptrs = 0, vhs = 0; +  char info_type[80], hdr_type[80]; + +  info_hdr_type(info_ptr, hdr_type); + +  // get_itbl(node); +  info_ptr = get_closure_info(node, &size, &ptrs, &nonptrs, &vhs, info_type); +  fprintf(stderr,"%s Info Ptr @0x%lx; Entry: 0x%lx; Size: %lu; Ptrs: %lu\n\n", +                 info_type,info_ptr,(W_) ENTRY_CODE(info_ptr), +	         size, ptrs); +	         // INFO_SIZE(info_ptr),INFO_NoPTRS(info_ptr)); + +  if (closure_THUNK(node) && !closure_UNPOINTED(node) ) { +    fprintf(stderr,"  RBH InfoPtr: %#lx\n", +	    RBH_INFOPTR(info_ptr)); +  } + +#if defined(PAR) +  fprintf(stderr,"Enter Flush Entry: 0x%lx;\tExit Flush Entry: 0x%lx\n",INFO_FLUSHENT(info_ptr),INFO_FLUSH(info_ptr)); +#endif + +#if defined(USE_COST_CENTRES) +  fprintf(stderr,"Cost Centre (???):       0x%lx\n",INFO_CAT(info_ptr)); +#endif + +#if defined(_INFO_COPYING) +  fprintf(stderr,"Evacuate Entry:    0x%lx;\tScavenge Entry: 0x%lx\n", +          INFO_EVAC_2S(info_ptr),INFO_SCAV_2S(info_ptr)); +#endif + +#if defined(_INFO_COMPACTING) +  fprintf(stderr,"Scan Link:         0x%lx;\tScan Move:      0x%lx\n", +          (W_) INFO_SCAN_LINK_1S(info_ptr), (W_) INFO_SCAN_MOVE_1S(info_ptr)); +  fprintf(stderr,"Mark:              0x%lx;\tMarked:         0x%lx;\t", +          (W_) INFO_MARK_1S(info_ptr), (W_) INFO_MARKED_1S(info_ptr)); +#if 0 /* avoid INFO_TYPE */ +  if(BASE_INFO_TYPE(info_ptr)==INFO_SPEC_TYPE) +    fprintf(stderr,"plus specialised code\n"); +  else +    fprintf(stderr,"Marking:           0x%lx\n",(W_) INFO_MARKING_1S(info_ptr)); +#endif /* 0 */ +#endif /* _INFO_COMPACTING */ +} +#endif /* 0 */ + +//@cindex G_PRINT_BQ +void +G_PRINT_BQ(node) +StgClosure* node; +{ +    StgInfoTable *info; +    StgTSO *tso, *last; +    char str[80], str0[80]; + +    fprintf(stderr,"\n[PE %d] @ %lu BQ: ", +	            CurrentProc,CurrentTime[CurrentProc]); +    if ( node == 
(StgClosure*)NULL ) { +      fprintf(stderr," NULL.\n"); +      return; +    } +    if ( node == END_TSO_QUEUE ) { +      fprintf(stderr," _|_\n"); +      return; +    } +    tso = ((StgBlockingQueue*)node)->blocking_queue; +    while (node != END_TSO_QUEUE) { +      PEs proc;                      +       +      /* Find where the tso lives */ +      proc = where_is(node); +      info = get_itbl(node); + +      switch (info->type) { +	  case TSO: +	    strcpy(str0,"TSO"); +	    break; +	  case BLOCKED_FETCH: +	    strcpy(str0,"BLOCKED_FETCH"); +	    break; +	  default: +	    strcpy(str0,"???"); +	    break; +	  } + +      if(proc == CurrentProc) +	fprintf(stderr," %#lx (%x) L %s,",  +		node, ((StgBlockingQueue*)node)->blocking_queue, str0); +      else +	fprintf(stderr," %#lx (%x) G (PE %d) %s,",  +		node, ((StgBlockingQueue*)node)->blocking_queue, proc, str0); + +      last = tso; +      tso = last->link; +    } +    if ( tso == END_TSO_QUEUE )  +      fprintf(stderr," _|_\n"); +} + +//@node Threads, Events, Closures, Debugging routines for GranSim and GUM +//@subsection Threads + +void +G_CURR_THREADQ(verbose)  +StgInt verbose; +{  +  fprintf(stderr,"Thread Queue on proc %d: ", CurrentProc); +  G_THREADQ(run_queue_hd, verbose); +} + +void  +G_THREADQ(closure, verbose)  +StgTSO* closure; +StgInt verbose; +{ + StgTSO* x; + + fprintf(stderr,"Thread Queue: "); + for (x=closure; x!=END_TSO_QUEUE; x=x->link) +   if (verbose)  +     G_TSO(x,0); +   else +     fprintf(stderr," %#lx",x); + + if (closure==END_TSO_QUEUE) +   fprintf(stderr,"NIL\n"); + else +   fprintf(stderr,"\n"); +} + +void  +G_TSO(closure,verbose)  +StgTSO* closure; +StgInt verbose; +{ +  + if (closure==END_TSO_QUEUE) { +   fprintf(stderr,"TSO at %#lx is END_TSO_QUEUE!\n"); +   return; + } + + if ( verbose & 0x08 ) {   /* short info */ +   fprintf(stderr,"[TSO @ %#lx, PE %d]: Id: %#lx, Link: %#lx\n", +	   closure,where_is(closure), +	   closure->id,closure->link); +   return; + } +    + 
fprintf(stderr,"TSO at %#lx has the following contents:\n", +                 closure); + + fprintf(stderr,"> Id:   \t%#lx",closure->id); + // fprintf(stderr,"\tstate: \t%#lx",closure->state); + fprintf(stderr,"\twhatNext: \t%#lx",closure->whatNext); + fprintf(stderr,"\tlink: \t%#lx\n",closure->link); + // fprintf(stderr,"\tType: \t%s\n",type_name[TSO_TYPE(closure)]); + fprintf(stderr,">PRI: \t%#lx", closure->gran.pri); + fprintf(stderr,"\tMAGIC: \t%#lx %s\n", closure->gran.magic,  +	 (closure->gran.magic==TSO_MAGIC ? "it IS a TSO" : "THIS IS NO TSO!!")); + if ( verbose & 0x04 ) { +   fprintf(stderr, "Stack: stack @ %#lx (stack_size: %u; max_stack_size: %u)\n",  +	   closure->stack, closure->stack_size, closure->max_stack_size); +   fprintf(stderr, "  sp: %#lx, su: %#lx, splim: %#lx\n",  +	   closure->sp, closure->su, closure->splim); + } + // fprintf(stderr,"\n"); + if (verbose & 0x01) { +   // fprintf(stderr,"} LOCKED: \t%#lx",closure->locked); +   fprintf(stderr,"} SPARKNAME: \t%#lx\n", closure->gran.sparkname); +   fprintf(stderr,"} STARTEDAT: \t%#lx", closure->gran.startedat); +   fprintf(stderr,"\tEXPORTED: \t%#lx\n", closure->gran.exported); +   fprintf(stderr,"} BASICBLOCKS: \t%#lx", closure->gran.basicblocks); +   fprintf(stderr,"\tALLOCS: \t%#lx\n", closure->gran.allocs); +   fprintf(stderr,"} EXECTIME: \t%#lx", closure->gran.exectime); +   fprintf(stderr,"\tFETCHTIME: \t%#lx\n", closure->gran.fetchtime); +   fprintf(stderr,"} FETCHCOUNT: \t%#lx", closure->gran.fetchcount); +   fprintf(stderr,"\tBLOCKTIME: \t%#lx\n", closure->gran.blocktime); +   fprintf(stderr,"} BLOCKCOUNT: \t%#lx", closure->gran.blockcount); +   fprintf(stderr,"\tBLOCKEDAT: \t%#lx\n", closure->gran.blockedat); +   fprintf(stderr,"} GLOBALSPARKS:\t%#lx", closure->gran.globalsparks); +   fprintf(stderr,"\tLOCALSPARKS:\t%#lx\n", closure->gran.localsparks); + } + if ( verbose & 0x02 ) { +   fprintf(stderr,"BQ that starts with this TSO: "); +   G_PRINT_BQ(closure); + } +} + +//@node Events, 
Sparks, Threads, Debugging routines for GranSim and GUM +//@subsection Events + +void  +G_EVENT(event, verbose)  +rtsEventQ event; +StgInt verbose; +{ +  if (verbose) { +    print_event(event); +  }else{ +    fprintf(stderr," %#lx",event); +  } +} + +void +G_EVENTQ(verbose) +StgInt verbose; +{ + extern rtsEventQ EventHd; + rtsEventQ x; + + fprintf(stderr,"RtsEventQ (hd @%#lx):\n",EventHd); + for (x=EventHd; x!=NULL; x=x->next) { +   G_EVENT(x,verbose); + } + if (EventHd==NULL)  +   fprintf(stderr,"NIL\n"); + else +   fprintf(stderr,"\n"); +} + +void +G_PE_EQ(pe,verbose) +PEs pe; +StgInt verbose; +{ + extern rtsEventQ EventHd; + rtsEventQ x; + + fprintf(stderr,"RtsEventQ (hd @%#lx):\n",EventHd); + for (x=EventHd; x!=NULL; x=x->next) { +   if (x->proc==pe) +     G_EVENT(x,verbose); + } + if (EventHd==NULL)  +   fprintf(stderr,"NIL\n"); + else +   fprintf(stderr,"\n"); +} + +//@node Sparks, Processors, Events, Debugging routines for GranSim and GUM +//@subsection Sparks + +void  +G_SPARK(spark, verbose)  +rtsSparkQ spark; +StgInt verbose; +{ + if (spark==(rtsSpark*)NULL) { +   belch("G_SPARK: NULL spark; aborting"); +   return; + } +  if (verbose) +    print_spark(spark); +  else +    fprintf(stderr," %#lx",spark); +} + +void  +G_SPARKQ(spark,verbose)  +rtsSparkQ spark; +StgInt verbose; +{ + rtsSparkQ x; + + if (spark==(rtsSpark*)NULL) { +   belch("G_SPARKQ: NULL spark; aborting"); +   return; + } +    + fprintf(stderr,"RtsSparkQ (hd @%#lx):\n",spark); + for (x=spark; x!=NULL; x=x->next) { +   G_SPARK(x,verbose); + } + if (spark==NULL)  +   fprintf(stderr,"NIL\n"); + else +   fprintf(stderr,"\n"); +} + +void  +G_CURR_SPARKQ(verbose)  +StgInt verbose; +{ +  G_SPARKQ(pending_sparks_hd,verbose); +} + +//@node Processors, Shortcuts, Sparks, Debugging routines for GranSim and GUM +//@subsection Processors + +void  +G_PROC(proc,verbose) +StgInt proc; +StgInt verbose; +{  +  extern rtsEventQ EventHd; +  extern char *proc_status_names[]; + +  fprintf(stderr,"Status of proc %d 
at time %d (%#lx): %s (%s)\n", +          proc,CurrentTime[proc],CurrentTime[proc], +          (CurrentProc==proc)?"ACTIVE":"INACTIVE", +          proc_status_names[procStatus[proc]]); +  G_THREADQ(run_queue_hds[proc],verbose & 0x2); +  if ( (CurrentProc==proc) ) +    G_TSO(CurrentTSO,1); + +  if (EventHd!=NULL) +    fprintf(stderr,"Next event (%s) is on proc %d\n", +            event_names[EventHd->evttype],EventHd->proc); + +  if (verbose & 0x1) { +    fprintf(stderr,"\nREQUIRED sparks: "); +    G_SPARKQ(pending_sparks_hds[proc],1); +    fprintf(stderr,"\nADVISORY_sparks: "); +    G_SPARKQ(pending_sparks_hds[proc],1); +  } +} + +//@node Shortcuts, Printing info type, Processors, Debugging routines for GranSim and GUM +//@subsection Shortcuts + +/* Debug Processor */ +void  +GP(proc) +StgInt proc; +{ G_PROC(proc,1); +} + +/* Debug Current Processor */ +void +GCP(){ G_PROC(CurrentProc,2); } + +/* Debug TSO */ +void +GT(StgPtr tso){  +  G_TSO(tso,1); +} + +/* Debug CurrentTSO */ +void +GCT(){  +  fprintf(stderr,"Current Proc: %d\n",CurrentProc); +  G_TSO(CurrentTSO,1); +} + +/* Shorthand for debugging event queue */ +void +GEQ() { G_EVENTQ(1); } + +/* Shorthand for debugging thread queue of a processor */ +void  +GTQ(PEs p) { G_THREADQ(run_queue_hds[p],1); }  + +/* Shorthand for debugging thread queue of current processor */ +void  +GCTQ() { G_THREADQ(run_queue_hds[CurrentProc],1); }  + +/* Shorthand for debugging spark queue of a processor */ +void +GSQ(PEs p) { G_SPARKQ(pending_sparks_hds[p],1); } + +/* Shorthand for debugging spark queue of current processor */ +void +GCSQ() { G_CURR_SPARKQ(1); } + +/* Shorthand for printing a node */ +void +GN(StgPtr node) { G_PRINT_NODE(node); } + +/* Shorthand for printing info table */ +#if 0 +// ToDo: fix -- HWL +void +GIT(StgPtr node) { G_INFO_TABLE(node); } +#endif + +void  +printThreadQPtrs(void) +{ +  PEs p; +  for (p=0; p<RtsFlags.GranFlags.proc; p++) { +    fprintf(stderr,", PE %d: (hd=%p,tl=%p)",  +	    
run_queue_hds[p], run_queue_tls[p]); +  } +} + +void +printThreadQ(StgTSO *tso) { G_THREADQ(tso, 0); }; + +void +printSparkQ(rtsSpark *spark) { G_SPARKQ(spark, 0); }; + +void +printThreadQ_verbose(StgTSO *tso) { G_THREADQ(tso, 1); }; + +void +printSparkQ_verbose(rtsSpark *spark) { G_SPARKQ(spark, 1); }; + +/* Shorthand for some of ADRs debugging functions */ + +#endif /* GRAN && GRAN_CHECK*/ + +#if 0 +void +DEBUG_PRINT_NODE(node) +StgPtr node; +{ +   W_ info_ptr = INFO_PTR(node); +   StgInt size = 0, ptrs = 0, i, vhs = 0; +   char info_type[80]; + +   info_hdr_type(info_ptr, info_type); + +   size_and_ptrs(node,&size,&ptrs); +   vhs = var_hdr_size(node); + +   fprintf(stderr,"Node: 0x%lx", (W_) node); + +#if defined(PAR) +   fprintf(stderr," [GA: 0x%lx]",GA(node)); +#endif + +#if defined(PROFILING) +   fprintf(stderr," [CC: 0x%lx]",CC_HDR(node)); +#endif + +#if defined(GRAN) +   fprintf(stderr," [Bitmask: 0%lo]",PROCS(node)); +#endif + +   fprintf(stderr," IP: 0x%lx (%s), size %ld, %ld ptrs\n", +                  info_ptr,info_type,size,ptrs); + +   /* For now, we ignore the variable header */ + +   for(i=0; i < size; ++i) +     { +       if(i == 0) +         fprintf(stderr,"Data: "); + +       else if(i % 6 == 0) +         fprintf(stderr,"\n      "); + +       if(i < ptrs) +         fprintf(stderr," 0x%lx[P]",*(node+_FHS+vhs+i)); +       else +         fprintf(stderr," %lu[D]",*(node+_FHS+vhs+i)); +     } +   fprintf(stderr, "\n"); +} + + +#define INFO_MASK       0x80000000 + +void +DEBUG_TREE(node) +StgPtr node; +{ +  W_ size = 0, ptrs = 0, i, vhs = 0; + +  /* Don't print cycles */ +  if((INFO_PTR(node) & INFO_MASK) != 0) +    return; + +  size_and_ptrs(node,&size,&ptrs); +  vhs = var_hdr_size(node); + +  DEBUG_PRINT_NODE(node); +  fprintf(stderr, "\n"); + +  /* Mark the node -- may be dangerous */ +  INFO_PTR(node) |= INFO_MASK; + +  for(i = 0; i < ptrs; ++i) +    DEBUG_TREE((StgPtr)node[i+vhs+_FHS]); + +  /* Unmark the node */ +  INFO_PTR(node) &= ~INFO_MASK; 
+} + + +void +DEBUG_INFO_TABLE(node) +StgPtr node; +{ +  W_ info_ptr = INFO_PTR(node); +  char *iStgPtrtype = info_hdr_type(info_ptr); + +  fprintf(stderr,"%s Info Ptr @0x%lx; Entry: 0x%lx; Size: %lu; Ptrs: %lu\n\n", +                 iStgPtrtype,info_ptr,(W_) ENTRY_CODE(info_ptr),INFO_SIZE(info_ptr),INFO_NoPTRS(info_ptr)); +#if defined(PAR) +  fprintf(stderr,"Enter Flush Entry: 0x%lx;\tExit Flush Entry: 0x%lx\n",INFO_FLUSHENT(info_ptr),INFO_FLUSH(info_ptr)); +#endif + +#if defined(PROFILING) +  fprintf(stderr,"Cost Centre (???):       0x%lx\n",INFO_CAT(info_ptr)); +#endif + +#if defined(_INFO_COPYING) +  fprintf(stderr,"Evacuate Entry:    0x%lx;\tScavenge Entry: 0x%lx\n", +          INFO_EVAC_2S(info_ptr),INFO_SCAV_2S(info_ptr)); +#endif + +#if defined(_INFO_COMPACTING) +  fprintf(stderr,"Scan Link:         0x%lx;\tScan Move:      0x%lx\n", +          (W_) INFO_SCAN_LINK_1S(info_ptr), (W_) INFO_SCAN_MOVE_1S(info_ptr)); +  fprintf(stderr,"Mark:              0x%lx;\tMarked:         0x%lx;\t", +          (W_) INFO_MARK_1S(info_ptr), (W_) INFO_MARKED_1S(info_ptr)); +#if 0 /* avoid INFO_TYPE */ +  if(BASE_INFO_TYPE(info_ptr)==INFO_SPEC_TYPE) +    fprintf(stderr,"plus specialised code\n"); +  else +    fprintf(stderr,"Marking:           0x%lx\n",(W_) INFO_MARKING_1S(info_ptr)); +#endif /* 0 */ +#endif /* _INFO_COMPACTING */ +} +#endif /* 0 */ + +//@node Printing info type, Printing Packet Contents, Shortcuts, Debugging routines for GranSim and GUM +//@subsection Printing info type + +char * +display_info_type(closure, str) +StgClosure *closure; +char *str; +{  +  strcpy(str,""); +  if ( closure_HNF(closure) ) +    strcat(str,"|_HNF "); +  else if ( closure_BITMAP(closure) ) +    strcat(str,"|_BTM"); +  else if ( !closure_SHOULD_SPARK(closure) ) +    strcat(str,"|_NS"); +  else if ( closure_STATIC(closure) ) +    strcat(str,"|_STA"); +  else if ( closure_THUNK(closure) ) +    strcat(str,"|_THU"); +  else if ( closure_MUTABLE(closure) ) +    strcat(str,"|_MUT"); +  else 
if ( closure_UNPOINTED(closure) ) +    strcat(str,"|_UPT"); +  else if ( closure_SRT(closure) ) +    strcat(str,"|_SRT"); + +  return(str); +} + +char * +info_type(StgClosure *closure){  +  return closure_type_names[get_itbl(closure)->type]; +} + +char * +info_type_by_ip(StgInfoTable *ip){  +  return closure_type_names[ip->type]; +} + +void +info_hdr_type(StgClosure *closure, char *res){  +  strcpy(res,closure_type_names[get_itbl(closure)->type]); +} + +/* +  PrintPacket is in Pack.c because it makes use of closure queues +*/ + +#if defined(GRAN) || defined(PAR) + +/* +  Print graph rooted at q. The structure of this recursive printing routine +  should be the same as in the graph traversals when packing a graph in +  GUM. Thus, it demonstrates the structure of such a generic graph +  traversal, and in particular, how to extract pointer and non-pointer info +  from the multitude of different heap objects available.  + +  {evacuate}Daq ngoqvam nIHlu'pu'!! +*/ + +void +PrintGraph(StgClosure *p, int indent_level) +{ +  StgPtr x, q; +  rtsBool printed = rtsFalse; +  nat i, j; +  const StgInfoTable *info; +   +  q = p;			/* save ptr to object */ +   +  /* indentation */ +  for (j=0; j<indent_level; j++) +    fputs(" ", stderr); + +  ASSERT(p && (LOOKS_LIKE_GHC_INFO(GET_INFO((StgClosure *)p)) +              || IS_HUGS_CONSTR_INFO(GET_INFO((StgClosure *)p)))); + +  printClosure(p); // prints contents of this one closure + +  /* indentation */ +  for (j=0; j<indent_level; j++) +    fputs(" ", stderr); + +  info = get_itbl((StgClosure *)p); +  /* the rest of this fct recursively traverses the graph */ +  switch (info -> type) { +   +  case BCO: +    { +  	StgBCO* bco = stgCast(StgBCO*,p); +  	nat i; +	fprintf(stderr, "BCO (%p) with %d pointers\n", p, bco->n_ptrs); +  	for (i = 0; i < bco->n_ptrs; i++) { +  	  // bcoConstCPtr(bco,i) =  +	  PrintGraph(bcoConstCPtr(bco,i), indent_level+1); +  	} +  	// p += bco_sizeW(bco); +  	break; +    } +   +  case MVAR: +    /* treat 
MVars specially, because we don't want to PrintGraph the +     * mut_link field in the middle of the closure. +     */ +    {  +  	StgMVar *mvar = ((StgMVar *)p); +  	// evac_gen = 0; +	fprintf(stderr, "MVAR (%p) with 3 pointers (head, tail, value)\n", p); +  	// (StgClosure *)mvar->head =  +	PrintGraph((StgClosure *)mvar->head, indent_level+1); +  	// (StgClosure *)mvar->tail =  +	PrintGraph((StgClosure *)mvar->tail, indent_level+1); +  	//(StgClosure *)mvar->value =  +	PrintGraph((StgClosure *)mvar->value, indent_level+1); +  	// p += sizeofW(StgMVar); +  	// evac_gen = saved_evac_gen; +  	break; +    } +   +  case THUNK_2_0: +    if (!printed) { +      fprintf(stderr, "THUNK_2_0 (%p) with 2 pointers\n", p); +      printed = rtsTrue; +    } +  case FUN_2_0: +    if (!printed) { +      fprintf(stderr, "FUN_2_0 (%p) with 2 pointers\n", p); +      printed = rtsTrue; +    } +    // scavenge_srt(info); +  case CONSTR_2_0: +    if (!printed) { +      fprintf(stderr, "CONSTR_2_0 (%p) with 2 pointers\n", p); +      printed = rtsTrue; +    } +    // ((StgClosure *)p)->payload[0] =  +    PrintGraph(((StgClosure *)p)->payload[0], +	       indent_level+1); +    // ((StgClosure *)p)->payload[1] =  +    PrintGraph(((StgClosure *)p)->payload[1], +	       indent_level+1); +    // p += sizeofW(StgHeader) + 2; +    break; +   +  case THUNK_1_0: +    // scavenge_srt(info); +    fprintf(stderr, "THUNK_1_0 (%p) with 1 pointer\n", p); +    // ((StgClosure *)p)->payload[0] =  +    PrintGraph(((StgClosure *)p)->payload[0], +	       indent_level+1); +    // p += sizeofW(StgHeader) + 2; /* MIN_UPD_SIZE */ +    break; +   +  case FUN_1_0: +    if (!printed) { +      fprintf(stderr, "FUN_1_0 (%p) with 1 pointer\n", p); +      printed = rtsTrue; +    } +    // scavenge_srt(info); +  case CONSTR_1_0: +    if (!printed) { +      fprintf(stderr, "CONSTR_2_0 (%p) with 2 pointers\n", p); +      printed = rtsTrue; +    } +    // ((StgClosure *)p)->payload[0] =  +    PrintGraph(((StgClosure 
*)p)->payload[0], +	       indent_level+1); +    // p += sizeofW(StgHeader) + 1; +    break; +   +  case THUNK_0_1: +    fprintf(stderr, "THUNK_0_1 (%p) with 0 pointers\n", p); +    // scavenge_srt(info); +    // p += sizeofW(StgHeader) + 2; /* MIN_UPD_SIZE */ +    break; +   +  case FUN_0_1: +    fprintf(stderr, "FUN_0_1 (%p) with 0 pointers\n", p); +    //scavenge_srt(info); +  case CONSTR_0_1: +    fprintf(stderr, "CONSTR_0_1 (%p) with 0 pointers\n", p); +    //p += sizeofW(StgHeader) + 1; +    break; +   +  case THUNK_0_2: +    if (!printed) { +      fprintf(stderr, "THUNK_0_2 (%p) with 0 pointers\n", p); +      printed = rtsTrue; +    } +  case FUN_0_2: +    if (!printed) { +      fprintf(stderr, "FUN_0_2 (%p) with 0 pointers\n", p); +      printed = rtsTrue; +    } +    // scavenge_srt(info); +  case CONSTR_0_2: +    if (!printed) { +      fprintf(stderr, "CONSTR_0_2 (%p) with 0 pointers\n", p); +      printed = rtsTrue; +    } +    // p += sizeofW(StgHeader) + 2; +    break; +   +  case THUNK_1_1: +    if (!printed) { +      fprintf(stderr, "THUNK_1_1 (%p) with 1 pointer\n", p); +      printed = rtsTrue; +    } +  case FUN_1_1: +    if (!printed) { +      fprintf(stderr, "FUN_1_1 (%p) with 1 pointer\n", p); +      printed = rtsTrue; +    } +    // scavenge_srt(info); +  case CONSTR_1_1: +    if (!printed) { +      fprintf(stderr, "CONSTR_1_1 (%p) with 1 pointer\n", p); +      printed = rtsTrue; +    } +    // ((StgClosure *)p)->payload[0] =  +    PrintGraph(((StgClosure *)p)->payload[0], +	       indent_level+1); +    // p += sizeofW(StgHeader) + 2; +    break; +   +  case FUN: +    if (!printed) { +      fprintf(stderr, "FUN (%p) with %d pointers\n", p, info->layout.payload.ptrs); +      printed = rtsTrue; +    } +    /* fall through */ +   +  case THUNK: +    if (!printed) { +      fprintf(stderr, "THUNK (%p) with %d pointers\n", p, info->layout.payload.ptrs); +      printed = rtsTrue; +    } +    // scavenge_srt(info); +    /* fall through */ +   +  case 
CONSTR: +    if (!printed) { +      fprintf(stderr, "CONSTR (%p) with %d pointers\n", p, info->layout.payload.ptrs); +      printed = rtsTrue; +    } +    /* basically same as loop in STABLE_NAME case  */ +    for (i=0; i<info->layout.payload.ptrs; i++) +      PrintGraph(((StgClosure *)p)->payload[i], +		 indent_level+1); +    break; +    /* NOT fall through */ +   +  case WEAK: +    if (!printed) { +      fprintf(stderr, "WEAK (%p) with %d pointers\n", p, info->layout.payload.ptrs); +      printed = rtsTrue; +    } +    /* fall through */ +   +  case FOREIGN: +    if (!printed) { +      fprintf(stderr, "FOREIGN (%p) with %d pointers\n", p, info->layout.payload.ptrs); +      printed = rtsTrue; +    } +    /* fall through */ +   +  case STABLE_NAME: +    { +      StgPtr end; +       +      if (!printed) { +	fprintf(stderr, "STABLE_NAME (%p) with %d pointers (not followed!)\n",  +		p, info->layout.payload.ptrs); +	printed = rtsTrue; +      } +      end = (StgPtr)((StgClosure *)p)->payload + info->layout.payload.ptrs; +      for (p = (StgPtr)((StgClosure *)p)->payload; p < end; p++) { +	// (StgClosure *)*p =  +	//PrintGraph((StgClosure *)*p, indent_level+1); +	fprintf(stderr, ", %p", *p);  +      } +      //fputs("\n", stderr); +      // p += info->layout.payload.nptrs; +      break; +    } +   +  case IND_PERM: +    //if (step->gen->no != 0) { +    //	SET_INFO(((StgClosure *)p), &IND_OLDGEN_PERM_info); +    //} +    if (!printed) { +      fprintf(stderr, "IND_PERM (%p) with indirection to\n",  +	      p, ((StgIndOldGen *)p)->indirectee); +      printed = rtsTrue; +    } +    /* fall through */ + +  case IND_OLDGEN_PERM: +    if (!printed) { +      fprintf(stderr, "IND_OLDGEN_PERM (%p) with indirection to %p\n",  +	      p, ((StgIndOldGen *)p)->indirectee); +      printed = rtsTrue; +    } +    // ((StgIndOldGen *)p)->indirectee =  +    PrintGraph(((StgIndOldGen *)p)->indirectee, +	       indent_level+1); +    //if (failed_to_evac) { +    //	failed_to_evac = rtsFalse; 
+    //	recordOldToNewPtrs((StgMutClosure *)p); +    //} +    // p += sizeofW(StgIndOldGen); +    break; +   +  case CAF_UNENTERED: +    { +  	StgCAF *caf = (StgCAF *)p; +   +	fprintf(stderr, "CAF_UNENTERED (%p) pointing to %p\n", p, caf->body); +  	PrintGraph(caf->body, indent_level+1); +  	//if (failed_to_evac) { +  	//  failed_to_evac = rtsFalse; +  	//  recordOldToNewPtrs((StgMutClosure *)p); +  	//} else { +  	//  caf->mut_link = NULL; +  	//} +	//p += sizeofW(StgCAF); +  	break; +    } +   +  case CAF_ENTERED: +    { +  	StgCAF *caf = (StgCAF *)p; +   +	fprintf(stderr, "CAF_ENTERED (%p) pointing to %p and %p\n",  +		p, caf->body, caf->value); +  	// caf->body =  +	PrintGraph(caf->body, indent_level+1); +  	//caf->value =  +	PrintGraph(caf->value, indent_level+1); +  	//if (failed_to_evac) { +  	//  failed_to_evac = rtsFalse; +  	//  recordOldToNewPtrs((StgMutClosure *)p); +  	//} else { +  	//  caf->mut_link = NULL; +  	//} +	//p += sizeofW(StgCAF); +  	break; +    } +   +  case MUT_VAR: +    /* ignore MUT_CONSs */ +    fprintf(stderr, "MUT_VAR (%p) pointing to %p\n", p, ((StgMutVar *)p)->var); +    if (((StgMutVar *)p)->header.info != &MUT_CONS_info) { +      //evac_gen = 0; +      PrintGraph(((StgMutVar *)p)->var, indent_level+1); +  	//evac_gen = saved_evac_gen; +    } +    //p += sizeofW(StgMutVar); +    break; +   +  case CAF_BLACKHOLE: +    if (!printed) { +      fprintf(stderr, "CAF_BLACKHOLE (%p) with 0 pointers\n", p); +      printed = rtsTrue; +    } +  case SE_CAF_BLACKHOLE: +    if (!printed) { +      fprintf(stderr, "SE_CAF_BLACKHOLE (%p) with 0 pointers\n", p); +      printed = rtsTrue; +    } +  case SE_BLACKHOLE: +    if (!printed) { +      fprintf(stderr, "SE_BLACKHOLE (%p) with 0 pointers\n", p); +      printed = rtsTrue; +    } +  case BLACKHOLE: +    if (!printed) { +      fprintf(stderr, "BLACKHOLE (%p) with 0 pointers\n", p); +      printed = rtsTrue; +    } +    //p += BLACKHOLE_sizeW(); +    break; +   +  case BLACKHOLE_BQ: +    {  +   
   StgBlockingQueue *bh = (StgBlockingQueue *)p; +      // (StgClosure *)bh->blocking_queue =  +      fprintf(stderr, "BLACKHOLE_BQ (%p) pointing to %p\n",  +	      p, (StgClosure *)bh->blocking_queue); +      PrintGraph((StgClosure *)bh->blocking_queue, indent_level+1); +      //if (failed_to_evac) { +      //  failed_to_evac = rtsFalse; +      //  recordMutable((StgMutClosure *)bh); +      //} +      // p += BLACKHOLE_sizeW(); +      break; +    } +   +  case THUNK_SELECTOR: +    {  +      StgSelector *s = (StgSelector *)p; +      fprintf(stderr, "THUNK_SELECTOR (%p) pointing to %p\n",  +	      p, s->selectee); +      PrintGraph(s->selectee, indent_level+1); +      // p += THUNK_SELECTOR_sizeW(); +      break; +    } +   +  case IND: +    fprintf(stderr, "IND (%p) pointing to %p\n", p, ((StgInd*)p)->indirectee); +    PrintGraph(((StgInd*)p)->indirectee, indent_level+1); +    break; + +  case IND_OLDGEN: +    fprintf(stderr, "IND_OLDGEN (%p) pointing to %p\n",  +	    p, ((StgIndOldGen*)p)->indirectee); +    PrintGraph(((StgIndOldGen*)p)->indirectee, indent_level+1); +    break; +   +  case CONSTR_INTLIKE: +    fprintf(stderr, "CONSTR_INTLIKE (%p) with 0 pointers\n", p); +    break; +  case CONSTR_CHARLIKE: +    fprintf(stderr, "CONSTR_CHARLIKE (%p) with 0 pointers\n", p); +    break; +  case CONSTR_STATIC: +    fprintf(stderr, "CONSTR_STATIC (%p) with 0 pointers\n", p); +    break; +  case CONSTR_NOCAF_STATIC: +    fprintf(stderr, "CONSTR_NOCAF_STATIC (%p) with 0 pointers\n", p); +    break; +  case THUNK_STATIC: +    fprintf(stderr, "THUNK_STATIC (%p) with 0 pointers\n", p); +    break; +  case FUN_STATIC: +    fprintf(stderr, "FUN_STATIC (%p) with 0 pointers\n", p); +    break; +  case IND_STATIC: +    fprintf(stderr, "IND_STATIC (%p) with 0 pointers\n", p); +    break; +   +  case RET_BCO: +    fprintf(stderr, "RET_BCO (%p) with 0 pointers\n", p); +    break; +  case RET_SMALL: +    fprintf(stderr, "RET_SMALL (%p) with 0 pointers\n", p); +    break; +  case 
RET_VEC_SMALL: +    fprintf(stderr, "RET_VEC_SMALL (%p) with 0 pointers\n", p); +    break; +  case RET_BIG: +    fprintf(stderr, "RET_BIG (%p) with 0 pointers\n", p); +    break; +  case RET_VEC_BIG: +    fprintf(stderr, "RET_VEC_BIG (%p) with 0 pointers\n", p); +    break; +  case RET_DYN: +    fprintf(stderr, "RET_DYN (%p) with 0 pointers\n", p); +    break; +  case UPDATE_FRAME: +    fprintf(stderr, "UPDATE_FRAME (%p) with 0 pointers\n", p); +    break; +  case STOP_FRAME: +    fprintf(stderr, "STOP_FRAME (%p) with 0 pointers\n", p); +    break; +  case CATCH_FRAME: +    fprintf(stderr, "CATCH_FRAME (%p) with 0 pointers\n", p); +    break; +  case SEQ_FRAME: +    fprintf(stderr, "SEQ_FRAME (%p) with 0 pointers\n", p); +    break; +   +  case AP_UPD: /* same as PAPs */ +    fprintf(stderr, "AP_UPD (%p) with 0 pointers\n", p); +  case PAP: +    /* Treat a PAP just like a section of stack, not forgetting to +     * PrintGraph the function pointer too... +     */ +    {  +  	StgPAP* pap = stgCast(StgPAP*,p); +   +	fprintf(stderr, "PAP (%p) pointing to %p\n", p, pap->fun); +  	// pap->fun =  +	PrintGraph(pap->fun, indent_level+1); +  	//scavenge_stack((P_)pap->payload, (P_)pap->payload + pap->n_args); +  	//p += pap_sizeW(pap); +  	break; +    } +     +  case ARR_WORDS: +    fprintf(stderr, "ARR_WORDS (%p) with 0 pointers\n", p); +    /* nothing to follow */ +    //p += arr_words_sizeW(stgCast(StgArrWords*,p)); +    break; +   +  case MUT_ARR_PTRS: +    /* follow everything */ +    { +  	StgPtr next; +   +	fprintf(stderr, "MUT_ARR_PTRS (%p) with %d pointers (not followed)\n",  +		p, mut_arr_ptrs_sizeW((StgMutArrPtrs*)p)); +  	// evac_gen = 0;		/* repeatedly mutable */ +  	next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); +  	for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) { +  	  // (StgClosure *)*p =  +	  // PrintGraph((StgClosure *)*p, indent_level+1); +	  fprintf(stderr, ", %p", *p);  +  	} +	fputs("\n", stderr); +  	//evac_gen = saved_evac_gen; +  	
break; +    } +   +  case MUT_ARR_PTRS_FROZEN: +    /* follow everything */ +    { +  	StgPtr start = p, next; +   +	fprintf(stderr, "MUT_ARR_PTRS (%p) with %d pointers (not followed)",  +		p, mut_arr_ptrs_sizeW((StgMutArrPtrs*)p)); +  	next = p + mut_arr_ptrs_sizeW((StgMutArrPtrs*)p); +  	for (p = (P_)((StgMutArrPtrs *)p)->payload; p < next; p++) { +  	  // (StgClosure *)*p =  +	  // PrintGraph((StgClosure *)*p, indent_level+1); +	  fprintf(stderr, ", %p", *p);  +  	} +	fputs("\n", stderr); +  	//if (failed_to_evac) { +  	  /* we can do this easier... */ +  	//  recordMutable((StgMutClosure *)start); +  	//  failed_to_evac = rtsFalse; +  	//} +  	break; +    } +   +  case TSO: +    {  +  	StgTSO *tso; +  	 +  	tso = (StgTSO *)p; +	fprintf(stderr, "TSO (%p) with link field %p\n", p, (StgClosure *)tso->link); +  	// evac_gen = 0; +  	/* chase the link field for any TSOs on the same queue */ +  	// (StgClosure *)tso->link =  +	PrintGraph((StgClosure *)tso->link, indent_level+1); +  	//if (tso->blocked_on) { +  	//  tso->blocked_on = PrintGraph(tso->blocked_on); +  	//} +  	/* scavenge this thread's stack */ +  	//scavenge_stack(tso->sp, &(tso->stack[tso->stack_size])); +  	//evac_gen = saved_evac_gen; +  	//p += tso_sizeW(tso); +  	break; +    } +   +#if defined(GRAN) || defined(PAR) +  case RBH: +    { +    StgInfoTable *rip = REVERT_INFOPTR(get_itbl(p)); +    //if (LOOKS_LIKE_GHC_INFO(rip)) +    //  fprintf(stderr, "RBH (%p) with 0 pointers (reverted type=%s)\n",  +	//      p, info_type_by_ip(rip));  +    //else +    fprintf(stderr, "RBH (%p) with 0 pointers (reverted IP=%x)\n",  +	    p, rip);  +    } +    break; +#endif +#if defined(PAR) +  case BLOCKED_FETCH: +    fprintf(stderr, "BLOCKED_FETCH (%p) with 0 pointers (link=%p)\n",  +	    p, ((StgBlockedFetch *)p)->link); +    break; +  case FETCH_ME: +    fprintf(stderr, "FETCH_ME (%p) with 0 pointers\n", p); +    break; +  case FETCH_ME_BQ: +    fprintf(stderr, "FETCH_ME_BQ (%p) with 0 pointers 
(blocking_queue=%p)\n",  +	    p, ((StgFetchMeBlockingQueue *)p)->blocking_queue); +    break; +#endif +  case EVACUATED: +    fprintf(stderr, "EVACUATED (%p) with 0 pointers (evacuee=%p)\n",  +	    p, ((StgEvacuated *)p)->evacuee); +    break; +   +  default: +    barf("PrintGraph: unknown closure %d (%s)", +	 info -> type, info_type(info)); +  } +   +  /* If we didn't manage to promote all the objects pointed to by +   * the current object, then we have to designate this object as +   * mutable (because it contains old-to-new generation pointers). +   */ +  //if (failed_to_evac) { +  //  mkMutCons((StgClosure *)q, &generations[evac_gen]); +  //  failed_to_evac = rtsFalse; +  //} +}     + +#endif /* GRAN */ + +#endif /* GRAN || PAR */ +//@node End of File,  , Printing Packet Contents, Debugging routines for GranSim and GUM +//@subsection End of File diff --git a/ghc/rts/parallel/ParallelDebug.h b/ghc/rts/parallel/ParallelDebug.h new file mode 100644 index 0000000000..62f2232071 --- /dev/null +++ b/ghc/rts/parallel/ParallelDebug.h @@ -0,0 +1,49 @@ +/*  +   Time-stamp: <Mon Nov 29 1999 17:17:13 Stardate: [-30]3973.60 hwloidl> + +   Prototypes of all parallel debugging functions. 
+   */ + +#ifndef PARALLEL_DEBUG_H +#define PARALLEL_DEBUG_H + +#if defined(GRAN) // || defined(PAR) +void G_PRINT_NODE(StgClosure* node); +void G_PPN(StgClosure* node); +void G_INFO_TABLE(StgClosure* node); +void G_CURR_THREADQ(StgInt verbose); +void G_THREADQ(StgTSO* closure, StgInt verbose); +void G_TSO(StgTSO* closure, StgInt verbose); +void G_EVENT(rtsEventQ event, StgInt verbose); +void G_EVENTQ(StgInt verbose); +void G_PE_EQ(PEs pe, StgInt verbose); +void G_SPARK(rtsSparkQ spark, StgInt verbose); +void G_SPARKQ(rtsSparkQ spark, StgInt verbose); +void G_CURR_SPARKQ(StgInt verbose); +void G_PROC(StgInt proc, StgInt verbose); +void GP(StgInt proc); +void GCP(void); +void GT(StgPtr tso); +void GCT(void); +void GEQ(void); +void GTQ(PEs p); +void GCTQ(void); +void GSQ(PEs p); +void GCSQ(void); +void GN(StgPtr node); +void GIT(StgPtr node); +#endif + +#if defined(GRAN) || defined(PAR) + +char  *display_info_type(StgClosure *closure, char *str); +void   info_hdr_type(StgClosure *closure, char *res); +char  *info_type(StgClosure *closure); +char  *info_type_by_ip(StgInfoTable *ip); + +void   PrintPacket(rtsPackBuffer *buffer); +void   PrintGraph(StgClosure *p, int indent_level); + +#endif /* GRAN || PAR */ + +#endif /* PARALLEL_DEBUG_H */ diff --git a/ghc/rts/parallel/ParallelRts.h b/ghc/rts/parallel/ParallelRts.h new file mode 100644 index 0000000000..e1395419fc --- /dev/null +++ b/ghc/rts/parallel/ParallelRts.h @@ -0,0 +1,294 @@ +/* -------------------------------------------------------------------------- +   Time-stamp: <Wed Jan 12 2000 16:22:43 Stardate: [-30]4194.45 hwloidl> +   $Id: ParallelRts.h,v 1.2 2000/01/13 14:34:09 hwloidl Exp $ + +   Variables and functions specific to the parallel RTS (i.e. 
GUM or GranSim) +   ----------------------------------------------------------------------- */ + +#ifndef PARALLEL_RTS_H +#define PARALLEL_RTS_H + +#if defined(GRAN) || defined(PAR) + +//@menu +//* Packing routines::		 +//* Spark handling routines::	 +//* GC routines::		 +//* Debugging routines::	 +//* Generating .gr profiles::	 +//* Common macros::		 +//* Index::			 +//@end menu + +#ifndef GRAN +// Dummy def for NO_PRI if not in GranSim +#define NO_PRI  0 +#endif + +//@node Packing routines, Spark handling routines +//@subsection Packing routines + +#if defined(GRAN) +/* Statistics info */ +extern nat tot_packets, tot_packet_size, tot_cuts, tot_thunks; +#endif + +#if defined(GRAN) +/* Pack.c */ +rtsPackBuffer *PackNearbyGraph(StgClosure* closure, StgTSO* tso,  +			       nat *packBufferSize); +rtsPackBuffer *PackOneNode(StgClosure* closure, StgTSO* tso,  +			   nat *packBufferSize); +rtsPackBuffer *PackTSO(StgTSO *tso, nat *packBufferSize); +rtsPackBuffer *PackStkO(StgPtr stko, nat *packBufferSize); +void           PackFetchMe(StgClosure *closure); + +/* Unpack.c */ +StgClosure*    UnpackGraph(rtsPackBuffer* buffer); +void           InitPendingGABuffer(nat size);  + +/* RBH.c */ +StgClosure    *convertToRBH(StgClosure *closure); +void           convertFromRBH(StgClosure *closure); + +/* General closure predicates */ +/* +    {Parallel.h}Daq ngoqvam vIroQpu' + +StgInfoTable *get_closure_info(StgClosure* node, nat *size, nat *ptrs, nat *nonptrs, nat *vhs, char *info_hdr_ty); +rtsBool      IS_BLACK_HOLE(StgClosure* node); +StgClosure  *IS_INDIRECTION(StgClosure* node); +rtsBool      IS_THUNK(StgClosure* closure); +*/ + +#elif defined(PAR) + +/* Pack.c */ +rtsPackBuffer *PackNearbyGraph(StgClosure* closure, StgTSO* tso,  +			       nat *packBufferSize);  + +rtsPackBuffer *PackTSO(StgTSO *tso, nat *packBufferSize); +rtsPackBuffer *PackStkO(StgPtr stko, nat *packBufferSize); +void           PackFetchMe(StgClosure *closure); + +/* Unpack.c */ +void           
CommonUp(StgClosure *src, StgClosure *dst); +StgClosure    *UnpackGraph(rtsPackBuffer *buffer, globalAddr **gamap,  +			   nat *nGAs); + +/* RBH.c */ +StgClosure    *convertToRBH(StgClosure *closure); +void           convertToFetchMe(StgRBH *rbh, globalAddr *ga); + +/* General closure predicates */ +/*  +  {Parallel.h}Daq ngoqvam vIroQpu' + +StgInfoTable *get_closure_info(StgClosure* node, nat *size, nat *ptrs, nat *nonptrs, nat *vhs, char *info_hdr_ty); +rtsBool      IS_BLACK_HOLE(StgClosure* node); +StgClosure  *IS_INDIRECTION(StgClosure* node); +rtsBool      IS_THUNK(StgClosure* closure); +*/ + +#endif + +/* this routine should be moved to a more general module; currently in Pack.c  +StgInfoTable* get_closure_info(StgClosure* node,  +			       nat *size, nat *ptrs, nat *nonptrs, nat *vhs,  +			       char *info_hdr_ty); +*/ +void doGlobalGC(void);  + +//@node Spark handling routines, GC routines, Packing routines +//@subsection Spark handling routines + +/* now in ../Sparks.c */ + +#if 0 + +#if defined(PAR) + +rtsSpark  findLocalSpark(rtsBool forexport); +StgTSO*   activateSpark (rtsSpark spark);  +void      disposeSpark(rtsSpark spark); +rtsBool   add_to_spark_queue(StgClosure *closure, rtsBool required); +rtsBool   initSparkPools (void); + +nat       spark_queue_len(nat pool); +void      markSparkQueue(void); +void      print_sparkq(void); + +#elif defined(GRAN) + +void      findLocalSpark (rtsEvent *event,  +			  rtsBool *found_res, rtsSparkQ *spark_res); +rtsBool   activateSpark (rtsEvent *event, rtsSparkQ spark); +rtsSpark *newSpark (StgClosure *node, StgInt name, StgInt gran_info,  +		    StgInt size_info, StgInt par_info, StgInt local); +void      disposeSpark(rtsSpark *spark); +void      disposeSparkQ(rtsSparkQ spark); +void      add_to_spark_queue(rtsSpark *spark); +void      print_spark(rtsSpark *spark); +nat       spark_queue_len(PEs proc); +rtsSpark *delete_from_sparkq (rtsSpark *spark, PEs p, rtsBool dispose_too); +void      markSparkQueue(void); 
+void      print_sparkq(PEs proc); +void      print_sparkq_stats(void); + +#endif +#endif /* 0 */ + +//@node GC routines, Debugging routines, Spark handling routines +//@subsection GC routines + +#if defined(PAR) +/* HLComms.c */ +void      freeRemoteGA(int pe, globalAddr *ga); +void      sendFreeMessages(void); + +/* Global.c */ +void      markLocalGAs(rtsBool full); +void      RebuildGAtables(rtsBool full); +void      RebuildLAGAtable(void); +#endif + +//@node Debugging routines, Generating .gr profiles, GC routines +//@subsection Debugging routines + +#if defined(PAR) +void      printGA (globalAddr *ga); +void      printGALA (GALA *gala); +void      printLAGAtable(void); +#endif + +//@node Generating .gr profiles, Common macros, Debugging routines +//@subsection Generating .gr profiles + +#define STATS_FILENAME_MAXLEN	128 + +/* Where to write the log file */ +//@cindex gr_file +//@cindex gr_filename +extern FILE *gr_file; +extern char gr_filename[STATS_FILENAME_MAXLEN]; + +//@cindex init_gr_simulation +//@cindex end_gr_simulation +void init_gr_simulation(int rts_argc, char *rts_argv[],  +			int prog_argc, char *prog_argv[]); +void end_gr_simulation(void); + +//@node Common macros, Index, Generating .gr profiles +//@subsection Common macros + +/*  +   extracting specific info out of a closure; used in packing (GranSim, GUM) +*/ +//@cindex get_closure_info +static inline StgInfoTable* +get_closure_info(node, size, ptrs, nonptrs, vhs, info_hdr_ty) +StgClosure* node; +nat *size, *ptrs, *nonptrs, *vhs; +char *info_hdr_ty; +{ +  StgInfoTable *info; + +  info = get_itbl(node); +  /* the switch shouldn't be necessary, really; just use default case */ +  switch (info->type) { +  case RBH: +    { +      StgInfoTable *rip = REVERT_INFOPTR(info); // closure to revert to +      *size = sizeW_fromITBL(rip); +      *ptrs = (nat) (rip->layout.payload.ptrs); +      *nonptrs = (nat) (rip->layout.payload.nptrs); +      *vhs = (nat) 0; // unknown +#if 0 /* DEBUG */ +      
info_hdr_type(node, info_hdr_ty); +#else +      strcpy(info_hdr_ty, "UNKNOWN"); +#endif +      return rip;  // NB: we return the reverted info ptr for a RBH!!!!!! +    } + +  default: +    *size = sizeW_fromITBL(info); +    *ptrs = (nat) (info->layout.payload.ptrs); +    *nonptrs = (nat) (info->layout.payload.nptrs); +    *vhs = (nat) 0; // unknown +#if 0 /* DEBUG */ +      info_hdr_type(node, info_hdr_ty); +#else +      strcpy(info_hdr_ty, "UNKNOWN"); +#endif +    return info; +  } +}  + +//@cindex IS_BLACK_HOLE +static inline rtsBool +IS_BLACK_HOLE(StgClosure* node)           +{  +  StgInfoTable *info; +  switch (get_itbl(node)->type) { +  case BLACKHOLE: +  case BLACKHOLE_BQ: +  case RBH: +  case FETCH_ME: +  case FETCH_ME_BQ: +    return rtsTrue; +  default: +    return rtsFalse; +  } +//return ((info->type == BLACKHOLE || info->type == RBH) ? rtsTrue : rtsFalse); +} + +//@cindex IS_INDIRECTION +static inline StgClosure * +IS_INDIRECTION(StgClosure* node)           +{  +  StgInfoTable *info; +  info = get_itbl(node); +  switch (info->type) { +    case IND: +    case IND_OLDGEN: +    case IND_PERM: +    case IND_OLDGEN_PERM: +    case IND_STATIC: +      /* relies on indirectee being at same place for all these closure types */ +      return (((StgInd*)node) -> indirectee); +    default: +      return NULL; +  } +} + +//@cindex unwindInd +static inline StgClosure * +UNWIND_IND (StgClosure *closure) +{ +  StgClosure *next; + +  while ((next = IS_INDIRECTION((StgClosure *)closure)) != NULL)  +    closure = next; + +  ASSERT(next==(StgClosure *)NULL); +  return closure; +} + +#endif /* defined(PAR) || defined(GRAN) */ + +#endif /* PARALLEL_RTS_H */ + +//@node Index,  , Common macros +//@subsection Index + +//@index +//* IS_BLACK_HOLE::  @cindex\s-+IS_BLACK_HOLE +//* IS_INDIRECTION::  @cindex\s-+IS_INDIRECTION +//* end_gr_simulation::  @cindex\s-+end_gr_simulation +//* get_closure_info::  @cindex\s-+get_closure_info +//* gr_file::  @cindex\s-+gr_file +//* 
gr_filename::  @cindex\s-+gr_filename +//* init_gr_simulation::  @cindex\s-+init_gr_simulation +//* unwindInd::  @cindex\s-+unwindInd +//@end index diff --git a/ghc/rts/parallel/RBH.c b/ghc/rts/parallel/RBH.c new file mode 100644 index 0000000000..faf25914c9 --- /dev/null +++ b/ghc/rts/parallel/RBH.c @@ -0,0 +1,338 @@ +/* +  Time-stamp: <Sun Dec 12 1999 20:39:04 Stardate: [-30]4039.09 software> + +  Revertible Black Hole Manipulation. +  Used in GUM and GranSim during the packing of closures. These black holes +  must be revertible because a GC might occur while the packet is being  +  transmitted. In this case all RBHs have to be reverted. +  */ + +#if defined(PAR) || defined(GRAN) /* whole file */ + +#include "Rts.h" +#include "RtsFlags.h" +#include "RtsUtils.h" +#include "GranSimRts.h" +#include "ParallelRts.h" +# if defined(DEBUG) +# include "ParallelDebug.h" +# endif +#include "Storage.h"  // for recordMutable +#include "StgMacros.h" // inlined IS_... fcts + +/* +   Turn a closure into a revertible black hole.  After the conversion, the +   first two words of the closure (after the fixed header, of course) will +   be a link to the mutables list (if appropriate for the garbage +   collector), and a pointer to the blocking queue.  The blocking queue is +   terminated by a 2-word SPEC closure which holds the original contents of +   the first two words of the closure.   +*/ + +//@menu +//* Externs and prototypes::	 +//* Conversion Functions::	 +//* Index::			 +//@end menu +//*/ + +//@node Externs and prototypes, Conversion Functions +//@section Externs and prototypes + +EXTFUN(RBH_Save_0_info); +EXTFUN(RBH_Save_1_info); +EXTFUN(RBH_Save_2_info); + +//@node Conversion Functions, Index, Externs and prototypes +//@section Conversion Functions + +/* +  A closure is turned into an RBH upon packing it (see PackClosure in Pack.c). +  This is needed in case we have to do a GC before the packet is turned +  into a graph on the PE receiving the packet.  
+*/ +//@cindex convertToRBH +StgClosure * +convertToRBH(closure) +StgClosure *closure; +{ +  StgRBHSave *rbh_save; +  StgInfoTable *info_ptr, *rbh_info_ptr, *old_info; +  nat size, ptrs, nonptrs, vhs; +  char str[80]; + +  /* +     Closure layout before this routine runs amuck: +       +------------------- +       |   HEADER   | DATA ... +       +------------------- +       | FIXED_HS   | +  */ +  /*  +     Turn closure into an RBH.  This is done by modifying the info_ptr, +     grabbing the info_ptr of the RBH for this closure out of its +     ITBL. Additionally, we have to save the words from the closure, which +     will hold the link to the blocking queue.  For this purpose we use the +     RBH_Save_N closures, with N being the number of pointers for this +     closure.  */ +  IF_GRAN_DEBUG(pack, +		belch(":*   Converting closure %p (%s) into an RBH", +		      closure, info_type(closure)));  +  IF_PAR_DEBUG(pack, +		belch(":*   Converting closure %p (%s) into an RBH", +		      closure, info_type(closure)));  + +  ASSERT(closure_THUNK(closure)); + +  IF_GRAN_DEBUG(pack, +		old_info = get_itbl(closure)); + +  /* Allocate a new closure for the holding data ripped out of closure */ +  if ((rbh_save = (StgRBHSave *)allocate(FIXED_HS + 2)) == NULL) +    return NULL;  /* have to Garbage Collect; check that in the caller! */ + +  info_ptr = get_closure_info(closure, &size, &ptrs, &nonptrs, &vhs, str); +  ASSERT(size >= MIN_UPD_SIZE); + +  /* Fill in the RBH_Save closure with the original data from closure */ +  rbh_save->payload[0] = (StgPtr) ((StgRBH *)closure)->blocking_queue; +  rbh_save->payload[1] = (StgPtr) ((StgRBH *)closure)->mut_link; + +  /* Set the info_ptr for the rbh_Save closure according to the number of +     pointers in the original */ + +  rbh_info_ptr = (StgInfoTable *) (ptrs == 0 ? &RBH_Save_0_info : +				   ptrs == 1 ? 
&RBH_Save_1_info : +				   &RBH_Save_2_info); +  SET_INFO(rbh_save, rbh_info_ptr); +  /* same bitmask as the original closure */ +  SET_GRAN_HDR(rbh_save, PROCS(closure)); + +  /* Init the blocking queue of the RBH and have it point to the saved data */ +  ((StgRBH *)closure)->blocking_queue = (StgBlockingQueueElement *)rbh_save; + +  ASSERT(LOOKS_LIKE_GHC_INFO(RBH_INFOPTR(get_itbl(closure)))); +  /* Turn the closure into a RBH;  a great system, indeed! */ +  SET_INFO(closure, RBH_INFOPTR(get_itbl(closure))); + +  /* +    add closure to the mutable list! +    do this after having turned the closure into an RBH, because an +    RBH is mutable but the think it was previously isn't +  */ +  //recordMutable((StgMutClosure *)closure); + +  //IF_GRAN_DEBUG(pack, +		/* sanity check; make sure that reverting the RBH yields the  +		   orig closure, again */ +  //ASSERT(REVERT_INFOPTR(get_itbl(closure))==old_info)); + +  /* +     Closure layout after this routine has run amuck: +       +--------------------- +       | RBH-HEADER | |   |  ... +       +--------------|---|-- +       | FIXED_HS   | |   v +                      |   Mutable-list ie another StgMutClosure +		      v +		      +--------- +		      | RBH_SAVE with 0-2 words of DATA +		      +--------- +  */ + +  return closure; +} + +/* +  An RBH closure is turned into a FETCH_ME when reveiving an ACK message +  indicating that the transferred closure has been unpacked on the other PE +  (see processAck in HLComms.c). The ACK also contains the new GA of the +  closure to which the FETCH_ME closure has to point. + +  Converting a closure to a FetchMe is trivial, unless the closure has +  acquired a blocking queue.  If that has happened, we first have to awaken +  the blocking queue.  What a nuisance!  Fortunately, @AwakenBlockingQueue@ +  should now know what to do. + +  A note on GrAnSim: In GrAnSim we don't have FetchMe closures. 
However, +  we have to turn a RBH back to its original form when the simulated +  transfer of the closure has been finished. Therefore we need the +  @convertFromRBH@ routine below. After converting the RBH back to its +  original form and awakening all TSOs, the first TSO will reenter the +  closure which is now local and carry on merrily reducing it (the other +  TSO will be less merrily blocked on the now local closure; we're costing +  the difference between local and global blocks in the BQ code).  -- HWL  +*/ + +# if defined(PAR) + +EXTFUN(FETCH_ME_info); + +//@cindex convertToFetchMe +void +convertToFetchMe(rbh, ga) +StgRBH *rbh; +globalAddr *ga; +{ +  // StgInfoTable *ip = get_itbl(rbh); +  StgBlockingQueueElement *bqe = rbh->blocking_queue; + +  ASSERT(get_itbl(rbh)->type==RBH); + +  IF_PAR_DEBUG(pack, +	       belch(":*   Converting RBH %p (%s) into a FETCH_ME for GA ((%x, %d, %x))", +		     rbh, info_type(rbh),  +	             ga->payload.gc.gtid, ga->payload.gc.slot, ga->weight));  + +  /* put closure on mutables list, while it is still a RBH */ +  //recordMutable((StgMutClosure *)rbh); + +  /* actually turn it into a FETCH_ME */ +  SET_INFO((StgClosure *)rbh, &FETCH_ME_info); + +  /* set the global pointer in the FETCH_ME closure to the given value */ +  ((StgFetchMe *)rbh)->ga = ga; + +  IF_PAR_DEBUG(pack, +	       if (get_itbl(bqe)->type==TSO || get_itbl(bqe)->type==BLOCKED_FETCH) +	         belch(":*     Awakening non-empty BQ of RBH closure %p (first TSO is %d (%p)", +		      rbh, ((StgTSO *)bqe)->id, ((StgTSO *)bqe)));  + +  /* awaken all TSOs and BLOCKED_FETCHES on the blocking queue */ +  if (get_itbl(bqe)->type==TSO || get_itbl(bqe)->type==BLOCKED_FETCH) +    awaken_blocked_queue(bqe, (StgClosure *)rbh); +} +# else  /* GRAN */ +/* Prototype */ +// void UnlinkFromMUT(StgPtr closure);  + +/* +  This routine in fact reverts the RBH into its original form; this code  +  should be of interest for GUM, too, but is not needed in the current version. 
+  convertFromRBH is called where GUM uses convertToFetchMe. +*/ +void +convertFromRBH(closure) +StgClosure *closure; +{ +  StgBlockingQueueElement *bqe = ((StgRBH*)closure)->blocking_queue; +  char str[NODE_STR_LEN]; // debugging only +  StgInfoTable *rip = REVERT_INFOPTR(get_itbl(closure));  // debugging only + +  IF_GRAN_DEBUG(pack, +		if (get_itbl(bqe)->type==TSO) +		  sprintf(str, "%d (%p)",  +			  ((StgTSO *)bqe)->id, ((StgTSO *)bqe)); +		else  +		  strcpy(str, "empty"); +		belch(":*   Reverting RBH %p (%s) into a ??? closure again; BQ start: %s", +		      closure, info_type(closure), str)); + +  ASSERT(get_itbl(closure)->type==RBH); + +  /* awaken_blocked_queue also restores the RBH_Save closure +     (have to call it even if there are no TSOs in the queue!) */ +  awaken_blocked_queue(bqe, closure); + +  /* Put back old info pointer (grabbed from the RBH's info table). +     We do that *after* awakening the BQ to be sure node is an RBH when +     calling awaken_blocked_queue (different in GUM!) +  */ +  SET_INFO(closure, REVERT_INFOPTR(get_itbl(closure))); + +  /* put closure on mutables list */ +  //recordMutable((StgMutClosure *)closure); + +# if 0 /* rest of this fct */ +    /* ngoq ngo' */ +    /* FETCHME_GA(closure) = ga; */ +    if (IS_MUTABLE(INFO_PTR(bqe))) { +      PROC old_proc = CurrentProc,        /* NB: For AwakenBlockingQueue, */ +           new_proc = where_is(closure);  /*     CurentProc must be where */ +					  /*     closure lives. */ +      CurrentProc = new_proc; + +#  if defined(GRAN_CHECK) +      if (RTSflags.GranFlags.debug & 0x100) +        fprintf(stderr,"===== AwBQ of node 0x%lx (%s) [PE %2u]\n", +	               closure, (isSpec ? 
"SPEC_RBH" : "GEN_RBH"), new_proc); +#  endif + +      rbh_save = AwakenBlockingQueue(bqe);     /* AwakenBlockingQueue(bqe); */ +      CurrentProc = old_proc; +    } else { +        rbh_save = bqe; +    } + +    /* Put data from special RBH save closures back into the closure */ +    if ( rbh_save == NULL ) { +      fprintf(stderr,"convertFromRBH: No RBH_Save_? closure found at end of BQ!\n"); +      EXIT(EXIT_FAILURE); +    } else { +      closure[isSpec ? SPEC_HS : GEN_HS] = rbh_save[SPEC_HS]; +      closure[(isSpec ? SPEC_HS : GEN_HS) + 1] = rbh_save[SPEC_HS + 1]; +    } +# endif /* 0 */ + +# if 0 && (defined(GCap) || defined(GCgn)) +    /* ngoq ngo' */ +    /* If we convert from an RBH in the old generation, +       we have to make sure it goes on the mutables list */ + +    if(closure <= StorageMgrInfo.OldLim) { +	if (IS_MUTABLE(INFO_PTR(closure)) && MUT_LINK(closure) == MUT_NOT_LINKED) { +	    MUT_LINK(closure) = (StgWord) StorageMgrInfo.OldMutables; +            StorageMgrInfo.OldMutables = closure; +	} +    } +# endif /* 0 */ +} +#endif /* PAR */ + +/* Remove closure from the mutables list */ +#if 0 +/* ngoq ngo' */ +void +UnlinkFromMUT(StgPtr closure)  +{ +  StgPtr curr = StorageMgrInfo.OldMutables, prev = NULL; + +  while (curr != NULL && curr != closure) { +    ASSERT(MUT_LINK(curr)!=MUT_NOT_LINKED); +    prev=curr; +    curr=MUT_LINK(curr);  +  } +  if (curr==closure) {    +   if (prev==NULL)  +     StorageMgrInfo.OldMutables = MUT_LINK(curr); +   else    +     MUT_LINK(prev) = MUT_LINK(curr); +   MUT_LINK(curr) = MUT_NOT_LINKED; +  } + +#  if 0 && (defined(GCap) || defined(GCgn)) +  { +    closq newclos; +    extern closq ex_RBH_q; + +    newclos = (closq) stgMallocBytes(sizeof(struct clos), "UnlinkFromMUT"); +    CLOS_CLOSURE(newclos) = closure; +    CLOS_PREV(newclos) = NULL; +    CLOS_NEXT(newclos) = ex_RBH_q; +    if (ex_RBH_q!=NULL) +      CLOS_PREV(ex_RBH_q) = newclos; +    ex_RBH_q = newclos; +  } +#  endif +} +#endif /* PAR */ + +#endif /* PAR 
|| GRAN -- whole file */ + +//@node Index,  , Conversion Functions +//@section Index + +//@index +//* convertToFetchMe::  @cindex\s-+convertToFetchMe +//* convertToRBH::  @cindex\s-+convertToRBH +//@end index diff --git a/ghc/rts/parallel/SysMan.c b/ghc/rts/parallel/SysMan.c new file mode 100644 index 0000000000..eaafc03174 --- /dev/null +++ b/ghc/rts/parallel/SysMan.c @@ -0,0 +1,417 @@ +/* ---------------------------------------------------------------------------- +   Time-stamp: <Sat Dec 04 1999 19:29:57 Stardate: [-30]3999.06 hwloidl> +   $Id: SysMan.c,v 1.2 2000/01/13 14:34:09 hwloidl Exp $ + +   GUM System Manager Program +   Handles startup, shutdown and global synchronisation of the parallel system. + +   The Parade/AQUA Projects, Glasgow University, 1994-1995. +   GdH/APART Projects, Heriot-Watt University, Edinburgh, 1997-1999. +   P. Trinder, November 30th. 1994. +   Adapted for new RTS +   P. Trinder, July 1997. +   H-W. Loidl, November 1999.   +  +   ------------------------------------------------------------------------- */ + +//@node GUM System Manager Program, , , +//@section GUM System Manager Program + +//@menu +//* General docu::		 +//* Includes::			 +//* Macros etc::		 +//* Variables::			 +//* Main fct::			 +//* Auxiliary fcts::		 +//* Index::			 +//@end menu + +//@node General docu, Includes, GUM System Manager Program, GUM System Manager Program +//@subsection General docu + +/* + +The Sysman task currently controls initiation, termination, of a +parallel Haskell program running under GUM. In the future it may +control global GC synchronisation and statistics gathering. Based on +K. Hammond's SysMan.lc in Graph for PVM. SysMan is unusual in that it +is not part of the executable produced by ghc: it is a free-standing +program that spawns PVM tasks (logical PEs) to evaluate the +program. After initialisation it runs in parallel with the PE tasks, +awaiting messages. + +OK children, buckle down for some serious weirdness, it works like this ... 
+ + +o The argument vector (argv) for SysMan has one the following 2 shapes: + +------------------------------------------------------------------------------- +| SysMan path | debug flag | pvm-executable path | Num. PEs | Program Args ...| +------------------------------------------------------------------------------- + +------------------------------------------------------------------- +| SysMan path | pvm-executable path | Num. PEs | Program Args ... | +------------------------------------------------------------------- + +The "pvm-executable path" is an absolute path of where PVM stashes the +code for each PE. The arguments passed on to each PE-executable +spawned by PVM are: + +------------------------------- +| Num. PEs | Program Args ... | +------------------------------- + +The arguments passed to the Main-thread PE-executable are + +------------------------------------------------------------------- +| main flag | pvm-executable path | Num. PEs | Program Args ... | +------------------------------------------------------------------- + +o SysMan's algorithm is as follows. + +o use PVM to spawn (nPE-1) PVM tasks  +o fork SysMan to create the main-thread PE. This permits the main-thread to  +read and write to stdin and stdout.  +o Barrier-synchronise waiting for all of the PE-tasks to start. +o Broadcast the SysMan task-id, so that the main thread knows it. +o Wait for the Main-thread PE to send it's task-id. +o Broadcast an array of the PE task-ids to all of the PE-tasks. +o Enter a loop awaiting incoming messages, e.g. failure, Garbage-collection,  +termination. + +The forked Main-thread algorithm, in SysMan, is as follows. + +o disconnects from PVM. +o sets a flag in argv to indicate that it is the main thread. +o `exec's a copy of the pvm-executable (i.e. the program being run) + + +The pvm-executable run by each PE-task, is initialised as follows. + +o Registers with PVM, obtaining a task-id. +o Joins the barrier synchronisation awaiting the other PEs. 
+o Receives and records the task-id of SysMan, for future use. +o If the PE is the main thread it sends its task-id to SysMan. +o Receives and records the array of task-ids of the other PEs. +o Begins execution. + +*/ + +//@node Includes, Macros etc, General docu, GUM System Manager Program +//@subsection Includes + +#include "Rts.h" +#include "ParTypes.h" +#include "LLC.h" +#include "Parallel.h" + +//@node Macros etc, Variables, Includes, GUM System Manager Program +//@subsection Macros etc + +#define NON_POSIX_SOURCE /* so says Solaris */ + +#define checkerr(c)	do { \ +                          if ((c)<0) { \ +                            pvm_perror("Sysman"); \ +                            fprintf(stderr,"Sysman"); \ +                            stg_exit(EXIT_FAILURE); \ +                          } \ +                        } while(0) + +/* SysMan is put on top of the GHC routine that does the RtsFlags handling. +   So, we cannot use the standard macros. For the time being we use a macro +   that is fixed at compile time. +*/ +/* debugging enabled */ +#define IF_PAR_DEBUG(c,s)  { s; } +/* debugging disabled */ +// #define IF_PAR_DEBUG(c,s)  /* nothing */ + +//@node Variables, Main fct, Macros etc, GUM System Manager Program +//@subsection Variables + +/* +   The following definitions included so that SysMan can be linked with Low +   Level Communications module (LLComms). They are not used in SysMan.  
*/ + +GlobalTaskId  mytid, SysManTask; +rtsBool       IAmMainThread; +rtsBool       GlobalStopPending = rtsFalse; +              /* Handle unexpected messages correctly */ + +static           GlobalTaskId gtids[MAX_PES]; +static           GlobalTaskId sysman_id, sender_id, mainThread_id; +static unsigned  PEsTerminated = 0; +static rtsBool   Finishing = rtsFalse; +static long      PEbuffer[MAX_PES]; +nat              nPEs = 0; + +//@node Main fct, Auxiliary fcts, Variables, GUM System Manager Program +//@subsection Main fct + +//@cindex main +main (int argc, char **argv) { +  int rbufid; +  int opcode, nbytes; +  char **pargv; +  int i, cc, spawn_flag = PvmTaskDefault; +  char *petask, *pvmExecutable; +  rtsPacket addr; +   +  setbuf(stdout, NULL);  // disable buffering of stdout +  setbuf(stderr, NULL);  // disable buffering of stderr +   +  if (argc > 1) { +    if (*argv[1] == '-') { +      spawn_flag = PvmTaskDebug; +      argv[1] = argv[0]; +      argv++; argc--; +    } +    sysman_id = pvm_mytid();  /* This must be the first PVM call */ +     +    checkerr(sysman_id); +     +    /*  +       Get the full path and filename of the pvm executable (stashed in some +       PVM directory), and the number of PEs from the command line. +    */ +    pvmExecutable = argv[1]; +    nPEs = atoi(argv[2]); +     +    if ((petask = getenv(PETASK)) == NULL)  // PETASK set by driver +      petask = PETASK; + +    IF_PAR_DEBUG(verbose, +		 fprintf(stderr,"== [%x] nPEs (%s) = %d\n",  +			 sysman_id, petask, nPEs)); +     +    /* Check that we can create the number of PE and IMU tasks requested */ +    if (nPEs > MAX_PES) { +      fprintf(stderr,"SysMan: No more than %d PEs allowed (%d requested)\n",  +	   MAX_PES, nPEs); +      stg_exit(EXIT_FAILURE); +    } +    /*  +       Now create the PE Tasks. 
We spawn (nPEs-1) pvm threads: the Main Thread  +       (which starts execution and performs IO) is created by forking SysMan  +    */ +    nPEs--; +    if (nPEs > 0) { +      /* Initialise the PE task arguments from Sysman's arguments */ +      pargv = argv + 2; + +      IF_PAR_DEBUG(verbose, +		   fprintf(stderr, "== [%x] Spawning %d PEs(%s) ...\n",  +			   sysman_id, nPEs, petask); +		   fprintf(stderr, "  args: "); +		   for (i = 0; pargv[i]; ++i) +		     fprintf(stderr, "%s, ", pargv[i]); +		   fprintf(stderr, "\n")); + +      checkerr(pvm_spawn(petask, pargv, spawn_flag, "", nPEs, gtids)); +      /* +       * Stash the task-ids of the PEs away in a buffer, once we know  +       * the Main Thread's task-id, we'll broadcast them all. +       */	     +      for (i = 0; i < nPEs; i++) +	PEbuffer[i+1] = (long) gtids[i]; + +      IF_PAR_DEBUG(verbose, +		   fprintf(stderr,"== [%x] Spawned\n", sysman_id)); +    } +     +    /*  +       Create the MainThread PE by forking SysMan. This arcane coding  +       is required to allow MainThread to read stdin and write to stdout. +       PWT 18/1/96  +    */ +    nPEs++;                /* Record that the number of PEs is increasing */ +    if ((cc = fork())) { +      checkerr(cc);        /* Parent continues as SysMan */ +      IF_PAR_DEBUG(verbose, +		   fprintf(stderr,"== [%x] SysMan Task is [t%x]\n", sysman_id)); + +      /* +	SysMan joins PECTLGROUP, so that it can wait (at the +	barrier sysnchronisation a few instructions later) for the +	other PE-tasks to start. +	 +	The manager group (MGRGROUP) is vestigial at the moment. It +	may eventually include a statistics manager, and a (global)  +	garbage collector manager. 
+      */ +      checkerr(pvm_joingroup(PECTLGROUP)); +      IF_PAR_DEBUG(verbose, +		   fprintf(stderr,"== [%x] Joined PECTLGROUP \n", sysman_id)); + +      /* Wait for all the PEs to arrive */ +      checkerr(pvm_barrier(PECTLGROUP, nPEs + 1)); + +      IF_PAR_DEBUG(verbose, +		   fprintf(stderr,"== [%x] PECTLGROUP  barrier passed \n",  +			   sysman_id)); + +      /* Broadcast SysMan's ID, so Main Thread PE knows it */ +      pvm_initsend(PvmDataDefault); +      pvm_bcast(PEGROUP, PP_SYSMAN_TID); +       +      /* Wait for Main Thread to identify itself*/ +      addr = waitForPEOp(PP_MAIN_TASK, ANY_GLOBAL_TASK); +      pvm_bufinfo(addr, &nbytes, &opcode, &mainThread_id); +      PEbuffer[0] = mainThread_id; + +      IF_PAR_DEBUG(verbose, +		   fprintf(stderr,"== [%x] SysMan received Main Task = %x\n",  +			   sysman_id, mainThread_id)); + +      /* Now that we have them all, broadcast Global Task Ids of all PEs */ +      pvm_initsend(PvmDataDefault); +      PutArgs(PEbuffer, nPEs); +      pvm_bcast(PEGROUP, PP_PETIDS); + +      IF_PAR_DEBUG(verbose, +		   fprintf(stderr,"== [%x] Sysman successfully initialized!\n", +			   sysman_id)); + +//@cindex message handling loop +      /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ +      /* Main message handling loop                                         */ +      /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ +      /* Process incoming messages */ +      while (1) { +	if ((rbufid = pvm_recv(ANY_TASK, ANY_OPCODE)) < 0) +	  pvm_perror("Sysman: Receiving Message"); +	else { +	  pvm_bufinfo(rbufid, &nbytes, &opcode, &sender_id); + +	  /*  +	  IF_PAR_DEBUG(trace, +		       fprintf(stderr,"== [%x] SysMan: Message received by SysMan: rbufid=%x, nbytes = %d, opcode = %x, sender_id = %x\n", +			     sysman_id, rbufid, nbytes, opcode, sender_id)); +	  */ +	  switch (opcode) { +	  case PP_GC_INIT: +	    /* This Function not yet implemented for GUM */ +	    fprintf(stderr,"Global 
GC from %x Not yet implemented for GUM!\n",  +		  sender_id); +	    sync(PECTLGROUP, PP_FULL_SYSTEM); +	    broadcast(PEGROUP, PP_GC_INIT); +	    /*                DoGlobalGC();                */ +	    /*		      broadcast(PEGROUP, PP_INIT); */ +	    break; +	     +	  case PP_STATS_ON: +	    fprintf(stderr,"PP_STATS_ON (from %x) not yet implemented for GUM!\n",  +		  sender_id); +	    break; + +	  case PP_STATS_OFF: +	    fprintf(stderr,"PP_STATS_OFF (from %x) not yet implemented for GUM!\n",  +		  sender_id); +	    break; +	     +	  case PP_FINISH: +	    IF_PAR_DEBUG(verbose, +			 fprintf(stderr,"== [%x] Finish from %x\n",  +				 sysman_id, sender_id)); +	    if (!Finishing) { +	      Finishing = rtsTrue; +	      PEsTerminated = 1; +	      pvm_initsend(PvmDataDefault); +	      pvm_bcast(PEGROUP, PP_FINISH); +	    } else { +	      ++PEsTerminated; +	    } +	    if (PEsTerminated >= nPEs) { +	      IF_PAR_DEBUG(verbose, +			   fprintf(stderr,"== [%x] Global Shutdown, Goodbye!! (SysMan has received FINISHes from all PEs)\n",  +				   sysman_id)); +	      broadcast(PEGROUP, PP_FINISH); +	      broadcast(MGRGROUP, PP_FINISH); +	      pvm_lvgroup(PECTLGROUP); +	      pvm_lvgroup(MGRGROUP); +	      pvm_exit(); +	      exit(EXIT_SUCCESS); +	      /* Qapla'! 
*/ +	    } +	    break; +	     +	  case PP_FAIL: +	    IF_PAR_DEBUG(verbose, +			 fprintf(stderr,"== [%x] Fail from %x\n",  +				 sysman_id, sender_id)); +	    if (!Finishing) { +	      Finishing = rtsTrue; +	      broadcast(PEGROUP, PP_FAIL); +	    } +	    break; +	     +	  default: +	    { +	     /*		           +	      char *opname = GetOpName(opcode); +	      fprintf(stderr,"Sysman: Unrecognised opcode %s (%x)\n", +	                      opname,opcode);	*/ +	      fprintf(stderr,"Qagh: Sysman: Unrecognised opcode (%x)\n", +		    opcode); +	    } +	    break; +	  } 	/* switch */ +	}		/* else */ +      }		/* while 1 */ +    }      		/* forked Sysman Process */ +    else { +      fprintf(stderr, "Main Thread PE has been forked; doing an execv(%s,...)\n",  +	      pvmExecutable); +      pvmendtask();		 /* Disconnect from PVM to avoid confusion: */ +      /* executable reconnects  */ +      *argv[0] = '-';		 /* Flag that this is the Main Thread PE */ +      execv(pvmExecutable,argv); /* Parent task becomes Main Thread PE */ +    } +  }			/* argc > 1 */   +}			/* main */ + +//@node Auxiliary fcts, Index, Main fct, GUM System Manager Program +//@subsection Auxiliary fcts + +/* + * This reproduced from RtsUtlis to save linking with a whole ball of wax + */ +/* result-checking malloc wrappers. 
*/ + +//@cindex stgMallocBytes + +void * +stgMallocBytes (int n, char *msg) +{ +    char *space; + +    if ((space = (char *) malloc((size_t) n)) == NULL) { +	fflush(stdout); +	fprintf(stderr, msg); +	// MallocFailHook((W_) n, msg); /*msg*/ +	stg_exit(EXIT_FAILURE); +    } +    return space; +} + +/* Needed here because its used in loads of places like LLComms etc */ + +//@cindex stg_exit + +void stg_exit(n) +I_ n; +{ +    exit(n); +} + +//@node Index,  , Auxiliary fcts, GUM System Manager Program +//@subsection Index + +//@index +//* main::  @cindex\s-+main +//* message handling loop::  @cindex\s-+message handling loop +//* stgMallocBytes::  @cindex\s-+stgMallocBytes +//* stg_exit::  @cindex\s-+stg_exit +//@end index  | 
