summaryrefslogtreecommitdiff
path: root/compiler/profiling
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/profiling')
-rw-r--r--compiler/profiling/CostCentre.lhs373
-rw-r--r--compiler/profiling/NOTES301
-rw-r--r--compiler/profiling/SCCfinal.lhs411
3 files changed, 1085 insertions, 0 deletions
diff --git a/compiler/profiling/CostCentre.lhs b/compiler/profiling/CostCentre.lhs
new file mode 100644
index 0000000000..3ee46a88db
--- /dev/null
+++ b/compiler/profiling/CostCentre.lhs
@@ -0,0 +1,373 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+\section[CostCentre]{The @CostCentre@ data type}
+
+\begin{code}
+module CostCentre (
+ CostCentre(..), CcName, IsDupdCC(..), IsCafCC(..),
+ -- All abstract except to friend: ParseIface.y
+
+ CostCentreStack,
+ CollectedCCs,
+ noCCS, subsumedCCS, currentCCS, overheadCCS, dontCareCCS,
+ noCostCentre, noCCAttached,
+ noCCSAttached, isCurrentCCS, isSubsumedCCS, currentOrSubsumedCCS,
+ isDerivedFromCurrentCCS, maybeSingletonCCS,
+ decomposeCCS,
+
+ mkUserCC, mkAutoCC, mkAllCafsCC,
+ mkSingletonCCS, dupifyCC, pushCCOnCCS,
+ isCafCCS, isCafCC,
+ isSccCountCostCentre,
+ sccAbleCostCentre,
+ ccFromThisModule,
+
+ pprCostCentreCore,
+ costCentreUserName,
+
+ cmpCostCentre -- used for removing dups in a list
+ ) where
+
+#include "HsVersions.h"
+
+import Var ( Id )
+import Name ( getOccName, occNameFS )
+import Module ( Module, moduleFS )
+import Outputable
+import FastTypes
+import FastString
+import Util ( thenCmp )
+\end{code}
+
+A Cost Centre Stack is something that can be attached to a closure.
+This is either:
+
+ - the current cost centre stack (CCCS)
+ - a pre-defined cost centre stack (there are several
+ pre-defined CCSs, see below).
+
+\begin{code}
+data CostCentreStack
+ = NoCCS
+
+ | CurrentCCS -- Pinned on a let(rec)-bound
+ -- thunk/function/constructor, this says that the
+ -- cost centre to be attached to the object, when it
+ -- is allocated, is whatever is in the
+ -- current-cost-centre-stack register.
+
+ | SubsumedCCS -- Cost centre stack for top-level subsumed functions
+ -- (CAFs get an AllCafsCC).
+ -- Its execution costs get subsumed into the caller.
+ -- This guy is *only* ever pinned on static closures,
+ -- and is *never* the cost centre for an SCC construct.
+
+ | OverheadCCS -- We charge costs due to the profiling-system
+ -- doing its work to "overhead".
+ --
+ -- Objects whose CCS is "Overhead"
+ -- have their *allocation* charged to "overhead",
+ -- but have the current CCS put into the object
+ -- itself.
+
+ -- For example, if we transform "f g" to "let
+ -- g' = g in f g'" (so that something about
+ -- profiling works better...), then we charge
+ -- the *allocation* of g' to OverheadCCS, but
+ -- we put the cost-centre of the call to f
+ -- (i.e., current CCS) into the g' object. When
+ -- g' is entered, the CCS of the call
+ -- to f will be set.
+
+ | DontCareCCS -- We need a CCS to stick in static closures
+ -- (for data), but we *don't* expect them to
+ -- accumulate any costs. But we still need
+ -- the placeholder. This CCS is it.
+
+ | PushCC CostCentre CostCentreStack
+ -- These are used during code generation as the CCSs
+ -- attached to closures. A PushCC never appears as
+ -- the argument to an _scc_.
+ --
+ -- The tail (2nd argument) is either NoCCS, indicating
+ -- a staticly allocated CCS, or CurrentCCS indicating
+ -- a dynamically created CCS. We only support
+ -- statically allocated *singleton* CCSs at the
+ -- moment, for the purposes of initialising the CCS
+ -- field of a CAF.
+
+ deriving (Eq, Ord) -- needed for Ord on CLabel
+\end{code}
+
+A Cost Centre is the argument of an _scc_ expression.
+
+\begin{code}
+data CostCentre
+ = NoCostCentre -- Having this constructor avoids having
+ -- to use "Maybe CostCentre" all the time.
+
+ | NormalCC {
+ cc_name :: CcName, -- Name of the cost centre itself
+ cc_mod :: Module, -- Name of module defining this CC.
+ cc_is_dupd :: IsDupdCC, -- see below
+ cc_is_caf :: IsCafCC -- see below
+ }
+
+ | AllCafsCC {
+ cc_mod :: Module -- Name of module defining this CC.
+ }
+
+type CcName = FastString
+
+data IsDupdCC
+ = OriginalCC -- This says how the CC is *used*. Saying that
+ | DupdCC -- it is DupdCC doesn't make it a different
+ -- CC, just that it a sub-expression which has
+ -- been moved ("dupd") into a different scope.
+ --
+ -- The point about a dupd SCC is that we don't
+ -- count entries to it, because it's not the
+ -- "original" one.
+ --
+ -- In the papers, it's called "SCCsub",
+ -- i.e. SCCsub CC == SCC DupdCC,
+ -- but we are trying to avoid confusion between
+ -- "subd" and "subsumed". So we call the former
+ -- "dupd".
+
+data IsCafCC = CafCC | NotCafCC
+
+-- synonym for triple which describes the cost centre info in the generated
+-- code for a module.
+type CollectedCCs
+ = ( [CostCentre] -- local cost-centres that need to be decl'd
+ , [CostCentre] -- "extern" cost-centres
+ , [CostCentreStack] -- pre-defined "singleton" cost centre stacks
+ )
+\end{code}
+
+WILL: Would there be any merit to recording ``I am now using a
+cost-centre from another module''? I don't know if this would help a
+user; it might be interesting to us to know how much computation is
+being moved across module boundaries.
+
+SIMON: Maybe later...
+
+\begin{code}
+
+noCCS = NoCCS
+subsumedCCS = SubsumedCCS
+currentCCS = CurrentCCS
+overheadCCS = OverheadCCS
+dontCareCCS = DontCareCCS
+
+noCostCentre = NoCostCentre
+\end{code}
+
+Predicates on Cost-Centre Stacks
+
+\begin{code}
+noCCSAttached NoCCS = True
+noCCSAttached _ = False
+
+noCCAttached NoCostCentre = True
+noCCAttached _ = False
+
+isCurrentCCS CurrentCCS = True
+isCurrentCCS _ = False
+
+isSubsumedCCS SubsumedCCS = True
+isSubsumedCCS _ = False
+
+isCafCCS (PushCC cc NoCCS) = isCafCC cc
+isCafCCS _ = False
+
+isDerivedFromCurrentCCS CurrentCCS = True
+isDerivedFromCurrentCCS (PushCC _ ccs) = isDerivedFromCurrentCCS ccs
+isDerivedFromCurrentCCS _ = False
+
+currentOrSubsumedCCS SubsumedCCS = True
+currentOrSubsumedCCS CurrentCCS = True
+currentOrSubsumedCCS _ = False
+
+maybeSingletonCCS (PushCC cc NoCCS) = Just cc
+maybeSingletonCCS _ = Nothing
+\end{code}
+
+Building cost centres
+
+\begin{code}
+mkUserCC :: FastString -> Module -> CostCentre
+mkUserCC cc_name mod
+ = NormalCC { cc_name = cc_name, cc_mod = mod,
+ cc_is_dupd = OriginalCC, cc_is_caf = NotCafCC {-might be changed-}
+ }
+
+mkAutoCC :: Id -> Module -> IsCafCC -> CostCentre
+mkAutoCC id mod is_caf
+ = NormalCC { cc_name = occNameFS (getOccName id), cc_mod = mod,
+ cc_is_dupd = OriginalCC, cc_is_caf = is_caf
+ }
+
+mkAllCafsCC m = AllCafsCC { cc_mod = m }
+
+
+
+mkSingletonCCS :: CostCentre -> CostCentreStack
+mkSingletonCCS cc = pushCCOnCCS cc NoCCS
+
+pushCCOnCCS :: CostCentre -> CostCentreStack -> CostCentreStack
+pushCCOnCCS = PushCC
+
+dupifyCC cc = cc {cc_is_dupd = DupdCC}
+
+isCafCC, isDupdCC :: CostCentre -> Bool
+
+isCafCC (AllCafsCC {}) = True
+isCafCC (NormalCC {cc_is_caf = CafCC}) = True
+isCafCC _ = False
+
+isDupdCC (NormalCC {cc_is_dupd = DupdCC}) = True
+isDupdCC _ = False
+
+isSccCountCostCentre :: CostCentre -> Bool
+ -- Is this a cost-centre which records scc counts
+
+#if DEBUG
+isSccCountCostCentre NoCostCentre = panic "isSccCount:NoCostCentre"
+#endif
+isSccCountCostCentre cc | isCafCC cc = False
+ | isDupdCC cc = False
+ | otherwise = True
+
+sccAbleCostCentre :: CostCentre -> Bool
+ -- Is this a cost-centre which can be sccd ?
+
+#if DEBUG
+sccAbleCostCentre NoCostCentre = panic "sccAbleCC:NoCostCentre"
+#endif
+sccAbleCostCentre cc | isCafCC cc = False
+ | otherwise = True
+
+ccFromThisModule :: CostCentre -> Module -> Bool
+ccFromThisModule cc m = cc_mod cc == m
+\end{code}
+
+\begin{code}
+instance Eq CostCentre where
+ c1 == c2 = case c1 `cmpCostCentre` c2 of { EQ -> True; _ -> False }
+
+instance Ord CostCentre where
+ compare = cmpCostCentre
+
+cmpCostCentre :: CostCentre -> CostCentre -> Ordering
+
+cmpCostCentre (AllCafsCC {cc_mod = m1}) (AllCafsCC {cc_mod = m2}) = m1 `compare` m2
+
+cmpCostCentre (NormalCC {cc_name = n1, cc_mod = m1, cc_is_caf = c1})
+ (NormalCC {cc_name = n2, cc_mod = m2, cc_is_caf = c2})
+ -- first key is module name, then we use "kinds" (which include
+ -- names) and finally the caf flag
+ = (m1 `compare` m2) `thenCmp` (n1 `compare` n2) `thenCmp` (c1 `cmp_caf` c2)
+
+cmpCostCentre other_1 other_2
+ = let
+ tag1 = tag_CC other_1
+ tag2 = tag_CC other_2
+ in
+ if tag1 <# tag2 then LT else GT
+ where
+ tag_CC (NormalCC {}) = (_ILIT 1 :: FastInt)
+ tag_CC (AllCafsCC {}) = _ILIT 2
+
+cmp_caf NotCafCC CafCC = LT
+cmp_caf NotCafCC NotCafCC = EQ
+cmp_caf CafCC CafCC = EQ
+cmp_caf CafCC NotCafCC = GT
+
+decomposeCCS :: CostCentreStack -> ([CostCentre],CostCentreStack)
+decomposeCCS (PushCC cc ccs) = (cc:more, ccs')
+ where (more,ccs') = decomposeCCS ccs
+decomposeCCS ccs = ([],ccs)
+\end{code}
+
+-----------------------------------------------------------------------------
+Printing Cost Centre Stacks.
+
+The outputable instance for CostCentreStack prints the CCS as a C
+expression.
+
+NOTE: Not all cost centres are suitable for using in a static
+initializer. In particular, the PushCC forms where the tail is CCCS
+may only be used in inline C code because they expand to a
+non-constant C expression.
+
+\begin{code}
+instance Outputable CostCentreStack where
+ ppr NoCCS = ptext SLIT("NO_CCS")
+ ppr CurrentCCS = ptext SLIT("CCCS")
+ ppr OverheadCCS = ptext SLIT("CCS_OVERHEAD")
+ ppr DontCareCCS = ptext SLIT("CCS_DONT_CARE")
+ ppr SubsumedCCS = ptext SLIT("CCS_SUBSUMED")
+ ppr (PushCC cc NoCCS) = ppr cc <> ptext SLIT("_ccs")
+ ppr (PushCC cc ccs) = ptext SLIT("PushCostCentre") <>
+ parens (ppr ccs <> comma <>
+ parens(ptext SLIT("void *")) <> ppr cc)
+\end{code}
+
+-----------------------------------------------------------------------------
+Printing Cost Centres.
+
+There are several different ways in which we might want to print a
+cost centre:
+
+ - the name of the cost centre, for profiling output (a C string)
+ - the label, i.e. C label for cost centre in .hc file.
+ - the debugging name, for output in -ddump things
+ - the interface name, for printing in _scc_ exprs in iface files.
+
+The last 3 are derived from costCentreStr below. The first is given
+by costCentreName.
+
+\begin{code}
+instance Outputable CostCentre where
+ ppr cc = getPprStyle $ \ sty ->
+ if codeStyle sty
+ then ppCostCentreLbl cc
+ else text (costCentreUserName cc)
+
+-- Printing in an interface file or in Core generally
+pprCostCentreCore (AllCafsCC {cc_mod = m})
+ = text "__sccC" <+> braces (ppr_mod m)
+pprCostCentreCore (NormalCC {cc_name = n, cc_mod = m,
+ cc_is_caf = caf, cc_is_dupd = dup})
+ = text "__scc" <+> braces (hsep [
+ ftext (zEncodeFS n),
+ ppr_mod m,
+ pp_dup dup,
+ pp_caf caf
+ ])
+
+pp_dup DupdCC = char '!'
+pp_dup other = empty
+
+pp_caf CafCC = text "__C"
+pp_caf other = empty
+
+ppr_mod m = ftext (zEncodeFS (moduleFS m))
+
+-- Printing as a C label
+ppCostCentreLbl (NoCostCentre) = text "NONE_cc"
+ppCostCentreLbl (AllCafsCC {cc_mod = m}) = ppr m <> text "_CAFs_cc"
+ppCostCentreLbl (NormalCC {cc_name = n, cc_mod = m, cc_is_caf = is_caf})
+ = ppr_mod m <> ftext (zEncodeFS n) <>
+ text (case is_caf of { CafCC -> "_CAF"; _ -> "" }) <> text "_cc"
+
+-- This is the name to go in the user-displayed string,
+-- recorded in the cost centre declaration
+costCentreUserName (NoCostCentre) = "NO_CC"
+costCentreUserName (AllCafsCC {}) = "CAF"
+costCentreUserName cc@(NormalCC {cc_name = name, cc_is_caf = is_caf})
+ = case is_caf of { CafCC -> "CAF:"; _ -> "" } ++ unpackFS name
+\end{code}
diff --git a/compiler/profiling/NOTES b/compiler/profiling/NOTES
new file mode 100644
index 0000000000..c50cf562e3
--- /dev/null
+++ b/compiler/profiling/NOTES
@@ -0,0 +1,301 @@
+Profiling Implementation Notes -- June/July/Sept 1994
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Simon and Will
+
+Pre-code-generator-ish
+~~~~~~~~~~~~~~~~~~~~~~
+
+* Automagic insertion of _sccs_ on...
+
+ - If -auto is specified, add _scc_ on each *exported* top-level definition.
+ NB this includes CAFs. Done by addAutoCostCentres (Core-to-Core pass).
+
+ - If -auto-all is specified, add _scc_ on *all* top-level definitions.
+ Done by same pass.
+
+ - Always: just before code generation of module M, onto any CAF
+ which hasn't already got an explicit cost centre attached, pin
+ "AllCAFs-M".
+
+ Done by finalStgMassageForProfiling (final STG-to-STG pass)
+
+ Only the one-off costs of evaluating the CAFs will be attributed
+ to the AllCAFs-M cost centre. We hope that these costs will be
+ small; since the _scc_s are introduced automatically it's
+ confusing to attribute any significant costs to them. However if
+ there *are* significant one-off costs we'd better know about it.
+
+ Why so late in the compilation process? We aren't *absolutely*
+ sure what is and isn't a CAF until *just* before code generation.
+ So we don't want to mark them as such until then.
+
+ - Individual DICTs
+
+ We do it in the desugarer, because that's the *only* point at
+ which we *know* exactly what bindings are introduced by
+ overloading. NB should include bindings for selected methods, eg
+
+ f d = let op = _scc_ DICT op_sel d in
+ ...op...op...op
+
+ The DICT CC ensures that:
+ (a) [minor] that the selection cost is separately attributed
+ (b) [major] that the cost of executing op is attributed to
+ its call site, eg
+
+ ...(scc "a" op)...(scc "b" op)...(scc "c" op)...
+
+* Automagic "boxing" of higher-order args:
+
+ finalStgMassageForProfiling (final STG-to-STG pass)
+
+ This (as well as CAF stuff above) is really quite separate
+ from the other business of finalStgMassageForProfiling
+ (collecting up CostCentres that need to be
+ declared/registered).
+
+ But throwing it all into the pot together means that we don't
+ have to have Yet Another STG Syntax Walker.
+
+ Furthermore, these "boxes" are really just let-bindings that
+ many other parts of the compiler will happily substitute away!
+ Doing them at the very last instant prevents this.
+
+ A down side of doing these so late is that we get lots of
+ "let"s, which if generated earlier and not substituted away,
+ could be floated outwards. Having them floated outwards would
+ lessen the chance of skewing profiling results (because of
+ gratuitous "let"s added by the compiler into the inner loop of
+ some program...). The allocation itself will be attributed to
+ profiling overhead; the only thing which'll be skewed is time measurement.
+
+ So if we have, post-boxing-higher-order-args...
+
+ _scc_ "foo" ( let f' = [f] \ [] f
+ in
+ map f' xs )
+
+ ... we want "foo" to be put in the thunk for "f'", but we want the
+ allocation cost (heap census stuff) to be attr to OVERHEAD.
+
+ As an example of what could be improved
+ f = _scc_ "f" (g h)
+ To save dynamic allocation, we could have a static closure for h:
+ h_inf = _scc_ "f" h
+ f = _scc_ "f" (g h_inf)
+
+
+
+
+
+Code generator-ish
+~~~~~~~~~~~~~~~~~~
+
+(1) _Entry_ code for a closure *usually* sets CC from the closure,
+ at the fast entry point
+
+ Exceptions:
+
+ (a) Top-level subsumed functions (i.e., w/ no _scc_ on them)
+
+ Refrain from setting CC from the closure
+
+ (b) Constructors
+
+ Again, refrain. (This is *new*)
+
+ Reasons: (i) The CC will be zapped very shortly by the restore
+ of the enclosing CC when we return to the eval'ing "case".
+ (ii) Any intervening updates will indirect to this existing
+ constructor (...mumble... new update mechanism... mumble...)
+
+(2) "_scc_ cc expr"
+
+ Set current CC to "cc".
+ No later "restore" of the previous CC is reqd.
+
+(3) "case e of { ...alts... }" expression (eval)
+
+ Save CC before eval'ing scrutinee
+ Restore CC at the start of the case-alternative(s)
+
+(4) _Updates_ : updatee gets current CC
+
+ (???? not sure this is OK yet 94/07/04)
+
+ Reasons:
+
+ * Constructors : want to be insensitive to return-in-heap vs
+ return-in-regs. For example,
+
+ f x = _scc_ "f" (x, x)
+
+ The pair (x,x) would get CC of "f" if returned-in-heap;
+ therefore, updatees should get CC of "f".
+
+ * PAPs : Example:
+
+ f x = _scc_ "f" (let g = \ y -> ... in g)
+
+ At the moment of update (updatePAP?), CC is "f", which
+ is what we want to set it to if the "updatee" is entered
+
+ When we enter the PAP ("please put the arguments back so I can
+ use them"), we restore the setup as at the moment the
+ arg-satisfaction check failed.
+
+ Be careful! UPDATE_PAP is called from the arg-satis check,
+ which is before the fast entry point. So the cost centre
+ won't yet have been set from the closure which has just
+ been entered. Solution: in UPDATE_PAP see if the cost centre inside
+ the function closure which is being entered is "SUB"; if so, use
+ the current cost centre to update the updatee; otherwise use that
+ inside the function closure. (See the computation of cc_pap
+ in rule 16_l for lexical semantics.)
+
+
+(5) CAFs
+
+CAFs get their own cost centre. Ie
+
+ x = e
+is transformed to
+ x = _scc_ "CAF:x" e
+
+Or sometimes we lump all the CAFs in a module together.
+(Reporting issue or code-gen issue?)
+
+
+
+Hybrid stuff
+~~~~~~~~~~~~
+
+The problem:
+
+ f = _scc_ "CAF:f" (let g = \xy -> ...
+ in (g,g))
+
+Now, g has cost-centre "CAF:f", and is returned as part of
+the result. So whenever the function embedded in the result
+is called, the costs will accumulate to "CAF:f". This is
+particularly (de)pressing for dictionaries, which contain lots
+of functions.
+
+Solution:
+
+ A. Whenever in case (1) above we would otherwise "set the CC from the
+ closure", we *refrain* from doing so if
+ (a) the closure is a function, not a thunk; and
+ (b) the cost-centre in the closure is a CAF cost centre.
+
+ B. Whenever we enter a thunk [at least, one which might return a function]
+ we save the current cost centre in the update frame. Then, UPDATE_PAP
+ restores the saved cost centre from the update frame iff the cost
+ centre at the point of update (cc_pap in (4) above) is a CAF cost centre.
+
+ It isn't necessary to save and possibly-restore the cost centre for
+ thunks which will certainly return a constructor, because the
+ cost centre is about to be restored anyway by the enclosing case.
+
+Both A and B are runtime tests. For A, consider:
+
+ f = _scc_ "CAF:f" (g 2)
+
+ h y = _scc_ "h" g (y+y)
+
+ g x = let w = \p -> ...
+ in (w,w)
+
+
+Now, in the call to g from h, the cost-centre on w will be "h", and
+indeed all calls to the result of the call should be attributed to
+"h".
+
+ ... _scc_ "x1" (let (t,_) = h 2 in t 3) ...
+
+ Costs of executing (w 3) attributed to "h".
+
+But in the call to g from f, the cost-centre on w will be
+"CAF:f", and calls to w should be attributed to the call site.
+
+ ..._scc_ "x2" (let (t,_) = f in t 3)...
+
+ Costs of executing (w 3) attributed to "x2".
+
+
+ Remaining problem
+
+Consider
+
+ _scc_ "CAF:f" (if expensive then g 2 else g 3)
+
+where g is a function with arity 2. In theory we should
+restore the enclosing cost centre once we've reduced to
+(g 2) or (g 3). In practice this is pretty tiresome; and pretty rare.
+
+A quick fix: given (_scc_ "CAF" e) where e might be function-valued
+(in practice we usually know, because CAF sccs are top level), transform to
+
+ _scc_ "CAF" (let f = e in f)
+
+
+
+
+
+============
+
+scc cc x ===> x
+
+ UNLESS
+
+(a) cc is a user-defined, non-dup'd cost
+ centre (so we care about entry counts)
+
+OR
+
+(b) cc is not a CAF/DICT cost centre and x is top-level subsumed
+ function.
+ [If x is lambda/let bound it'll have a cost centre
+ attached dynamically.]
+
+ To repeat, the transformation is OK if
+ x is a not top-level subsumed function
+ OR
+ cc is a CAF/DICT cost centre and x is a top-level
+ subsumed function
+
+
+
+(scc cc e) x ===> (scc cc e x)
+
+ OK????? IFF
+
+cc is not CAF/DICT --- remains to be proved!!!!!!
+True for lex
+False for eval
+Can we tell which in hybrid?
+
+eg Is this ok?
+
+ (scc "f" (scc "CAF" (\x.b))) y ==> (scc "f" (scc "CAF" (\x.b) y))
+
+
+\x -> (scc cc e) ===> (scc cc \x->e)
+
+ OK IFF cc is not CAF/DICT
+
+
+scc cc1 (scc cc2 e)) ===> scc cc2 e
+
+ IFF not interested in cc1's entry count
+ AND cc2 is not CAF/DICT
+
+(scc cc1 ... (scc cc2 e) ...) ===> (scc cc1 ... e ...)
+
+ IFF cc2 is CAF/DICT
+ AND e is a lambda not appearing as the RHS of a let
+ OR
+ e is a variable not bound to SUB
+
+
diff --git a/compiler/profiling/SCCfinal.lhs b/compiler/profiling/SCCfinal.lhs
new file mode 100644
index 0000000000..c95db9c358
--- /dev/null
+++ b/compiler/profiling/SCCfinal.lhs
@@ -0,0 +1,411 @@
+%
+% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
+%
+\section[SCCfinal]{Modify and collect code generation for final STG program}
+
+This is now a sort-of-normal STG-to-STG pass (WDP 94/06), run by stg2stg.
+
+* Traverses the STG program collecting the cost centres. These are
+ required to declare the cost centres at the start of code
+ generation.
+
+ Note: because of cross-module unfolding, some of these cost centres
+ may be from other modules. But will still have to give them
+ "extern" declarations.
+
+* Puts on CAF cost-centres if the user has asked for individual CAF
+ cost-centres.
+
+* Ditto for individual DICT cost-centres.
+
+* Boxes top-level inherited functions passed as arguments.
+
+* "Distributes" given cost-centres to all as-yet-unmarked RHSs.
+
+\begin{code}
+module SCCfinal ( stgMassageForProfiling ) where
+
+#include "HsVersions.h"
+
+import StgSyn
+
+import Packages ( HomeModules )
+import StaticFlags ( opt_AutoSccsOnIndividualCafs )
+import CostCentre -- lots of things
+import Id ( Id )
+import Module ( Module )
+import UniqSupply ( uniqFromSupply, splitUniqSupply, UniqSupply )
+import Unique ( Unique )
+import VarSet
+import ListSetOps ( removeDups )
+import Outputable
+
+infixr 9 `thenMM`, `thenMM_`
+\end{code}
+
+\begin{code}
+stgMassageForProfiling
+ :: HomeModules
+ -> Module -- module name
+ -> UniqSupply -- unique supply
+ -> [StgBinding] -- input
+ -> (CollectedCCs, [StgBinding])
+
+stgMassageForProfiling pdeps mod_name us stg_binds
+ = let
+ ((local_ccs, extern_ccs, cc_stacks),
+ stg_binds2)
+ = initMM mod_name us (do_top_bindings stg_binds)
+
+ (fixed_ccs, fixed_cc_stacks)
+ = if opt_AutoSccsOnIndividualCafs
+ then ([],[]) -- don't need "all CAFs" CC
+ -- (for Prelude, we use PreludeCC)
+ else ([all_cafs_cc], [all_cafs_ccs])
+
+ local_ccs_no_dups = fst (removeDups cmpCostCentre local_ccs)
+ extern_ccs_no_dups = fst (removeDups cmpCostCentre extern_ccs)
+ in
+ ((fixed_ccs ++ local_ccs_no_dups,
+ extern_ccs_no_dups,
+ fixed_cc_stacks ++ cc_stacks), stg_binds2)
+ where
+
+ all_cafs_cc = mkAllCafsCC mod_name
+ all_cafs_ccs = mkSingletonCCS all_cafs_cc
+
+ ----------
+ do_top_bindings :: [StgBinding] -> MassageM [StgBinding]
+
+ do_top_bindings [] = returnMM []
+
+ do_top_bindings (StgNonRec b rhs : bs)
+ = do_top_rhs b rhs `thenMM` \ rhs' ->
+ addTopLevelIshId b (
+ do_top_bindings bs `thenMM` \bs' ->
+ returnMM (StgNonRec b rhs' : bs')
+ )
+
+ do_top_bindings (StgRec pairs : bs)
+ = addTopLevelIshIds binders (
+ mapMM do_pair pairs `thenMM` \ pairs2 ->
+ do_top_bindings bs `thenMM` \ bs' ->
+ returnMM (StgRec pairs2 : bs')
+ )
+ where
+ binders = map fst pairs
+ do_pair (b, rhs)
+ = do_top_rhs b rhs `thenMM` \ rhs2 ->
+ returnMM (b, rhs2)
+
+ ----------
+ do_top_rhs :: Id -> StgRhs -> MassageM StgRhs
+
+ do_top_rhs binder (StgRhsClosure _ bi fv u srt [] (StgSCC cc (StgConApp con args)))
+ | not (isSccCountCostCentre cc) && not (isDllConApp pdeps con args)
+ -- Trivial _scc_ around nothing but static data
+ -- Eliminate _scc_ ... and turn into StgRhsCon
+
+ -- isDllConApp checks for LitLit args too
+ = returnMM (StgRhsCon dontCareCCS con args)
+
+{- Can't do this one with cost-centre stacks: --SDM
+ do_top_rhs binder (StgRhsClosure no_cc bi fv u [] (StgSCC ty cc expr))
+ | (noCCSAttached no_cc || currentOrSubsumedCCS no_cc)
+ && not (isSccCountCostCentre cc)
+ -- Top level CAF without a cost centre attached
+ -- Attach and collect cc of trivial _scc_ in body
+ = collectCC cc `thenMM_`
+ set_prevailing_cc cc (do_expr expr) `thenMM` \ expr' ->
+ returnMM (StgRhsClosure cc bi fv u [] expr')
+-}
+
+ do_top_rhs binder (StgRhsClosure no_cc bi fv u srt [] body)
+ | noCCSAttached no_cc || currentOrSubsumedCCS no_cc
+ -- Top level CAF without a cost centre attached
+ -- Attach CAF cc (collect if individual CAF ccs)
+ = (if opt_AutoSccsOnIndividualCafs
+ then let cc = mkAutoCC binder mod_name CafCC
+ ccs = mkSingletonCCS cc
+ in
+ collectCC cc `thenMM_`
+ collectCCS ccs `thenMM_`
+ returnMM ccs
+ else
+ returnMM all_cafs_ccs) `thenMM` \ caf_ccs ->
+ set_prevailing_cc caf_ccs (do_expr body) `thenMM` \ body' ->
+ returnMM (StgRhsClosure caf_ccs bi fv u srt [] body')
+
+ do_top_rhs binder (StgRhsClosure cc bi fv u srt [] body)
+ -- Top level CAF with cost centre attached
+ -- Should this be a CAF cc ??? Does this ever occur ???
+ = pprPanic "SCCfinal: CAF with cc:" (ppr cc)
+
+ do_top_rhs binder (StgRhsClosure no_ccs bi fv u srt args body)
+ -- Top level function, probably subsumed
+ | noCCSAttached no_ccs
+ = set_lambda_cc (do_expr body) `thenMM` \ body' ->
+ returnMM (StgRhsClosure subsumedCCS bi fv u srt args body')
+
+ | otherwise
+ = pprPanic "SCCfinal: CAF with cc:" (ppr no_ccs)
+
+ do_top_rhs binder (StgRhsCon ccs con args)
+ -- Top-level (static) data is not counted in heap
+ -- profiles; nor do we set CCCS from it; so we
+ -- just slam in dontCareCostCentre
+ = returnMM (StgRhsCon dontCareCCS con args)
+
+ ------
+ do_expr :: StgExpr -> MassageM StgExpr
+
+ do_expr (StgLit l) = returnMM (StgLit l)
+
+ do_expr (StgApp fn args)
+ = boxHigherOrderArgs (StgApp fn) args
+
+ do_expr (StgConApp con args)
+ = boxHigherOrderArgs (\args -> StgConApp con args) args
+
+ do_expr (StgOpApp con args res_ty)
+ = boxHigherOrderArgs (\args -> StgOpApp con args res_ty) args
+
+ do_expr (StgSCC cc expr) -- Ha, we found a cost centre!
+ = collectCC cc `thenMM_`
+ do_expr expr `thenMM` \ expr' ->
+ returnMM (StgSCC cc expr')
+
+ do_expr (StgCase expr fv1 fv2 bndr srt alt_type alts)
+ = do_expr expr `thenMM` \ expr' ->
+ mapMM do_alt alts `thenMM` \ alts' ->
+ returnMM (StgCase expr' fv1 fv2 bndr srt alt_type alts')
+ where
+ do_alt (id, bs, use_mask, e)
+ = do_expr e `thenMM` \ e' ->
+ returnMM (id, bs, use_mask, e')
+
+ do_expr (StgLet b e)
+ = do_let b e `thenMM` \ (b,e) ->
+ returnMM (StgLet b e)
+
+ do_expr (StgLetNoEscape lvs1 lvs2 b e)
+ = do_let b e `thenMM` \ (b,e) ->
+ returnMM (StgLetNoEscape lvs1 lvs2 b e)
+
+#ifdef DEBUG
+ do_expr other = pprPanic "SCCfinal.do_expr" (ppr other)
+#endif
+
+ ----------------------------------
+
+ do_let (StgNonRec b rhs) e
+ = do_rhs rhs `thenMM` \ rhs' ->
+ addTopLevelIshId b (
+ do_expr e `thenMM` \ e' ->
+ returnMM (StgNonRec b rhs',e')
+ )
+
+ do_let (StgRec pairs) e
+ = addTopLevelIshIds binders (
+ mapMM do_pair pairs `thenMM` \ pairs' ->
+ do_expr e `thenMM` \ e' ->
+ returnMM (StgRec pairs', e')
+ )
+ where
+ binders = map fst pairs
+ do_pair (b, rhs)
+ = do_rhs rhs `thenMM` \ rhs2 ->
+ returnMM (b, rhs2)
+
+ ----------------------------------
+ do_rhs :: StgRhs -> MassageM StgRhs
+ -- We play much the same game as we did in do_top_rhs above;
+ -- but we don't have to worry about cafs etc.
+
+{-
+ do_rhs (StgRhsClosure closure_cc bi fv u [] (StgSCC ty cc (StgCon (DataCon con) args _)))
+ | not (isSccCountCostCentre cc)
+ = collectCC cc `thenMM_`
+ returnMM (StgRhsCon cc con args)
+-}
+
+ do_rhs (StgRhsClosure _ bi fv u srt args expr)
+ = slurpSCCs currentCCS expr `thenMM` \ (expr', ccs) ->
+ do_expr expr' `thenMM` \ expr'' ->
+ returnMM (StgRhsClosure ccs bi fv u srt args expr'')
+ where
+ slurpSCCs ccs (StgSCC cc e)
+ = collectCC cc `thenMM_`
+ slurpSCCs (cc `pushCCOnCCS` ccs) e
+ slurpSCCs ccs e
+ = returnMM (e, ccs)
+
+ do_rhs (StgRhsCon cc con args)
+ = returnMM (StgRhsCon currentCCS con args)
+\end{code}
+
+%************************************************************************
+%* *
+\subsection{Boxing higher-order args}
+%* *
+%************************************************************************
+
+Boxing is *turned off* at the moment, until we can figure out how to
+do it properly in general.
+
+\begin{code}
+boxHigherOrderArgs
+ :: ([StgArg] -> StgExpr)
+ -- An application lacking its arguments
+ -> [StgArg] -- arguments which we might box
+ -> MassageM StgExpr
+
+#ifndef PROF_DO_BOXING
+boxHigherOrderArgs almost_expr args
+ = returnMM (almost_expr args)
+#else
+boxHigherOrderArgs almost_expr args
+ = getTopLevelIshIds `thenMM` \ ids ->
+ mapAccumMM (do_arg ids) [] args `thenMM` \ (let_bindings, new_args) ->
+ returnMM (foldr (mk_stg_let currentCCS) (almost_expr new_args) let_bindings)
+ where
+ ---------------
+
+ do_arg ids bindings arg@(StgVarArg old_var)
+ | (not (isLocalVar old_var) || elemVarSet old_var ids)
+ && isFunTy (dropForAlls var_type)
+ = -- make a trivial let-binding for the top-level function
+ getUniqueMM `thenMM` \ uniq ->
+ let
+ new_var = mkSysLocal FSLIT("sf") uniq var_type
+ in
+ returnMM ( (new_var, old_var) : bindings, StgVarArg new_var )
+ where
+ var_type = idType old_var
+
+ do_arg ids bindings arg = returnMM (bindings, arg)
+
+ ---------------
+ mk_stg_let :: CostCentreStack -> (Id, Id) -> StgExpr -> StgExpr
+
+ mk_stg_let cc (new_var, old_var) body
+ = let
+ rhs_body = StgApp old_var [{-args-}]
+ rhs_closure = StgRhsClosure cc stgArgOcc [{-fvs-}] ReEntrant NoSRT{-eeek!!!-} [{-args-}] rhs_body
+ in
+ StgLet (StgNonRec new_var rhs_closure) body
+ where
+ bOGUS_LVs = emptyUniqSet -- easier to print than: panic "mk_stg_let: LVs"
+#endif
+\end{code}
+
+%************************************************************************
+%* *
+\subsection{Boring monad stuff for this}
+%* *
+%************************************************************************
+
+\begin{code}
+type MassageM result
+ = Module -- module name
+ -> CostCentreStack -- prevailing CostCentre
+ -- if none, subsumedCosts at top-level
+ -- currentCostCentre at nested levels
+ -> UniqSupply
+ -> VarSet -- toplevel-ish Ids for boxing
+ -> CollectedCCs
+ -> (CollectedCCs, result)
+
+-- the initMM function also returns the final CollectedCCs
+
+initMM :: Module -- module name, which we may consult
+ -> UniqSupply
+ -> MassageM a
+ -> (CollectedCCs, a)
+
+initMM mod_name init_us m = m mod_name noCCS init_us emptyVarSet ([],[],[])
+
+thenMM :: MassageM a -> (a -> MassageM b) -> MassageM b
+thenMM_ :: MassageM a -> (MassageM b) -> MassageM b
+
+thenMM expr cont mod scope_cc us ids ccs
+ = case splitUniqSupply us of { (s1, s2) ->
+ case (expr mod scope_cc s1 ids ccs) of { (ccs2, result) ->
+ cont result mod scope_cc s2 ids ccs2 }}
+
+thenMM_ expr cont mod scope_cc us ids ccs
+ = case splitUniqSupply us of { (s1, s2) ->
+ case (expr mod scope_cc s1 ids ccs) of { (ccs2, _) ->
+ cont mod scope_cc s2 ids ccs2 }}
+
+returnMM :: a -> MassageM a
+returnMM result mod scope_cc us ids ccs = (ccs, result)
+
+nopMM :: MassageM ()
+nopMM mod scope_cc us ids ccs = (ccs, ())
+
+mapMM :: (a -> MassageM b) -> [a] -> MassageM [b]
+mapMM f [] = returnMM []
+mapMM f (m:ms)
+ = f m `thenMM` \ r ->
+ mapMM f ms `thenMM` \ rs ->
+ returnMM (r:rs)
+
+mapAccumMM :: (acc -> x -> MassageM (acc, y)) -> acc -> [x] -> MassageM (acc, [y])
+mapAccumMM f b [] = returnMM (b, [])
+mapAccumMM f b (m:ms)
+ = f b m `thenMM` \ (b2, r) ->
+ mapAccumMM f b2 ms `thenMM` \ (b3, rs) ->
+ returnMM (b3, r:rs)
+
+getUniqueMM :: MassageM Unique
+getUniqueMM mod scope_cc us ids ccs = (ccs, uniqFromSupply us)
+
+addTopLevelIshId :: Id -> MassageM a -> MassageM a
+addTopLevelIshId id scope mod scope_cc us ids ccs
+ | isCurrentCCS scope_cc = scope mod scope_cc us ids ccs
+ | otherwise = scope mod scope_cc us (extendVarSet ids id) ccs
+
+addTopLevelIshIds :: [Id] -> MassageM a -> MassageM a
+addTopLevelIshIds [] cont = cont
+addTopLevelIshIds (id:ids) cont
+ = addTopLevelIshId id (addTopLevelIshIds ids cont)
+
+getTopLevelIshIds :: MassageM VarSet
+getTopLevelIshIds mod scope_cc us ids ccs = (ccs, ids)
+\end{code}
+
+The prevailing CCS is used to tell whether we're in a top-levelish
+position, where top-levelish is defined as "not inside a lambda".
+Prevailing CCs used to be used for something much more complicated,
+I'm sure --SDM
+
+\begin{code}
+set_lambda_cc :: MassageM a -> MassageM a
+set_lambda_cc action mod scope_cc us ids ccs
+ = action mod currentCCS us ids ccs
+
+set_prevailing_cc :: CostCentreStack -> MassageM a -> MassageM a
+set_prevailing_cc cc_to_set_to action mod scope_cc us ids ccs
+ = action mod cc_to_set_to us ids ccs
+
+get_prevailing_cc :: MassageM CostCentreStack
+get_prevailing_cc mod scope_cc us ids ccs = (ccs, scope_cc)
+\end{code}
+
+\begin{code}
+collectCC :: CostCentre -> MassageM ()
+
+collectCC cc mod_name scope_cc us ids (local_ccs, extern_ccs, ccss)
+ = ASSERT(not (noCCAttached cc))
+ if (cc `ccFromThisModule` mod_name) then
+ ((cc : local_ccs, extern_ccs, ccss), ())
+ else -- must declare it "extern"
+ ((local_ccs, cc : extern_ccs, ccss), ())
+
+collectCCS :: CostCentreStack -> MassageM ()
+
+collectCCS ccs mod_name scope_cc us ids (local_ccs, extern_ccs, ccss)
+ = ASSERT(not (noCCSAttached ccs))
+ ((local_ccs, extern_ccs, ccs : ccss), ())
+\end{code}