diff options
author | Ömer Sinan Ağacan <omeragacan@gmail.com> | 2019-10-14 15:44:58 +0300 |
---|---|---|
committer | Ömer Sinan Ağacan <omeragacan@gmail.com> | 2020-01-31 12:21:10 +0300 |
commit | c846618ae0f8601515683a4c7677c20c3272a50f (patch) | |
tree | 22caa68b7c6cdf3464d078e556f7eac19400b0bf /compiler/main | |
parent | 01b15b835a7555c501df862b4dc8cc8eaff86afc (diff) | |
download | haskell-c846618ae0f8601515683a4c7677c20c3272a50f.tar.gz |
Do CafInfo/SRT analysis in Cmm
This patch removes all CafInfo predictions and various hacks to preserve
predicted CafInfos from the compiler and assigns final CafInfos to
interface Ids after code generation. SRT analysis is extended to support
static data, and Cmm generator is modified to allow generating
static_link fields after SRT analysis.
This also fixes `-fcatch-bottoms`, which introduces error calls in case
expressions in CorePrep, which runs *after* CoreTidy (which is where we
decide on CafInfos) and turns previously non-CAFFY things into CAFFY.
Fixes #17648
Fixes #9718
Evaluation
==========
NoFib
-----
Boot with: `make boot mode=fast`
Run: `make mode=fast EXTRA_RUNTEST_OPTS="-cachegrind" NoFibRuns=1`
--------------------------------------------------------------------------------
Program Size Allocs Instrs Reads Writes
--------------------------------------------------------------------------------
CS -0.0% 0.0% -0.0% -0.0% -0.0%
CSD -0.0% 0.0% -0.0% -0.0% -0.0%
FS -0.0% 0.0% -0.0% -0.0% -0.0%
S -0.0% 0.0% -0.0% -0.0% -0.0%
VS -0.0% 0.0% -0.0% -0.0% -0.0%
VSD -0.0% 0.0% -0.0% -0.0% -0.5%
VSM -0.0% 0.0% -0.0% -0.0% -0.0%
anna -0.1% 0.0% -0.0% -0.0% -0.0%
ansi -0.0% 0.0% -0.0% -0.0% -0.0%
atom -0.0% 0.0% -0.0% -0.0% -0.0%
awards -0.0% 0.0% -0.0% -0.0% -0.0%
banner -0.0% 0.0% -0.0% -0.0% -0.0%
bernouilli -0.0% 0.0% -0.0% -0.0% -0.0%
binary-trees -0.0% 0.0% -0.0% -0.0% -0.0%
boyer -0.0% 0.0% -0.0% -0.0% -0.0%
boyer2 -0.0% 0.0% -0.0% -0.0% -0.0%
bspt -0.0% 0.0% -0.0% -0.0% -0.0%
cacheprof -0.0% 0.0% -0.0% -0.0% -0.0%
calendar -0.0% 0.0% -0.0% -0.0% -0.0%
cichelli -0.0% 0.0% -0.0% -0.0% -0.0%
circsim -0.0% 0.0% -0.0% -0.0% -0.0%
clausify -0.0% 0.0% -0.0% -0.0% -0.0%
comp_lab_zift -0.0% 0.0% -0.0% -0.0% -0.0%
compress -0.0% 0.0% -0.0% -0.0% -0.0%
compress2 -0.0% 0.0% -0.0% -0.0% -0.0%
constraints -0.0% 0.0% -0.0% -0.0% -0.0%
cryptarithm1 -0.0% 0.0% -0.0% -0.0% -0.0%
cryptarithm2 -0.0% 0.0% -0.0% -0.0% -0.0%
cse -0.0% 0.0% -0.0% -0.0% -0.0%
digits-of-e1 -0.0% 0.0% -0.0% -0.0% -0.0%
digits-of-e2 -0.0% 0.0% -0.0% -0.0% -0.0%
dom-lt -0.0% 0.0% -0.0% -0.0% -0.0%
eliza -0.0% 0.0% -0.0% -0.0% -0.0%
event -0.0% 0.0% -0.0% -0.0% -0.0%
exact-reals -0.0% 0.0% -0.0% -0.0% -0.0%
exp3_8 -0.0% 0.0% -0.0% -0.0% -0.0%
expert -0.0% 0.0% -0.0% -0.0% -0.0%
fannkuch-redux -0.0% 0.0% -0.0% -0.0% -0.0%
fasta -0.0% 0.0% -0.0% -0.0% -0.0%
fem -0.0% 0.0% -0.0% -0.0% -0.0%
fft -0.0% 0.0% -0.0% -0.0% -0.0%
fft2 -0.0% 0.0% -0.0% -0.0% -0.0%
fibheaps -0.0% 0.0% -0.0% -0.0% -0.0%
fish -0.0% 0.0% -0.0% -0.0% -0.0%
fluid -0.1% 0.0% -0.0% -0.0% -0.0%
fulsom -0.0% 0.0% -0.0% -0.0% -0.0%
gamteb -0.0% 0.0% -0.0% -0.0% -0.0%
gcd -0.0% 0.0% -0.0% -0.0% -0.0%
gen_regexps -0.0% 0.0% -0.0% -0.0% -0.0%
genfft -0.0% 0.0% -0.0% -0.0% -0.0%
gg -0.0% 0.0% -0.0% -0.0% -0.0%
grep -0.0% 0.0% -0.0% -0.0% -0.0%
hidden -0.0% 0.0% -0.0% -0.0% -0.0%
hpg -0.1% 0.0% -0.0% -0.0% -0.0%
ida -0.0% 0.0% -0.0% -0.0% -0.0%
infer -0.0% 0.0% -0.0% -0.0% -0.0%
integer -0.0% 0.0% -0.0% -0.0% -0.0%
integrate -0.0% 0.0% -0.0% -0.0% -0.0%
k-nucleotide -0.0% 0.0% -0.0% -0.0% -0.0%
kahan -0.0% 0.0% -0.0% -0.0% -0.0%
knights -0.0% 0.0% -0.0% -0.0% -0.0%
lambda -0.0% 0.0% -0.0% -0.0% -0.0%
last-piece -0.0% 0.0% -0.0% -0.0% -0.0%
lcss -0.0% 0.0% -0.0% -0.0% -0.0%
life -0.0% 0.0% -0.0% -0.0% -0.0%
lift -0.0% 0.0% -0.0% -0.0% -0.0%
linear -0.1% 0.0% -0.0% -0.0% -0.0%
listcompr -0.0% 0.0% -0.0% -0.0% -0.0%
listcopy -0.0% 0.0% -0.0% -0.0% -0.0%
maillist -0.0% 0.0% -0.0% -0.0% -0.0%
mandel -0.0% 0.0% -0.0% -0.0% -0.0%
mandel2 -0.0% 0.0% -0.0% -0.0% -0.0%
mate -0.0% 0.0% -0.0% -0.0% -0.0%
minimax -0.0% 0.0% -0.0% -0.0% -0.0%
mkhprog -0.0% 0.0% -0.0% -0.0% -0.0%
multiplier -0.0% 0.0% -0.0% -0.0% -0.0%
n-body -0.0% 0.0% -0.0% -0.0% -0.0%
nucleic2 -0.0% 0.0% -0.0% -0.0% -0.0%
para -0.0% 0.0% -0.0% -0.0% -0.0%
paraffins -0.0% 0.0% -0.0% -0.0% -0.0%
parser -0.1% 0.0% -0.0% -0.0% -0.0%
parstof -0.1% 0.0% -0.0% -0.0% -0.0%
pic -0.0% 0.0% -0.0% -0.0% -0.0%
pidigits -0.0% 0.0% -0.0% -0.0% -0.0%
power -0.0% 0.0% -0.0% -0.0% -0.0%
pretty -0.0% 0.0% -0.3% -0.4% -0.4%
primes -0.0% 0.0% -0.0% -0.0% -0.0%
primetest -0.0% 0.0% -0.0% -0.0% -0.0%
prolog -0.0% 0.0% -0.0% -0.0% -0.0%
puzzle -0.0% 0.0% -0.0% -0.0% -0.0%
queens -0.0% 0.0% -0.0% -0.0% -0.0%
reptile -0.0% 0.0% -0.0% -0.0% -0.0%
reverse-complem -0.0% 0.0% -0.0% -0.0% -0.0%
rewrite -0.0% 0.0% -0.0% -0.0% -0.0%
rfib -0.0% 0.0% -0.0% -0.0% -0.0%
rsa -0.0% 0.0% -0.0% -0.0% -0.0%
scc -0.0% 0.0% -0.3% -0.5% -0.4%
sched -0.0% 0.0% -0.0% -0.0% -0.0%
scs -0.0% 0.0% -0.0% -0.0% -0.0%
simple -0.1% 0.0% -0.0% -0.0% -0.0%
solid -0.0% 0.0% -0.0% -0.0% -0.0%
sorting -0.0% 0.0% -0.0% -0.0% -0.0%
spectral-norm -0.0% 0.0% -0.0% -0.0% -0.0%
sphere -0.0% 0.0% -0.0% -0.0% -0.0%
symalg -0.0% 0.0% -0.0% -0.0% -0.0%
tak -0.0% 0.0% -0.0% -0.0% -0.0%
transform -0.0% 0.0% -0.0% -0.0% -0.0%
treejoin -0.0% 0.0% -0.0% -0.0% -0.0%
typecheck -0.0% 0.0% -0.0% -0.0% -0.0%
veritas -0.0% 0.0% -0.0% -0.0% -0.0%
wang -0.0% 0.0% -0.0% -0.0% -0.0%
wave4main -0.0% 0.0% -0.0% -0.0% -0.0%
wheel-sieve1 -0.0% 0.0% -0.0% -0.0% -0.0%
wheel-sieve2 -0.0% 0.0% -0.0% -0.0% -0.0%
x2n1 -0.0% 0.0% -0.0% -0.0% -0.0%
--------------------------------------------------------------------------------
Min -0.1% 0.0% -0.3% -0.5% -0.5%
Max -0.0% 0.0% -0.0% -0.0% -0.0%
Geometric Mean -0.0% -0.0% -0.0% -0.0% -0.0%
--------------------------------------------------------------------------------
Program Size Allocs Instrs Reads Writes
--------------------------------------------------------------------------------
circsim -0.1% 0.0% -0.0% -0.0% -0.0%
constraints -0.0% 0.0% -0.0% -0.0% -0.0%
fibheaps -0.0% 0.0% -0.0% -0.0% -0.0%
gc_bench -0.0% 0.0% -0.0% -0.0% -0.0%
hash -0.0% 0.0% -0.0% -0.0% -0.0%
lcss -0.0% 0.0% -0.0% -0.0% -0.0%
power -0.0% 0.0% -0.0% -0.0% -0.0%
spellcheck -0.0% 0.0% -0.0% -0.0% -0.0%
--------------------------------------------------------------------------------
Min -0.1% 0.0% -0.0% -0.0% -0.0%
Max -0.0% 0.0% -0.0% -0.0% -0.0%
Geometric Mean -0.0% +0.0% -0.0% -0.0% -0.0%
Manual inspection of programs in testsuite/tests/programs
---------------------------------------------------------
I built these programs with a bunch of dump flags and `-O` and compared
STG, Cmm, and Asm dumps and file sizes.
(Below the numbers in parenthesis show number of modules in the program)
These programs have identical compiler (same .hi and .o sizes, STG, and
Cmm and Asm dumps):
- Queens (1), andre_monad (1), cholewo-eval (2), cvh_unboxing (3),
andy_cherry (7), fun_insts (1), hs-boot (4), fast2haskell (2),
jl_defaults (1), jq_readsPrec (1), jules_xref (1), jtod_circint (4),
jules_xref2 (1), lennart_range (1), lex (1), life_space_leak (1),
bargon-mangler-bug (7), record_upd (1), rittri (1), sanders_array (1),
strict_anns (1), thurston-module-arith (2), okeefe_neural (1),
joao-circular (6), 10queens (1)
Programs with different compiler outputs:
- jl_defaults (1): For some reason GHC HEAD marks a lot of top-level
`[Int]` closures as CAFFY for no reason. With this patch we no longer
make them CAFFY and generate less SRT entries. For some reason Main.o
is slightly larger with this patch (1.3%) and the executable sizes are
the same. (I'd expect both to be smaller)
- launchbury (1): Same as jl_defaults: top-level `[Int]` closures marked
as CAFFY for no reason. Similarly `Main.o` is 1.4% larger but the
executable sizes are the same.
- galois_raytrace (13): Differences are in the Parse module. There are a
lot, but some of the changes are caused by the fact that for some
reason (I think a bug) GHC HEAD marks the dictionary for `Functor
Identity` as CAFFY. Parse.o is 0.4% larger, the executable size is the
same.
- north_array: We now generate less SRT entries because some of array
primops used in this program like `NewArrayOp` get eliminated during
Stg-to-Cmm and turn some CAFFY things into non-CAFFY. Main.o gets 24%
larger (9224 bytes from 9000 bytes), executable sizes are the same.
- seward-space-leak: Difference in this program is better shown by this
smaller example:
module Lib where
data CDS
= Case [CDS] [(Int, CDS)]
| Call CDS CDS
instance Eq CDS where
Case sels1 rets1 == Case sels2 rets2 =
sels1 == sels2 && rets1 == rets2
Call a1 b1 == Call a2 b2 =
a1 == a2 && b1 == b2
_ == _ =
False
In this program GHC HEAD builds a new SRT for the recursive group of
`(==)`, `(/=)` and the dictionary closure. Then `/=` points to `==`
in its SRT field, and `==` uses the SRT object as its SRT. With this
patch we use the closure for `/=` as the SRT and add `==` there. Then
`/=` gets an empty SRT field and `==` points to `/=` in its SRT
field.
This change looks fine to me.
Main.o gets 0.07% larger, executable sizes are identical.
head.hackage
------------
head.hackage's CI script builds 428 packages from Hackage using this
patch with no failures.
Compiler performance
--------------------
The compiler perf tests report that the compiler allocates slightly more
(worst case observed so far is 4%). However most programs in the test
suite are small, single file programs. To benchmark compiler performance
on something more realistic I build Cabal (the library, 236 modules)
with different optimisation levels. For the "max residency" row I run
GHC with `+RTS -s -A100k -i0 -h` for more accurate numbers. Other rows
are generated with just `-s`. (This is because `-i0` causes running GC
much more frequently and as a result "bytes copied" gets inflated by
more than 25x in some cases)
* -O0
| | GHC HEAD | This MR | Diff |
| --------------- | -------------- | -------------- | ------ |
| Bytes allocated | 54,413,350,872 | 54,701,099,464 | +0.52% |
| Bytes copied | 4,926,037,184 | 4,990,638,760 | +1.31% |
| Max residency | 421,225,624 | 424,324,264 | +0.73% |
* -O1
| | GHC HEAD | This MR | Diff |
| --------------- | --------------- | --------------- | ------ |
| Bytes allocated | 245,849,209,992 | 246,562,088,672 | +0.28% |
| Bytes copied | 26,943,452,560 | 27,089,972,296 | +0.54% |
| Max residency | 982,643,440 | 991,663,432 | +0.91% |
* -O2
| | GHC HEAD | This MR | Diff |
| --------------- | --------------- | --------------- | ------ |
| Bytes allocated | 291,044,511,408 | 291,863,910,912 | +0.28% |
| Bytes copied | 37,044,237,616 | 36,121,690,472 | -2.49% |
| Max residency | 1,071,600,328 | 1,086,396,256 | +1.38% |
Extra compiler allocations
--------------------------
Runtime allocations of programs are as reported above (NoFib section).
The compiler now allocates more than before. Main source of allocation
in this patch compared to base commit is the new SRT algorithm
(GHC.Cmm.Info.Build). Below is some of the extra work we do with this
patch, numbers generated by profiled stage 2 compiler when building a
pathological case (the test 'ManyConstructors') with '-O2':
- We now sort the final STG for a module, which means traversing the
entire program, generating free variable set for each top-level
binding, doing SCC analysis, and re-ordering the program. In
ManyConstructors this step allocates 97,889,952 bytes.
- We now do SRT analysis on static data, which in a program like
ManyConstructors causes analysing 10,000 bindings that we would
previously just skip. This step allocates 70,898,352 bytes.
- We now maintain an SRT map for the entire module as we compile Cmm
groups:
data ModuleSRTInfo = ModuleSRTInfo
{ ...
, moduleSRTMap :: SRTMap
}
(SRTMap is just a strict Map from the 'containers' library)
This map gets an entry for most bindings in a module (exceptions are
THUNKs and CAFFY static functions). For ManyConstructors this map
gets 50015 entries.
- Once we're done with code generation we generate a NameSet from SRTMap
for the non-CAFFY names in the current module. This set gets the same
number of entries as the SRTMap.
- Finally we update CafInfos in ModDetails for the non-CAFFY Ids, using
the NameSet generated in the previous step. This usually does the
least amount of allocation among the work listed here.
Only place with this patch where we do less work in the CAF analysis in
the tidying pass (CoreTidy). However that doesn't save us much, as the
pass still needs to traverse the whole program and update IdInfos for
other reasons. Only thing we don't here do is the `hasCafRefs` pass over
the RHS of bindings, which is a stateless pass that returns a boolean
value, so it doesn't allocate much.
(Metric changes blow are all increased allocations)
Metric changes
--------------
Metric Increase:
ManyAlternatives
ManyConstructors
T13035
T14683
T1969
T9961
Diffstat (limited to 'compiler/main')
-rw-r--r-- | compiler/main/DriverPipeline.hs | 16 | ||||
-rw-r--r-- | compiler/main/DynFlags.hs | 17 | ||||
-rw-r--r-- | compiler/main/Hooks.hs | 7 | ||||
-rw-r--r-- | compiler/main/HscMain.hs | 30 | ||||
-rw-r--r-- | compiler/main/UpdateCafInfos.hs | 141 |
5 files changed, 174 insertions, 37 deletions
diff --git a/compiler/main/DriverPipeline.hs b/compiler/main/DriverPipeline.hs index 823d3d75ff..0781b1a6d8 100644 --- a/compiler/main/DriverPipeline.hs +++ b/compiler/main/DriverPipeline.hs @@ -69,6 +69,7 @@ import Ar import Bag ( unitBag ) import FastString ( mkFastString ) import GHC.Iface.Utils ( mkFullIface ) +import UpdateCafInfos ( updateModDetailsCafInfos ) import Exception import System.Directory @@ -228,8 +229,8 @@ compileOne' m_tc_result mHscMessage hscs_iface_dflags = iface_dflags }, HscInterpreted) -> do -- In interpreted mode the regular codeGen backend is not run so we -- generate a interface without codeGen info. - final_iface <- mkFullIface hsc_env'{hsc_dflags=iface_dflags} partial_iface - liftIO $ hscMaybeWriteIface dflags final_iface mb_old_iface_hash mod_location + final_iface <- mkFullIface hsc_env'{hsc_dflags=iface_dflags} partial_iface Nothing + liftIO $ hscMaybeWriteIface dflags final_iface mb_old_iface_hash (ms_location summary) (hasStub, comp_bc, spt_entries) <- hscInteractive hsc_env' cgguts mod_location @@ -1188,15 +1189,12 @@ runPhase (HscOut src_flavour mod_name result) _ dflags = do PipeState{hsc_env=hsc_env'} <- getPipeState - (outputFilename, mStub, foreign_files) <- liftIO $ + (outputFilename, mStub, foreign_files, caf_infos) <- liftIO $ hscGenHardCode hsc_env' cgguts mod_location output_fn - final_iface <- liftIO (mkFullIface hsc_env'{hsc_dflags=iface_dflags} partial_iface) - -- TODO(osa): ModIface and ModDetails need to be in sync, - -- but we only generate ModIface with the backend info. See - -- !2100 for more discussion on this. This will be fixed - -- with !1304 or !2100. - setIface final_iface mod_details + final_iface <- liftIO (mkFullIface hsc_env'{hsc_dflags=iface_dflags} partial_iface (Just caf_infos)) + let final_mod_details = updateModDetailsCafInfos caf_infos mod_details + setIface final_iface final_mod_details -- See Note [Writing interface files] let if_dflags = dflags `gopt_unset` Opt_BuildDynamicToo diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs index 5c5d01c546..be40ff9e2e 100644 --- a/compiler/main/DynFlags.hs +++ b/compiler/main/DynFlags.hs @@ -427,6 +427,7 @@ data DumpFlag | Opt_D_dump_cmm_split | Opt_D_dump_cmm_info | Opt_D_dump_cmm_cps + | Opt_D_dump_srts -- end cmm subflags | Opt_D_dump_cfg_weights -- ^ Dump the cfg used for block layout. | Opt_D_dump_asm @@ -3358,6 +3359,8 @@ dynamic_flags_deps = [ (setDumpFlag Opt_D_dump_cmm_info) , make_ord_flag defGhcFlag "ddump-cmm-cps" (setDumpFlag Opt_D_dump_cmm_cps) + , make_ord_flag defGhcFlag "ddump-srts" + (setDumpFlag Opt_D_dump_srts) , make_ord_flag defGhcFlag "ddump-cfg-weights" (setDumpFlag Opt_D_dump_cfg_weights) , make_ord_flag defGhcFlag "ddump-core-stats" @@ -4791,20 +4794,6 @@ optLevelFlags -- see Note [Documenting optimisation flags] -- Static Argument Transformation needs investigation. See #9374 ] -{- Note [Eta-reduction in -O0] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -#11562 showed an example which tripped an ASSERT in CoreToStg; a -function was marked as MayHaveCafRefs when in fact it obviously -didn't. Reason was: - * Eta reduction wasn't happening in the simplifier, but it was - happening in CorePrep, on - $fBla = MkDict (/\a. K a) - * Result: rhsIsStatic told GHC.Iface.Tidy that $fBla might have CAF refs - but the eta-reduced version (MkDict K) obviously doesn't -Simple solution: just let the simplifier do eta-reduction even in -O0. -After all, CorePrep does it unconditionally! Not a big deal, but -removes an assertion failure. -} - -- ----------------------------------------------------------------------------- -- Standard sets of warning options diff --git a/compiler/main/Hooks.hs b/compiler/main/Hooks.hs index 8caebfc556..064f96c33e 100644 --- a/compiler/main/Hooks.hs +++ b/compiler/main/Hooks.hs @@ -3,7 +3,8 @@ -- NB: this module is SOURCE-imported by DynFlags, and should primarily -- refer to *types*, rather than *code* -{-# LANGUAGE CPP #-} +{-# LANGUAGE CPP, RankNTypes #-} + module Hooks ( Hooks , emptyHooks , lookupHook @@ -107,8 +108,8 @@ data Hooks = Hooks , createIservProcessHook :: Maybe (CreateProcess -> IO ProcessHandle) , stgToCmmHook :: Maybe (DynFlags -> Module -> [TyCon] -> CollectedCCs -> [CgStgTopBinding] -> HpcInfo -> Stream IO CmmGroup ()) - , cmmToRawCmmHook :: Maybe (DynFlags -> Maybe Module -> Stream IO CmmGroup () - -> IO (Stream IO RawCmmGroup ())) + , cmmToRawCmmHook :: forall a . Maybe (DynFlags -> Maybe Module -> Stream IO CmmGroupSRTs a + -> IO (Stream IO RawCmmGroup a)) } getHooked :: (Functor f, HasDynFlags f) => (Hooks -> Maybe a) -> a -> f a diff --git a/compiler/main/HscMain.hs b/compiler/main/HscMain.hs index 1c27542270..391b989915 100644 --- a/compiler/main/HscMain.hs +++ b/compiler/main/HscMain.hs @@ -133,6 +133,7 @@ import CostCentre import ProfInit import TyCon import Name +import NameSet import GHC.Cmm import GHC.Cmm.Parser ( parseCmmFile ) import GHC.Cmm.Info.Build @@ -173,6 +174,7 @@ import System.IO (fixIO) import qualified Data.Map as M import qualified Data.Set as S import Data.Set (Set) +import Data.Functor import Control.DeepSeq (force) import GHC.Iface.Ext.Ast ( mkHieFile ) @@ -1405,7 +1407,7 @@ hscWriteIface dflags iface no_change mod_location = do -- | Compile to hard-code. hscGenHardCode :: HscEnv -> CgGuts -> ModLocation -> FilePath - -> IO (FilePath, Maybe FilePath, [(ForeignSrcLang, FilePath)]) + -> IO (FilePath, Maybe FilePath, [(ForeignSrcLang, FilePath)], NameSet) -- ^ @Just f@ <=> _stub.c is f hscGenHardCode hsc_env cgguts location output_filename = do let CgGuts{ -- This is the last use of the ModGuts in a compilation. @@ -1464,11 +1466,11 @@ hscGenHardCode hsc_env cgguts location output_filename = do return a rawcmms1 = Stream.mapM dump rawcmms0 - (output_filename, (_stub_h_exists, stub_c_exists), foreign_fps, ()) + (output_filename, (_stub_h_exists, stub_c_exists), foreign_fps, caf_infos) <- {-# SCC "codeOutput" #-} codeOutput dflags this_mod output_filename location foreign_stubs foreign_files dependencies rawcmms1 - return (output_filename, stub_c_exists, foreign_fps) + return (output_filename, stub_c_exists, foreign_fps, caf_infos) hscInteractive :: HscEnv @@ -1514,7 +1516,16 @@ hscCompileCmmFile hsc_env filename output_filename = runHsc hsc_env $ do -- lest we reproduce #11784. mod_name = mkModuleName $ "Cmm$" ++ FilePath.takeFileName filename cmm_mod = mkModule (thisPackage dflags) mod_name - (_, cmmgroup) <- cmmPipeline hsc_env (emptySRT cmm_mod) cmm + + -- Compile decls in Cmm files one decl at a time, to avoid re-ordering + -- them in SRT analysis. + -- + -- Re-ordering here causes breakage when booting with C backend because + -- in C we must declare before use, but SRT algorithm is free to + -- re-order [A, B] (B refers to A) when A is not CAFFY and return [B, A] + cmmgroup <- + concatMapM (\cmm -> snd <$> cmmPipeline hsc_env (emptySRT cmm_mod) [cmm]) cmm + unless (null cmmgroup) $ dumpIfSet_dyn dflags Opt_D_dump_cmm "Output Cmm" FormatCMM (ppr cmmgroup) @@ -1535,7 +1546,7 @@ doCodeGen :: HscEnv -> Module -> [TyCon] -> CollectedCCs -> [StgTopBinding] -> HpcInfo - -> IO (Stream IO CmmGroup ()) + -> IO (Stream IO CmmGroupSRTs NameSet) -- Note we produce a 'Stream' of CmmGroups, so that the -- backend can be run incrementally. Otherwise it generates all -- the C-- up front, which has a significant space cost. @@ -1565,18 +1576,15 @@ doCodeGen hsc_env this_mod data_tycons pipeline_stream = {-# SCC "cmmPipeline" #-} - let run_pipeline = cmmPipeline hsc_env - in void $ Stream.mapAccumL run_pipeline (emptySRT this_mod) ppr_stream1 + Stream.mapAccumL (cmmPipeline hsc_env) (emptySRT this_mod) ppr_stream1 + <&> (srtMapNonCAFs . moduleSRTMap) dump2 a = do unless (null a) $ dumpIfSet_dyn dflags Opt_D_dump_cmm "Output Cmm" FormatCMM (ppr a) return a - ppr_stream2 = Stream.mapM dump2 pipeline_stream - - return ppr_stream2 - + return (Stream.mapM dump2 pipeline_stream) myCoreToStg :: DynFlags -> Module -> CoreProgram diff --git a/compiler/main/UpdateCafInfos.hs b/compiler/main/UpdateCafInfos.hs new file mode 100644 index 0000000000..c5e81150fe --- /dev/null +++ b/compiler/main/UpdateCafInfos.hs @@ -0,0 +1,141 @@ +{-# LANGUAGE CPP, BangPatterns, Strict, RecordWildCards #-} + +module UpdateCafInfos + ( updateModDetailsCafInfos + ) where + +import GhcPrelude + +import CoreSyn +import HscTypes +import Id +import IdInfo +import InstEnv +import NameEnv +import NameSet +import Util +import Var +import Outputable + +#include "HsVersions.h" + +-- | Update CafInfos of all occurences (in rules, unfoldings, class instances) +updateModDetailsCafInfos + :: NameSet -- ^ Non-CAFFY names in the module. Names not in this set are CAFFY. + -> ModDetails -- ^ ModDetails to update + -> ModDetails +updateModDetailsCafInfos non_cafs mod_details = + {- pprTrace "updateModDetailsCafInfos" (text "non_cafs:" <+> ppr non_cafs) $ -} + let + ModDetails{ md_types = type_env -- for unfoldings + , md_insts = insts + , md_rules = rules + } = mod_details + + -- type TypeEnv = NameEnv TyThing + ~type_env' = mapNameEnv (updateTyThingCafInfos type_env' non_cafs) type_env + -- Not strict! + + !insts' = strictMap (updateInstCafInfos type_env' non_cafs) insts + !rules' = strictMap (updateRuleCafInfos type_env') rules + in + mod_details{ md_types = type_env' + , md_insts = insts' + , md_rules = rules' + } + +-------------------------------------------------------------------------------- +-- Rules +-------------------------------------------------------------------------------- + +updateRuleCafInfos :: TypeEnv -> CoreRule -> CoreRule +updateRuleCafInfos _ rule@BuiltinRule{} = rule +updateRuleCafInfos type_env Rule{ .. } = Rule { ru_rhs = updateGlobalIds type_env ru_rhs, .. } + +-------------------------------------------------------------------------------- +-- Instances +-------------------------------------------------------------------------------- + +updateInstCafInfos :: TypeEnv -> NameSet -> ClsInst -> ClsInst +updateInstCafInfos type_env non_cafs = + updateClsInstDFun (updateIdUnfolding type_env . updateIdCafInfo non_cafs) + +-------------------------------------------------------------------------------- +-- TyThings +-------------------------------------------------------------------------------- + +updateTyThingCafInfos :: TypeEnv -> NameSet -> TyThing -> TyThing + +updateTyThingCafInfos type_env non_cafs (AnId id) = + AnId (updateIdUnfolding type_env (updateIdCafInfo non_cafs id)) + +updateTyThingCafInfos _ _ other = other -- AConLike, ATyCon, ACoAxiom + +-------------------------------------------------------------------------------- +-- Unfoldings +-------------------------------------------------------------------------------- + +updateIdUnfolding :: TypeEnv -> Id -> Id +updateIdUnfolding type_env id = + case idUnfolding id of + CoreUnfolding{ .. } -> + setIdUnfolding id CoreUnfolding{ uf_tmpl = updateGlobalIds type_env uf_tmpl, .. } + DFunUnfolding{ .. } -> + setIdUnfolding id DFunUnfolding{ df_args = map (updateGlobalIds type_env) df_args, .. } + _ -> id + +-------------------------------------------------------------------------------- +-- Expressions +-------------------------------------------------------------------------------- + +updateIdCafInfo :: NameSet -> Id -> Id +updateIdCafInfo non_cafs id + | idName id `elemNameSet` non_cafs + = -- pprTrace "updateIdCafInfo" (text "Marking" <+> ppr id <+> parens (ppr (idName id)) <+> text "as non-CAFFY") $ + id `setIdCafInfo` NoCafRefs + | otherwise + = id + +-------------------------------------------------------------------------------- + +updateGlobalIds :: NameEnv TyThing -> CoreExpr -> CoreExpr +-- Update occurrences of GlobalIds as directed by 'env' +-- The 'env' maps a GlobalId to a version with accurate CAF info +-- (and in due course perhaps other back-end-related info) +updateGlobalIds env e = go env e + where + go_id :: NameEnv TyThing -> Id -> Id + go_id env var = + case lookupNameEnv env (varName var) of + Nothing -> var + Just (AnId id) -> id + Just other -> pprPanic "UpdateCafInfos.updateGlobalIds" $ + text "Found a non-Id for Id Name" <+> ppr (varName var) $$ + nest 4 (text "Id:" <+> ppr var $$ + text "TyThing:" <+> ppr other) + + go :: NameEnv TyThing -> CoreExpr -> CoreExpr + go env (Var v) = Var (go_id env v) + go _ e@Lit{} = e + go env (App e1 e2) = App (go env e1) (go env e2) + go env (Lam b e) = assertNotInNameEnv env [b] (Lam b (go env e)) + go env (Let bs e) = Let (go_binds env bs) (go env e) + go env (Case e b ty alts) = + assertNotInNameEnv env [b] (Case (go env e) b ty (map go_alt alts)) + where + go_alt (k,bs,e) = assertNotInNameEnv env bs (k, bs, go env e) + go env (Cast e c) = Cast (go env e) c + go env (Tick t e) = Tick t (go env e) + go _ e@Type{} = e + go _ e@Coercion{} = e + + go_binds :: NameEnv TyThing -> CoreBind -> CoreBind + go_binds env (NonRec b e) = + assertNotInNameEnv env [b] (NonRec b (go env e)) + go_binds env (Rec prs) = + assertNotInNameEnv env (map fst prs) (Rec (mapSnd (go env) prs)) + +-- In `updateGlobaLIds` Names of local binders should not shadow Name of +-- globals. This assertion is to check that. +assertNotInNameEnv :: NameEnv a -> [Id] -> b -> b +assertNotInNameEnv env ids x = ASSERT(not (any (\id -> elemNameEnv (idName id) env) ids)) x |