summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSimon Marlow <marlowsd@gmail.com>2016-12-20 14:32:11 +0000
committerSimon Marlow <marlowsd@gmail.com>2017-01-06 15:53:36 +0000
commit3a18baff06abc193569b1b76358da26375b3c8d6 (patch)
treec80e30dc27fb548eca50b9697d1fdd2a248a891a
parent508811004d1806b28a91c3ff4a5c2247e2ad4655 (diff)
downloadhaskell-3a18baff06abc193569b1b76358da26375b3c8d6.tar.gz
More fixes for #5654
* In stg_ap_0_fast, if we're evaluating a thunk, the thunk might evaluate to a function in which case we may have to adjust its CCS. * The interpreter has its own implementation of stg_ap_0_fast, so we have to do the same shenanigans with creating empty PAPs and copying PAPs there. * GHCi creates Cost Centres as children of CCS_MAIN, which enterFunCCS() wrongly assumed to imply that they were CAFs. Now we use the is_caf flag for this, which we have to correctly initialise when we create a Cost Centre in GHCi.
-rw-r--r--includes/stg/MiscClosures.h1
-rw-r--r--rts/Apply.cmm27
-rw-r--r--rts/Interpreter.c72
-rw-r--r--rts/Printer.c5
-rw-r--r--rts/Profiling.c6
-rw-r--r--rts/StgMiscClosures.cmm10
-rw-r--r--testsuite/tests/codeGen/should_run/cgrun057.stderr2
-rw-r--r--testsuite/tests/profiling/should_run/T680.prof.sample65
-rw-r--r--testsuite/tests/profiling/should_run/all.T3
-rw-r--r--testsuite/tests/profiling/should_run/toplevel_scc_1.prof.sample41
10 files changed, 170 insertions, 62 deletions
diff --git a/includes/stg/MiscClosures.h b/includes/stg/MiscClosures.h
index b3f9a69472..994b369c46 100644
--- a/includes/stg/MiscClosures.h
+++ b/includes/stg/MiscClosures.h
@@ -63,6 +63,7 @@ RTS_RET(stg_maskUninterruptiblezh_ret);
RTS_RET(stg_maskAsyncExceptionszh_ret);
RTS_RET(stg_stack_underflow_frame);
RTS_RET(stg_restore_cccs);
+RTS_RET(stg_restore_cccs_eval);
// RTS_FUN(stg_interp_constr1_entry);
// RTS_FUN(stg_interp_constr2_entry);
diff --git a/rts/Apply.cmm b/rts/Apply.cmm
index b18c347d40..b3a04ca58c 100644
--- a/rts/Apply.cmm
+++ b/rts/Apply.cmm
@@ -52,6 +52,10 @@ stg_ap_0_fast ( P_ fun )
The mechanism we use to wrap the function is to create a
zero-argument PAP as a proxy object to hold the new CCS, and return
that.
+
+ If the closure we evaluated is itself a PAP, we cannot make a nested
+ PAP, so we copy the original PAP and set the CCS in the new PAP to
+ enterFunCCS(pap->header.prof.ccs).
*/
again:
@@ -122,6 +126,8 @@ again:
CCS_ALLOC(BYTES_TO_WDS(SIZEOF_StgPAP), CCS_OVERHEAD);
P_ pap;
pap = Hp - size + WDS(1);
+ // We'll lose the original PAP, so we should enter its CCS
+ ccall enterFunCCS(BaseReg "ptr", StgHeader_ccs(fun) "ptr");
SET_HDR(pap, stg_PAP_info, CCCS);
StgPAP_arity(pap) = StgPAP_arity(fun);
StgPAP_n_args(pap) = StgPAP_n_args(fun);
@@ -137,6 +143,27 @@ again:
goto loop;
}
}
+ case AP,
+ AP_STACK,
+ BLACKHOLE,
+ WHITEHOLE,
+ THUNK,
+ THUNK_1_0,
+ THUNK_0_1,
+ THUNK_2_0,
+ THUNK_1_1,
+ THUNK_0_2,
+ THUNK_STATIC,
+ THUNK_SELECTOR:
+ {
+ // The thunk might evaluate to a function, so we have to come
+ // back here again to adjust its CCS if necessary. The
+ // stg_restore_ccs_eval stack frame does that.
+ STK_CHK_GEN();
+ jump %ENTRY_CODE(info)
+ (stg_restore_cccs_eval_info, CCCS)
+ (UNTAG(fun));
+ }
default:
{
jump %ENTRY_CODE(info) (UNTAG(fun));
diff --git a/rts/Interpreter.c b/rts/Interpreter.c
index 7221ff64f9..5a395670b7 100644
--- a/rts/Interpreter.c
+++ b/rts/Interpreter.c
@@ -214,6 +214,48 @@ void interp_shutdown ( void )
#endif
+#ifdef PROFILING
+
+//
+// Build a zero-argument PAP with the current CCS
+// See Note [Evaluating functions with profiling] in Apply.cmm
+//
+STATIC_INLINE
+StgClosure * newEmptyPAP (Capability *cap,
+ StgClosure *tagged_obj, // a FUN or a BCO
+ uint32_t arity)
+{
+ StgPAP *pap = (StgPAP *)allocate(cap, sizeofW(StgPAP));
+ SET_HDR(pap, &stg_PAP_info, cap->r.rCCCS);
+ pap->arity = arity;
+ pap->n_args = 0;
+ pap->fun = tagged_obj;
+ return (StgClosure *)pap;
+}
+
+//
+// Make an exact copy of a PAP, except that we combine the current CCS with the
+// CCS in the PAP. See Note [Evaluating functions with profiling] in Apply.cmm
+//
+STATIC_INLINE
+StgClosure * copyPAP (Capability *cap, StgPAP *oldpap)
+{
+ uint32_t size = PAP_sizeW(oldpap->n_args);
+ StgPAP *pap = (StgPAP *)allocate(cap, size);
+ enterFunCCS(&cap->r, oldpap->header.prof.ccs);
+ SET_HDR(pap, &stg_PAP_info, cap->r.rCCCS);
+ pap->arity = oldpap->arity;
+ pap->n_args = oldpap->n_args;
+ pap->fun = oldpap->fun;
+ uint32_t i;
+ for (i = 0; i < ((StgPAP *)pap)->n_args; i++) {
+ pap->payload[i] = oldpap->payload[i];
+ }
+ return (StgClosure *)pap;
+}
+
+#endif
+
static StgWord app_ptrs_itbl[] = {
(W_)&stg_ap_p_info,
(W_)&stg_ap_pp_info,
@@ -343,6 +385,8 @@ eval_obj:
case CONSTR_1_1:
case CONSTR_0_2:
case CONSTR_NOCAF:
+ break;
+
case FUN:
case FUN_1_0:
case FUN_0_1:
@@ -350,15 +394,30 @@ eval_obj:
case FUN_1_1:
case FUN_0_2:
case FUN_STATIC:
+#ifdef PROFILING
+ if (cap->r.rCCCS != obj->header.prof.ccs) {
+ tagged_obj =
+ newEmptyPAP(cap, tagged_obj, get_fun_itbl(obj)->f.arity);
+ }
+#endif
+ break;
+
case PAP:
- // already in WHNF
+#ifdef PROFILING
+ if (cap->r.rCCCS != obj->header.prof.ccs) {
+ tagged_obj = copyPAP(cap, (StgPAP *)obj);
+ }
+#endif
break;
case BCO:
- {
ASSERT(((StgBCO *)obj)->arity > 0);
+#ifdef PROFILING
+ if (cap->r.rCCCS != obj->header.prof.ccs) {
+ tagged_obj = newEmptyPAP(cap, tagged_obj, ((StgBCO *)obj)->arity);
+ }
+#endif
break;
- }
case AP: /* Copied from stg_AP_entry. */
{
@@ -380,7 +439,7 @@ eval_obj:
// restore the CCCS after evaluating the AP
Sp -= 2;
Sp[1] = (W_)cap->r.rCCCS;
- Sp[0] = (W_)&stg_restore_cccs_info;
+ Sp[0] = (W_)&stg_restore_cccs_eval_info;
#endif
Sp -= sizeofW(StgUpdateFrame);
@@ -425,7 +484,7 @@ eval_obj:
// restore the CCCS after evaluating the closure
Sp -= 2;
Sp[1] = (W_)cap->r.rCCCS;
- Sp[0] = (W_)&stg_restore_cccs_info;
+ Sp[0] = (W_)&stg_restore_cccs_eval_info;
#endif
Sp -= 2;
Sp[1] = (W_)tagged_obj;
@@ -465,7 +524,8 @@ do_return:
// NOTE: not using get_itbl().
info = ((StgClosure *)Sp)->header.info;
- if (info == (StgInfoTable *)&stg_restore_cccs_info) {
+ if (info == (StgInfoTable *)&stg_restore_cccs_info ||
+ info == (StgInfoTable *)&stg_restore_cccs_eval_info) {
cap->r.rCCCS = (CostCentreStack*)Sp[1];
Sp += 2;
goto do_return;
diff --git a/rts/Printer.c b/rts/Printer.c
index f23e0b0636..87b11e80d8 100644
--- a/rts/Printer.c
+++ b/rts/Printer.c
@@ -526,6 +526,11 @@ printStackChunk( StgPtr sp, StgPtr spBottom )
fprintCCS(stderr, (CostCentreStack*)sp[1]);
debugBelch("\n" );
continue;
+ } else if (c == (StgWord)&stg_restore_cccs_eval_info) {
+ debugBelch("stg_restore_cccs_eval_info\n" );
+ fprintCCS(stderr, (CostCentreStack*)sp[1]);
+ debugBelch("\n" );
+ continue;
#endif
} else {
debugBelch("RET_SMALL (%p)\n", info);
diff --git a/rts/Profiling.c b/rts/Profiling.c
index 952785be18..94ec55582b 100644
--- a/rts/Profiling.c
+++ b/rts/Profiling.c
@@ -236,6 +236,10 @@ CostCentre *mkCostCentre (char *label, char *module, char *srcloc)
cc->label = label;
cc->module = module;
cc->srcloc = srcloc;
+ cc->is_caf = 0;
+ cc->mem_alloc = 0;
+ cc->time_ticks = 0;
+ cc->link = NULL;
return cc;
}
@@ -379,7 +383,7 @@ void enterFunCCS (StgRegTable *reg, CostCentreStack *ccsfn)
}
// common case 2: the function stack is empty, or just CAF
- if (ccsfn->prevStack == CCS_MAIN) {
+ if (ccsfn->cc->is_caf) {
return;
}
diff --git a/rts/StgMiscClosures.cmm b/rts/StgMiscClosures.cmm
index aa22c99be4..e8a5b8fed5 100644
--- a/rts/StgMiscClosures.cmm
+++ b/rts/StgMiscClosures.cmm
@@ -55,6 +55,16 @@ INFO_TABLE_RET (stg_restore_cccs, RET_SMALL, W_ info_ptr, W_ cccs)
jump %ENTRY_CODE(Sp(0)) [*]; // NB. all registers live!
}
+
+INFO_TABLE_RET (stg_restore_cccs_eval, RET_SMALL, W_ info_ptr, W_ cccs)
+ return (P_ ret)
+{
+#if defined(PROFILING)
+ CCCS = cccs;
+#endif
+ jump stg_ap_0_fast(ret);
+}
+
/* ----------------------------------------------------------------------------
Support for the bytecode interpreter.
------------------------------------------------------------------------- */
diff --git a/testsuite/tests/codeGen/should_run/cgrun057.stderr b/testsuite/tests/codeGen/should_run/cgrun057.stderr
index 262d74912d..5d1656d25d 100644
--- a/testsuite/tests/codeGen/should_run/cgrun057.stderr
+++ b/testsuite/tests/codeGen/should_run/cgrun057.stderr
@@ -1,4 +1,4 @@
-*** Exception (reporting due to +RTS -xc): (THUNK_2_0), stack trace:
+*** Exception (reporting due to +RTS -xc): (THUNK_STATIC), stack trace:
Main.g,
called from Main.f,
called from Main.main,
diff --git a/testsuite/tests/profiling/should_run/T680.prof.sample b/testsuite/tests/profiling/should_run/T680.prof.sample
index 1d52d7157d..8875bbd376 100644
--- a/testsuite/tests/profiling/should_run/T680.prof.sample
+++ b/testsuite/tests/profiling/should_run/T680.prof.sample
@@ -1,42 +1,41 @@
- Thu Dec 8 15:23 2016 Time and Allocation Profiling Report (Final)
+ Tue Dec 20 13:18 2016 Time and Allocation Profiling Report (Final)
- T680 +RTS -hc -p -RTS
+ T680 +RTS -p -RTS
- total time = 0.00 secs (0 ticks @ 1000 us, 1 processor)
- total alloc = 753,032 bytes (excludes profiling overheads)
+ total time = 0.20 secs (204 ticks @ 1000 us, 1 processor)
+ total alloc = 449,729,208 bytes (excludes profiling overheads)
-COST CENTRE MODULE SRC %time %alloc
+COST CENTRE MODULE SRC %time %alloc
-CAF GHC.IO.Handle.FD <entire-module> 0.0 4.6
-main Main T680.hs:20:1-14 0.0 1.2
-foo.\ Main T680.hs:3:12-40 0.0 25.5
-foo.bar Main T680.hs:(5,3)-(9,38) 0.0 29.8
-foo.bar.\ Main T680.hs:(8,11)-(9,38) 0.0 38.2
+foo.\ Main T680.hs:3:12-40 98.5 99.8
individual inherited
COST CENTRE MODULE SRC no. entries %time %alloc %time %alloc
-MAIN MAIN <built-in> 104 0 0.0 0.1 0.0 100.0
- CAF Main <entire-module> 207 0 0.0 0.0 0.0 93.5
- k Main T680.hs:12:1-17 211 1 0.0 0.0 0.0 0.0
- foo Main T680.hs:(2,1)-(9,38) 212 1 0.0 0.0 0.0 0.0
- foo.bar Main T680.hs:(5,3)-(9,38) 216 1 0.0 0.0 0.0 0.0
- foo.bar.k' Main T680.hs:6:9-34 220 1 0.0 0.0 0.0 0.0
- k.\ Main T680.hs:12:16 221 1 0.0 0.0 0.0 0.0
- main Main T680.hs:20:1-14 208 1 0.0 0.0 0.0 0.0
- r Main T680.hs:18:1-26 210 1 0.0 0.0 0.0 93.5
- k Main T680.hs:12:1-17 213 0 0.0 0.0 0.0 93.5
- foo Main T680.hs:(2,1)-(9,38) 214 0 0.0 0.0 0.0 93.5
- foo.\ Main T680.hs:3:12-40 215 4001 0.0 25.5 0.0 25.5
- foo.bar Main T680.hs:(5,3)-(9,38) 217 0 0.0 29.8 0.0 68.0
- foo.bar.\ Main T680.hs:(8,11)-(9,38) 218 4001 0.0 38.2 0.0 38.2
- foo.bar.\.k'' Main T680.hs:8:15-27 219 4000 0.0 0.0 0.0 0.0
- foo.bar.k' Main T680.hs:6:9-34 222 0 0.0 0.0 0.0 0.0
- CAF GHC.Conc.Signal <entire-module> 201 0 0.0 0.1 0.0 0.1
- CAF GHC.IO.Encoding <entire-module> 191 0 0.0 0.4 0.0 0.4
- CAF GHC.IO.Encoding.Iconv <entire-module> 189 0 0.0 0.0 0.0 0.0
- CAF GHC.IO.Handle.FD <entire-module> 181 0 0.0 4.6 0.0 4.6
- CAF GHC.IO.Handle.Text <entire-module> 179 0 0.0 0.0 0.0 0.0
- CAF GHC.Show <entire-module> 165 0 0.0 0.0 0.0 0.0
- main Main T680.hs:20:1-14 209 0 0.0 1.2 0.0 1.2
+MAIN MAIN <built-in> 108 0 0.0 0.0 100.0 100.0
+ CAF Main <entire-module> 215 0 0.0 0.0 100.0 100.0
+ k Main T680.hs:12:1-17 219 1 0.0 0.0 0.0 0.0
+ foo Main T680.hs:(2,1)-(9,38) 220 1 0.0 0.0 0.0 0.0
+ foo.bar Main T680.hs:(5,3)-(9,38) 224 1 0.0 0.0 0.0 0.0
+ foo.bar.k' Main T680.hs:6:9-34 228 1 0.0 0.0 0.0 0.0
+ k.\ Main T680.hs:12:16 229 1 0.0 0.0 0.0 0.0
+ main Main T680.hs:20:1-14 216 1 0.0 0.0 0.0 0.0
+ r Main T680.hs:18:1-26 218 1 0.0 0.0 100.0 100.0
+ k Main T680.hs:12:1-17 221 0 0.0 0.0 100.0 100.0
+ foo Main T680.hs:(2,1)-(9,38) 222 0 0.0 0.0 100.0 100.0
+ foo.\ Main T680.hs:3:12-40 223 4001 98.5 99.8 98.5 99.8
+ foo.bar Main T680.hs:(5,3)-(9,38) 225 0 0.5 0.0 1.5 0.2
+ foo.bar.\ Main T680.hs:(8,11)-(9,38) 226 4001 1.0 0.1 1.0 0.1
+ foo.bar.\.k'' Main T680.hs:8:15-27 227 4000 0.0 0.0 0.0 0.0
+ foo.bar.k' Main T680.hs:6:9-34 232 0 0.0 0.0 0.0 0.0
+ k.\ Main T680.hs:12:16 233 0 0.0 0.0 0.0 0.0
+ foo.bar.k' Main T680.hs:6:9-34 230 0 0.0 0.0 0.0 0.0
+ k.\ Main T680.hs:12:16 231 0 0.0 0.0 0.0 0.0
+ CAF GHC.Conc.Signal <entire-module> 206 0 0.0 0.0 0.0 0.0
+ CAF GHC.IO.Encoding <entire-module> 194 0 0.0 0.0 0.0 0.0
+ CAF GHC.IO.Encoding.Iconv <entire-module> 192 0 0.0 0.0 0.0 0.0
+ CAF GHC.IO.Handle.FD <entire-module> 184 0 0.0 0.0 0.0 0.0
+ CAF GHC.IO.Handle.Text <entire-module> 182 0 0.0 0.0 0.0 0.0
+ CAF GHC.Show <entire-module> 168 0 0.0 0.0 0.0 0.0
+ main Main T680.hs:20:1-14 217 0 0.0 0.0 0.0 0.0
diff --git a/testsuite/tests/profiling/should_run/all.T b/testsuite/tests/profiling/should_run/all.T
index 875a98e2dd..d85a2b8e6a 100644
--- a/testsuite/tests/profiling/should_run/all.T
+++ b/testsuite/tests/profiling/should_run/all.T
@@ -97,8 +97,7 @@ test('T5559', [], compile_and_run, [''])
test('callstack001',
# unoptimised results are different w.r.t. CAF attribution
- [ expect_broken_for_10037,
- omit_ways(['ghci-ext-prof']), # produces a different stack
+ [ omit_ways(['ghci-ext-prof']), # produces a different stack
], compile_and_run,
['-fprof-auto-calls -fno-full-laziness -fno-state-hack'])
diff --git a/testsuite/tests/profiling/should_run/toplevel_scc_1.prof.sample b/testsuite/tests/profiling/should_run/toplevel_scc_1.prof.sample
index 4eec28da28..3464946624 100644
--- a/testsuite/tests/profiling/should_run/toplevel_scc_1.prof.sample
+++ b/testsuite/tests/profiling/should_run/toplevel_scc_1.prof.sample
@@ -1,30 +1,33 @@
- Tue Jul 19 08:36 2016 Time and Allocation Profiling Report (Final)
+ Tue Dec 20 14:22 2016 Time and Allocation Profiling Report (Final)
toplevel_scc_1 +RTS -p -RTS
total time = 0.00 secs (0 ticks @ 1000 us, 1 processor)
- total alloc = 79,792 bytes (excludes profiling overheads)
+ total alloc = 75,880 bytes (excludes profiling overheads)
COST CENTRE MODULE SRC %time %alloc
+MAIN MAIN <built-in> 0.0 24.0
CAF GHC.Read <entire-module> 0.0 1.2
-CAF GHC.IO.Handle.FD <entire-module> 0.0 64.9
-CAF GHC.IO.Encoding <entire-module> 0.0 3.5
-CAF Main <entire-module> 0.0 27.6
+CAF GHC.IO.Handle.FD <entire-module> 0.0 68.3
+CAF GHC.IO.Encoding <entire-module> 0.0 3.6
- individual inherited
-COST CENTRE MODULE SRC no. entries %time %alloc %time %alloc
+ individual inherited
+COST CENTRE MODULE SRC no. entries %time %alloc %time %alloc
-MAIN MAIN <built-in> 105 0 0.0 0.4 0.0 100.0
- CAF Main <entire-module> 209 0 0.0 27.6 0.0 27.9
- Main.f1 Main toplevel_scc_1.hs:4:1-2 212 1 0.0 0.0 0.0 0.0
- Main.foo Main toplevel_scc_1.hs:7:1-2 210 1 0.0 0.2 0.0 0.3
- Main.bar Main toplevel_scc_1.hs:10:5-6 211 1 0.0 0.1 0.0 0.1
- CAF GHC.Conc.Signal <entire-module> 203 0 0.0 0.8 0.0 0.8
- CAF GHC.IO.Encoding <entire-module> 193 0 0.0 3.5 0.0 3.5
- CAF GHC.IO.Encoding.Iconv <entire-module> 191 0 0.0 0.3 0.0 0.3
- CAF GHC.IO.Handle.FD <entire-module> 183 0 0.0 64.9 0.0 64.9
- CAF GHC.IO.Handle.Text <entire-module> 181 0 0.0 0.1 0.0 0.1
- CAF GHC.Read <entire-module> 171 0 0.0 1.2 0.0 1.2
- CAF Text.Read.Lex <entire-module> 154 0 0.0 0.8 0.0 0.8
+MAIN MAIN <built-in> 105 0 0.0 24.0 0.0 100.0
+ CAF Main <entire-module> 209 0 0.0 0.3 0.0 0.5
+ Main.f1 Main toplevel_scc_1.hs:4:1-2 214 1 0.0 0.0 0.0 0.0
+ Main.foo Main toplevel_scc_1.hs:7:1-2 210 1 0.0 0.2 0.0 0.2
+ Main.bar Main toplevel_scc_1.hs:10:5-6 212 1 0.0 0.0 0.0 0.0
+ CAF GHC.Conc.Signal <entire-module> 203 0 0.0 0.8 0.0 0.8
+ CAF GHC.IO.Encoding <entire-module> 193 0 0.0 3.6 0.0 3.6
+ CAF GHC.IO.Encoding.Iconv <entire-module> 191 0 0.0 0.3 0.0 0.3
+ CAF GHC.IO.Handle.FD <entire-module> 183 0 0.0 68.3 0.0 68.3
+ CAF GHC.IO.Handle.Text <entire-module> 181 0 0.0 0.1 0.0 0.1
+ CAF GHC.Read <entire-module> 171 0 0.0 1.2 0.0 1.2
+ CAF Text.Read.Lex <entire-module> 154 0 0.0 0.9 0.0 0.9
+ Main.f1 Main toplevel_scc_1.hs:4:1-2 215 0 0.0 0.0 0.0 0.0
+ Main.foo Main toplevel_scc_1.hs:7:1-2 211 0 0.0 0.1 0.0 0.1
+ Main.bar Main toplevel_scc_1.hs:10:5-6 213 0 0.0 0.1 0.0 0.1