summaryrefslogtreecommitdiff
path: root/rts/RtsAPI.c
diff options
context:
space:
mode:
authorTravis Whitaker <pi.boy.travis@gmail.com>2019-04-03 15:26:16 -0700
committerBen Gamari <ben@smart-cactus.org>2020-05-19 09:55:49 -0400
commit62e19a6d0889187dfbce0ba2f404849b90b1ef02 (patch)
tree7a0aac77e7e6a53455fa4252ea0d50baeeba0989 /rts/RtsAPI.c
parent568d7279a80cf945271f0659f11a94eea3f1433d (diff)
downloadhaskell-wip/more-barriers.tar.gz
Correct closure observation, construction, and mutation on weak memory machines.wip/more-barriers
Here the following changes are introduced: - A read barrier machine op is added to Cmm. - The order in which a closure's fields are read and written is changed. - Memory barriers are added to RTS code to ensure correctness on out-of-order machines with weak memory ordering. Cmm has a new CallishMachOp called MO_ReadBarrier. On weak memory machines, this is lowered to an instruction that ensures memory reads that occur after said instruction in program order are not performed before reads coming before said instruction in program order. On machines with strong memory ordering properties (e.g. X86, SPARC in TSO mode) no such instruction is necessary, so MO_ReadBarrier is simply erased. However, such an instruction is necessary on weakly ordered machines, e.g. ARM and PowerPC. Weak memory ordering has consequences for how closures are observed and mutated. For example, consider a closure that needs to be updated to an indirection. In order for the indirection to be safe for concurrent observers to enter, said observers must read the indirection's info table before they read the indirectee. Furthermore, the entering observer makes assumptions about the closure based on its info table contents, e.g. an INFO_TYPE of IND implies the closure has an indirectee pointer that is safe to follow. When a closure is updated with an indirection, both its info table and its indirectee must be written. With weak memory ordering, these two writes can be arbitrarily reordered, and perhaps even interleaved with other threads' reads and writes (in the absence of memory barrier instructions). Consider this example of a bad reordering: - An updater writes to a closure's info table (INFO_TYPE is now IND). - A concurrent observer branches upon reading the closure's INFO_TYPE as IND. - A concurrent observer reads the closure's indirectee and enters it. (!!!) - An updater writes the closure's indirectee. 
Here the update to the indirectee comes too late and the concurrent observer has jumped off into the abyss. Speculative execution can also cause us issues; consider: - An observer is about to case on a value in a closure's info table. - The observer speculatively reads one or more of the closure's fields. - An updater writes to the closure's info table. - The observer takes a branch based on the new info table value, but with the old closure fields! - The updater writes to the closure's other fields, but it's too late. Because of these effects, reads and writes to a closure's info table must be ordered carefully with respect to reads and writes to the closure's other fields, and memory barriers must be placed to ensure that reads and writes occur in program order. Specifically, updates to a closure must follow the following pattern: - Update the closure's (non-info table) fields. - Write barrier. - Update the closure's info table. Observing a closure's fields must follow the following pattern: - Read the closure's info pointer. - Read barrier. - Read the closure's (non-info table) fields. This patch updates RTS code to obey this pattern. This should fix long-standing SMP bugs on ARM (specifically newer aarch64 microarchitectures supporting out-of-order execution) and PowerPC. This fixes issue #15449.
Diffstat (limited to 'rts/RtsAPI.c')
-rw-r--r--rts/RtsAPI.c51
1 files changed, 34 insertions, 17 deletions
diff --git a/rts/RtsAPI.c b/rts/RtsAPI.c
index 51a1f2b7cf..310fe26c1d 100644
--- a/rts/RtsAPI.c
+++ b/rts/RtsAPI.c
@@ -30,8 +30,9 @@ HaskellObj
rts_mkChar (Capability *cap, HsChar c)
{
StgClosure *p = (StgClosure *)allocate(cap, CONSTR_sizeW(0,1));
- SET_HDR(p, Czh_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)(StgWord)(StgChar)c;
+ write_barrier();
+ SET_HDR(p, Czh_con_info, CCS_SYSTEM);
return p;
}
@@ -39,8 +40,9 @@ HaskellObj
rts_mkInt (Capability *cap, HsInt i)
{
StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
- SET_HDR(p, Izh_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)(StgInt)i;
+ write_barrier();
+ SET_HDR(p, Izh_con_info, CCS_SYSTEM);
return p;
}
@@ -48,9 +50,10 @@ HaskellObj
rts_mkInt8 (Capability *cap, HsInt8 i)
{
StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
- SET_HDR(p, I8zh_con_info, CCS_SYSTEM);
/* Make sure we mask out the bits above the lowest 8 */
p->payload[0] = (StgClosure *)(StgInt)i;
+ write_barrier();
+ SET_HDR(p, I8zh_con_info, CCS_SYSTEM);
return p;
}
@@ -58,9 +61,10 @@ HaskellObj
rts_mkInt16 (Capability *cap, HsInt16 i)
{
StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
- SET_HDR(p, I16zh_con_info, CCS_SYSTEM);
/* Make sure we mask out the relevant bits */
p->payload[0] = (StgClosure *)(StgInt)i;
+ write_barrier();
+ SET_HDR(p, I16zh_con_info, CCS_SYSTEM);
return p;
}
@@ -68,8 +72,9 @@ HaskellObj
rts_mkInt32 (Capability *cap, HsInt32 i)
{
StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
- SET_HDR(p, I32zh_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)(StgInt)i;
+ write_barrier();
+ SET_HDR(p, I32zh_con_info, CCS_SYSTEM);
return p;
}
@@ -77,8 +82,9 @@ HaskellObj
rts_mkInt64 (Capability *cap, HsInt64 i)
{
StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,2));
- SET_HDR(p, I64zh_con_info, CCS_SYSTEM);
ASSIGN_Int64((P_)&(p->payload[0]), i);
+ write_barrier();
+ SET_HDR(p, I64zh_con_info, CCS_SYSTEM);
return p;
}
@@ -86,8 +92,9 @@ HaskellObj
rts_mkWord (Capability *cap, HsWord i)
{
StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
- SET_HDR(p, Wzh_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)(StgWord)i;
+ write_barrier();
+ SET_HDR(p, Wzh_con_info, CCS_SYSTEM);
return p;
}
@@ -96,8 +103,9 @@ rts_mkWord8 (Capability *cap, HsWord8 w)
{
/* see rts_mkInt* comments */
StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
- SET_HDR(p, W8zh_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)(StgWord)(w & 0xff);
+ write_barrier();
+ SET_HDR(p, W8zh_con_info, CCS_SYSTEM);
return p;
}
@@ -106,8 +114,9 @@ rts_mkWord16 (Capability *cap, HsWord16 w)
{
/* see rts_mkInt* comments */
StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
- SET_HDR(p, W16zh_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)(StgWord)(w & 0xffff);
+ write_barrier();
+ SET_HDR(p, W16zh_con_info, CCS_SYSTEM);
return p;
}
@@ -116,8 +125,9 @@ rts_mkWord32 (Capability *cap, HsWord32 w)
{
/* see rts_mkInt* comments */
StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
- SET_HDR(p, W32zh_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)(StgWord)(w & 0xffffffff);
+ write_barrier();
+ SET_HDR(p, W32zh_con_info, CCS_SYSTEM);
return p;
}
@@ -126,8 +136,9 @@ rts_mkWord64 (Capability *cap, HsWord64 w)
{
StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,2));
/* see mk_Int8 comment */
- SET_HDR(p, W64zh_con_info, CCS_SYSTEM);
ASSIGN_Word64((P_)&(p->payload[0]), w);
+ write_barrier();
+ SET_HDR(p, W64zh_con_info, CCS_SYSTEM);
return p;
}
@@ -136,8 +147,9 @@ HaskellObj
rts_mkFloat (Capability *cap, HsFloat f)
{
StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
- SET_HDR(p, Fzh_con_info, CCS_SYSTEM);
ASSIGN_FLT((P_)p->payload, (StgFloat)f);
+ write_barrier();
+ SET_HDR(p, Fzh_con_info, CCS_SYSTEM);
return p;
}
@@ -145,8 +157,9 @@ HaskellObj
rts_mkDouble (Capability *cap, HsDouble d)
{
StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,sizeofW(StgDouble)));
- SET_HDR(p, Dzh_con_info, CCS_SYSTEM);
ASSIGN_DBL((P_)p->payload, (StgDouble)d);
+ write_barrier();
+ SET_HDR(p, Dzh_con_info, CCS_SYSTEM);
return p;
}
@@ -154,8 +167,9 @@ HaskellObj
rts_mkStablePtr (Capability *cap, HsStablePtr s)
{
StgClosure *p = (StgClosure *)allocate(cap,sizeofW(StgHeader)+1);
- SET_HDR(p, StablePtr_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)s;
+ write_barrier();
+ SET_HDR(p, StablePtr_con_info, CCS_SYSTEM);
return p;
}
@@ -163,8 +177,9 @@ HaskellObj
rts_mkPtr (Capability *cap, HsPtr a)
{
StgClosure *p = (StgClosure *)allocate(cap,sizeofW(StgHeader)+1);
- SET_HDR(p, Ptr_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)a;
+ write_barrier();
+ SET_HDR(p, Ptr_con_info, CCS_SYSTEM);
return p;
}
@@ -172,8 +187,9 @@ HaskellObj
rts_mkFunPtr (Capability *cap, HsFunPtr a)
{
StgClosure *p = (StgClosure *)allocate(cap,sizeofW(StgHeader)+1);
- SET_HDR(p, FunPtr_con_info, CCS_SYSTEM);
p->payload[0] = (StgClosure *)a;
+ write_barrier();
+ SET_HDR(p, FunPtr_con_info, CCS_SYSTEM);
return p;
}
@@ -202,9 +218,10 @@ rts_apply (Capability *cap, HaskellObj f, HaskellObj arg)
// Here we don't want to use CCS_SYSTEM, because it's a hidden cost centre,
// and evaluating Haskell code under a hidden cost centre leads to
// confusing profiling output. (#7753)
- SET_HDR(ap, (StgInfoTable *)&stg_ap_2_upd_info, CCS_MAIN);
ap->payload[0] = f;
ap->payload[1] = arg;
+ write_barrier();
+ SET_HDR(ap, (StgInfoTable *)&stg_ap_2_upd_info, CCS_MAIN);
return (StgClosure *)ap;
}