diff options
author | Carl Shapiro <cshapiro@google.com> | 2013-12-05 17:35:22 -0800 |
---|---|---|
committer | Carl Shapiro <cshapiro@google.com> | 2013-12-05 17:35:22 -0800 |
commit | 950d5e501cfce748cb115e8a8491672850613cb2 (patch) | |
tree | eef04c58ca814e0cecfb6fe7663439af10b7fbba /src | |
parent | 89f7fae4071236c62ed6833c5e099a78f9b53721 (diff) | |
download | go-950d5e501cfce748cb115e8a8491672850613cb2.tar.gz |
cmd/5g, cmd/5l, cmd/6g, cmd/6l, cmd/8g, cmd/8l, cmd/gc, runtime: generate pointer maps by liveness analysis
This change allows the garbage collector to examine stack
slots that are determined as live and containing a pointer
value by the garbage collector. This results in a mean
reduction of 65% in the number of stack slots scanned during
an invocation of "GOGC=1 all.bash".
Unfortunately, this does not yet allow garbage collection to
be precise for the stack slots computed as live. Pointers
confound the determination of what definitions reach a given
instruction. In general, this problem is not solvable without
runtime cost but some advanced cooperation from the compiler
might mitigate common cases.
R=golang-dev, rsc, cshapiro
CC=golang-dev
https://codereview.appspot.com/14430048
Diffstat (limited to 'src')
-rw-r--r-- | src/cmd/5g/ggen.c | 63 | ||||
-rw-r--r-- | src/cmd/5g/peep.c | 1 | ||||
-rw-r--r-- | src/cmd/5g/prog.c | 1 | ||||
-rw-r--r-- | src/cmd/5g/reg.c | 3 | ||||
-rw-r--r-- | src/cmd/5l/5.out.h | 1 | ||||
-rw-r--r-- | src/cmd/6g/ggen.c | 45 | ||||
-rw-r--r-- | src/cmd/6g/prog.c | 1 | ||||
-rw-r--r-- | src/cmd/6g/reg.c | 3 | ||||
-rw-r--r-- | src/cmd/6l/6.out.h | 1 | ||||
-rw-r--r-- | src/cmd/6l/optab.c | 3 | ||||
-rw-r--r-- | src/cmd/8g/ggen.c | 45 | ||||
-rw-r--r-- | src/cmd/8g/prog.c | 1 | ||||
-rw-r--r-- | src/cmd/8g/reg.c | 3 | ||||
-rw-r--r-- | src/cmd/8l/8.out.h | 1 | ||||
-rw-r--r-- | src/cmd/8l/optab.c | 2 | ||||
-rw-r--r-- | src/cmd/cc/pgen.c | 124 | ||||
-rw-r--r-- | src/cmd/dist/build.c | 6 | ||||
-rw-r--r-- | src/cmd/gc/array.c | 129 | ||||
-rw-r--r-- | src/cmd/gc/bv.c | 120 | ||||
-rw-r--r-- | src/cmd/gc/gen.c | 4 | ||||
-rw-r--r-- | src/cmd/gc/go.h | 32 | ||||
-rw-r--r-- | src/cmd/gc/pgen.c | 248 | ||||
-rw-r--r-- | src/cmd/gc/plive.c | 1489 | ||||
-rw-r--r-- | src/pkg/runtime/funcdata.h | 6 | ||||
-rw-r--r-- | src/pkg/runtime/mgc0.c | 191 |
25 files changed, 2031 insertions, 492 deletions
diff --git a/src/cmd/5g/ggen.c b/src/cmd/5g/ggen.c index 040c3d2a9..e8a10747c 100644 --- a/src/cmd/5g/ggen.c +++ b/src/cmd/5g/ggen.c @@ -9,15 +9,11 @@ #include "gg.h" #include "opt.h" -static Prog* appendp(Prog*, int, int, int, int32, int, int, int32); - void -defframe(Prog *ptxt, Bvec *bv) +defframe(Prog *ptxt) { - int i, j, first; uint32 frame; - Prog *p, *p1; - + // fill in argument size ptxt->to.type = D_CONST2; ptxt->to.offset2 = rnd(curfn->type->argwid, widthptr); @@ -28,59 +24,6 @@ defframe(Prog *ptxt, Bvec *bv) frame = rnd(maxstksize+maxarg, widthptr); ptxt->to.offset = frame; maxstksize = 0; - - // insert code to clear pointered part of the frame, - // so that garbage collector only sees initialized values - // when it looks for pointers. - p = ptxt; - while(p->link->as == AFUNCDATA || p->link->as == APCDATA || p->link->as == ATYPE) - p = p->link; - if(stkzerosize >= 8*widthptr) { - p = appendp(p, AMOVW, D_CONST, NREG, 0, D_REG, 0, 0); - p = appendp(p, AADD, D_CONST, NREG, 4+frame-stkzerosize, D_REG, 1, 0); - p->reg = REGSP; - p = appendp(p, AADD, D_CONST, NREG, stkzerosize, D_REG, 2, 0); - p->reg = 1; - p1 = p = appendp(p, AMOVW, D_REG, 0, 0, D_OREG, 1, 4); - p->scond |= C_PBIT; - p = appendp(p, ACMP, D_REG, 1, 0, D_NONE, 0, 0); - p->reg = 2; - p = appendp(p, ABNE, D_NONE, NREG, 0, D_BRANCH, NREG, 0); - patch(p, p1); - } else { - first = 1; - j = (stkptrsize - stkzerosize)/widthptr * 2; - for(i=0; i<stkzerosize; i+=widthptr) { - if(bvget(bv, j) || bvget(bv, j+1)) { - if(first) { - p = appendp(p, AMOVW, D_CONST, NREG, 0, D_REG, 0, 0); - first = 0; - } - p = appendp(p, AMOVW, D_REG, 0, 0, D_OREG, REGSP, 4+frame-stkzerosize+i); - } - j += 2; - } - } -} - -static Prog* -appendp(Prog *p, int as, int ftype, int freg, int32 foffset, int ttype, int treg, int32 toffset) -{ - Prog *q; - - q = mal(sizeof(*q)); - clearp(q); - q->as = as; - q->lineno = p->lineno; - q->from.type = ftype; - q->from.reg = freg; - q->from.offset = foffset; - q->to.type = ttype; - q->to.reg = treg; - q->to.offset = toffset; - q->link = p->link; - p->link = q; - return q; } // Sweep the prog list to mark any used nodes. @@ -829,6 +772,8 @@ clearfat(Node *nl) if(componentgen(N, nl)) return; + gfatvardef(nl); + c = w % 4; // bytes q = w / 4; // quads diff --git a/src/cmd/5g/peep.c b/src/cmd/5g/peep.c index c78fb3d1c..a605f457f 100644 --- a/src/cmd/5g/peep.c +++ b/src/cmd/5g/peep.c @@ -1164,6 +1164,7 @@ copyu(Prog *p, Adr *v, Adr *s) case APCDATA: case AFUNCDATA: + case AFATVARDEF: return 0; } } diff --git a/src/cmd/5g/prog.c b/src/cmd/5g/prog.c index 5aa6163d8..e5b8fbd27 100644 --- a/src/cmd/5g/prog.c +++ b/src/cmd/5g/prog.c @@ -29,6 +29,7 @@ static ProgInfo progtable[ALAST] = { [AUNDEF]= {OK}, [AUSEFIELD]= {OK}, [ACHECKNIL]= {LeftRead}, + [AFATVARDEF]= {Pseudo | RightWrite}, // NOP is an internal no-op that also stands // for USED and SET annotations, not the Intel opcode. diff --git a/src/cmd/5g/reg.c b/src/cmd/5g/reg.c index d2a8cc488..ceee57c66 100644 --- a/src/cmd/5g/reg.c +++ b/src/cmd/5g/reg.c @@ -168,8 +168,7 @@ regopt(Prog *firstp) fmtinstall('Q', Qconv); first = 0; } - - fixjmp(firstp); + mergetemp(firstp); /* diff --git a/src/cmd/5l/5.out.h b/src/cmd/5l/5.out.h index e8cf83ddd..ebbadde2c 100644 --- a/src/cmd/5l/5.out.h +++ b/src/cmd/5l/5.out.h @@ -198,6 +198,7 @@ enum as AFUNCDATA, APCDATA, ACHECKNIL, + AFATVARDEF, ALAST, }; diff --git a/src/cmd/6g/ggen.c b/src/cmd/6g/ggen.c index 9fad9f7f1..8e8e4a91a 100644 --- a/src/cmd/6g/ggen.c +++ b/src/cmd/6g/ggen.c @@ -9,14 +9,10 @@ #include "gg.h" #include "opt.h" -static Prog* appendp(Prog*, int, int, vlong, int, vlong); - void -defframe(Prog *ptxt, Bvec *bv) +defframe(Prog *ptxt) { - int i, j; uint32 frame; - Prog *p; // fill in argument size ptxt->to.offset = rnd(curfn->type->argwid, widthptr); @@ -25,43 +21,6 @@ defframe(Prog *ptxt, Bvec *bv) ptxt->to.offset <<= 32; frame = rnd(stksize+maxarg, widthptr); ptxt->to.offset |= frame; - - // insert code to clear pointered part of the frame, - // so that garbage collector only sees initialized values - // when it looks for pointers. - p = ptxt; - if(stkzerosize >= 8*widthptr) { - p = appendp(p, AMOVQ, D_CONST, 0, D_AX, 0); - p = appendp(p, AMOVQ, D_CONST, stkzerosize/widthptr, D_CX, 0); - p = appendp(p, ALEAQ, D_SP+D_INDIR, frame-stkzerosize, D_DI, 0); - p = appendp(p, AREP, D_NONE, 0, D_NONE, 0); - appendp(p, ASTOSQ, D_NONE, 0, D_NONE, 0); - } else { - j = (stkptrsize - stkzerosize)/widthptr * 2; - for(i=0; i<stkzerosize; i+=widthptr) { - if(bvget(bv, j) || bvget(bv, j+1)) - p = appendp(p, AMOVQ, D_CONST, 0, D_SP+D_INDIR, frame-stkzerosize+i); - j += 2; - } - } -} - -static Prog* -appendp(Prog *p, int as, int ftype, vlong foffset, int ttype, vlong toffset) -{ - Prog *q; - - q = mal(sizeof(*q)); - clearp(q); - q->as = as; - q->lineno = p->lineno; - q->from.type = ftype; - q->from.offset = foffset; - q->to.type = ttype; - q->to.offset = toffset; - q->link = p->link; - p->link = q; - return q; } // Sweep the prog list to mark any used nodes. @@ -1037,6 +996,8 @@ clearfat(Node *nl) if(componentgen(N, nl)) return; + gfatvardef(nl); + c = w % 8; // bytes q = w / 8; // quads diff --git a/src/cmd/6g/prog.c b/src/cmd/6g/prog.c index 90571a21a..657cc9f77 100644 --- a/src/cmd/6g/prog.c +++ b/src/cmd/6g/prog.c @@ -41,6 +41,7 @@ static ProgInfo progtable[ALAST] = { [AUNDEF]= {OK}, [AUSEFIELD]= {OK}, [ACHECKNIL]= {LeftRead}, + [AFATVARDEF]= {Pseudo | RightWrite}, // NOP is an internal no-op that also stands // for USED and SET annotations, not the Intel opcode. diff --git a/src/cmd/6g/reg.c b/src/cmd/6g/reg.c index 63fd0deca..4b28b9311 100644 --- a/src/cmd/6g/reg.c +++ b/src/cmd/6g/reg.c @@ -155,9 +155,8 @@ regopt(Prog *firstp) first = 0; } - fixjmp(firstp); mergetemp(firstp); - + /* * control flow is more complicated in generated go code * than in generated c code. define pseudo-variables for diff --git a/src/cmd/6l/6.out.h b/src/cmd/6l/6.out.h index 5fa73a65b..64c8ac2fe 100644 --- a/src/cmd/6l/6.out.h +++ b/src/cmd/6l/6.out.h @@ -762,6 +762,7 @@ enum as AFUNCDATA, APCDATA, ACHECKNIL, + AFATVARDEF, ALAST }; diff --git a/src/cmd/6l/optab.c b/src/cmd/6l/optab.c index 46603ad45..f48c6c329 100644 --- a/src/cmd/6l/optab.c +++ b/src/cmd/6l/optab.c @@ -1352,8 +1352,11 @@ Optab optab[] = { APCLMULQDQ, yxshuf, Pq, 0x3a,0x44,0 }, { AUSEFIELD, ynop, Px, 0,0 }, + { ATYPE }, { AFUNCDATA, yfuncdata, Px, 0,0 }, { APCDATA, ypcdata, Px, 0,0 }, + { ACHECKNIL }, + { AFATVARDEF }, { AEND }, 0 diff --git a/src/cmd/8g/ggen.c b/src/cmd/8g/ggen.c index fa5ed00dd..50afb98e2 100644 --- a/src/cmd/8g/ggen.c +++ b/src/cmd/8g/ggen.c @@ -9,14 +9,10 @@ #include "gg.h" #include "opt.h" -static Prog* appendp(Prog*, int, int, int32, int, int32); - void -defframe(Prog *ptxt, Bvec *bv) +defframe(Prog *ptxt) { uint32 frame; - Prog *p; - int i, j; // fill in argument size ptxt->to.offset2 = rnd(curfn->type->argwid, widthptr); @@ -27,43 +23,6 @@ defframe(Prog *ptxt, Bvec *bv) frame = rnd(maxstksize+maxarg, widthptr); ptxt->to.offset = frame; maxstksize = 0; - - // insert code to clear pointered part of the frame, - // so that garbage collector only sees initialized values - // when it looks for pointers. - p = ptxt; - if(stkzerosize >= 8*widthptr) { - p = appendp(p, AMOVL, D_CONST, 0, D_AX, 0); - p = appendp(p, AMOVL, D_CONST, stkzerosize/widthptr, D_CX, 0); - p = appendp(p, ALEAL, D_SP+D_INDIR, frame-stkzerosize, D_DI, 0); - p = appendp(p, AREP, D_NONE, 0, D_NONE, 0); - appendp(p, ASTOSL, D_NONE, 0, D_NONE, 0); - } else { - j = (stkptrsize - stkzerosize)/widthptr * 2; - for(i=0; i<stkzerosize; i+=widthptr) { - if(bvget(bv, j) || bvget(bv, j+1)) - p = appendp(p, AMOVL, D_CONST, 0, D_SP+D_INDIR, frame-stkzerosize+i); - j += 2; - } - } -} - -static Prog* -appendp(Prog *p, int as, int ftype, int32 foffset, int ttype, int32 toffset) -{ - Prog *q; - - q = mal(sizeof(*q)); - clearp(q); - q->as = as; - q->lineno = p->lineno; - q->from.type = ftype; - q->from.offset = foffset; - q->to.type = ttype; - q->to.offset = toffset; - q->link = p->link; - p->link = q; - return q; } // Sweep the prog list to mark any used nodes. @@ -119,6 +78,8 @@ clearfat(Node *nl) if(componentgen(N, nl)) return; + gfatvardef(nl); + c = w % 4; // bytes q = w / 4; // quads diff --git a/src/cmd/8g/prog.c b/src/cmd/8g/prog.c index 14f197b6a..7745d503b 100644 --- a/src/cmd/8g/prog.c +++ b/src/cmd/8g/prog.c @@ -41,6 +41,7 @@ static ProgInfo progtable[ALAST] = { [AUNDEF]= {OK}, [AUSEFIELD]= {OK}, [ACHECKNIL]= {LeftRead}, + [AFATVARDEF]= {Pseudo | RightWrite}, // NOP is an internal no-op that also stands // for USED and SET annotations, not the Intel opcode. diff --git a/src/cmd/8g/reg.c b/src/cmd/8g/reg.c index a85c6608a..5dd93cceb 100644 --- a/src/cmd/8g/reg.c +++ b/src/cmd/8g/reg.c @@ -124,8 +124,7 @@ regopt(Prog *firstp) exregoffset = D_DI; // no externals first = 0; } - - fixjmp(firstp); + mergetemp(firstp); /* diff --git a/src/cmd/8l/8.out.h b/src/cmd/8l/8.out.h index 988e50f3e..a28115eb4 100644 --- a/src/cmd/8l/8.out.h +++ b/src/cmd/8l/8.out.h @@ -578,6 +578,7 @@ enum as AFUNCDATA, APCDATA, ACHECKNIL, + AFATVARDEF, ALAST }; diff --git a/src/cmd/8l/optab.c b/src/cmd/8l/optab.c index a4c40e8e3..19952e5f9 100644 --- a/src/cmd/8l/optab.c +++ b/src/cmd/8l/optab.c @@ -1025,6 +1025,8 @@ Optab optab[] = { ATYPE }, { AFUNCDATA, yfuncdata, Px, 0,0 }, { APCDATA, ypcdata, Px, 0,0 }, + { ACHECKNIL }, + { AFATVARDEF }, 0 }; diff --git a/src/cmd/cc/pgen.c b/src/cmd/cc/pgen.c index b82872bc5..d63e90c24 100644 --- a/src/cmd/cc/pgen.c +++ b/src/cmd/cc/pgen.c @@ -35,6 +35,25 @@ enum { BitsPerPointer = 2 }; static void dumpgcargs(Type *fn, Sym *sym); +static Sym* +makefuncdatasym(char *namefmt, int64 funcdatakind) +{ + Node nod; + Sym *sym; + static int32 nsym; + static char namebuf[40]; + + snprint(namebuf, sizeof(namebuf), namefmt, nsym++); + sym = slookup(namebuf); + sym->class = CSTATIC; + memset(&nod, 0, sizeof nod); + nod.op = ONAME; + nod.sym = sym; + nod.class = CSTATIC; + gins(AFUNCDATA, nodconst(funcdatakind), &nod); + return sym; +} + int hasdotdotdot(void) { @@ -80,10 +99,10 @@ void codgen(Node *n, Node *nn) { Prog *sp; - Node *n1, nod, nod1, nod2; - Sym *gcsym, *gclocalssym; - static int ngcsym, ngclocalssym; - static char namebuf[40]; + Node *n1, nod, nod1; + Sym *gcargs; + Sym *gclocals; + int isvarargs; cursafe = 0; curarg = 0; @@ -109,25 +128,11 @@ codgen(Node *n, Node *nn) * generate funcdata symbol for this function. * data is filled in at the end of codgen(). */ - snprint(namebuf, sizeof namebuf, "gc·%d", ngcsym++); - gcsym = slookup(namebuf); - gcsym->class = CSTATIC; - - memset(&nod, 0, sizeof nod); - nod.op = ONAME; - nod.sym = gcsym; - nod.class = CSTATIC; - gins(AFUNCDATA, nodconst(FUNCDATA_GCArgs), &nod); - - snprint(namebuf, sizeof(namebuf), "gclocalssym·%d", ngclocalssym++); - gclocalssym = slookup(namebuf); - gclocalssym->class = CSTATIC; - - memset(&nod2, 0, sizeof(nod2)); - nod2.op = ONAME; - nod2.sym = gclocalssym; - nod2.class = CSTATIC; - gins(AFUNCDATA, nodconst(FUNCDATA_GCLocals), &nod2); + isvarargs = hasdotdotdot(); + gcargs = nil; + if(!isvarargs) + gcargs = makefuncdatasym("gcargs·%d", FUNCDATA_ArgsPointerMaps); + gclocals = makefuncdatasym("gclocals·%d", FUNCDATA_LocalsPointerMaps); /* * isolate first argument @@ -166,7 +171,8 @@ codgen(Node *n, Node *nn) maxargsafe = xround(maxargsafe, 8); sp->to.offset += maxargsafe; - dumpgcargs(thisfn, gcsym); + if(!isvarargs) + dumpgcargs(thisfn, gcargs); // TODO(rsc): "stkoff" is not right. It does not account for // the possibility of data stored in .safe variables. @@ -177,9 +183,9 @@ codgen(Node *n, Node *nn) // area its own section. // That said, we've been using stkoff for months // and nothing too terrible has happened. - gextern(gclocalssym, nodconst(-stkoff), 0, 4); // locals - gclocalssym->type = typ(0, T); - gclocalssym->type->width = 4; + gextern(gclocals, nodconst(-stkoff), 0, 4); // locals + gclocals->type = typ(0, T); + gclocals->type->width = 4; } void @@ -715,39 +721,39 @@ dumpgcargs(Type *fn, Sym *sym) int32 argbytes; int32 symoffset, argoffset; - if(hasdotdotdot()) { - // give up for C vararg functions. - // TODO: maybe make a map just for the args we do know? - gextern(sym, nodconst(0), 0, 4); // nptrs=0 - symoffset = 4; - } else { - argbytes = (argsize() + ewidth[TIND] - 1); - bv = bvalloc((argbytes / ewidth[TIND]) * BitsPerPointer); - argoffset = align(0, fn->link, Aarg0, nil); - if(argoffset > 0) { - // The C calling convention returns structs by - // copying them to a location pointed to by a - // hidden first argument. This first argument - // is a pointer. - if(argoffset != ewidth[TIND]) - yyerror("passbyptr arg not the right size"); - bvset(bv, 0); - } - for(t = fn->down; t != T; t = t->down) { - if(t->etype == TVOID) - continue; - argoffset = align(argoffset, t, Aarg1, nil); - walktype1(t, argoffset, bv, 1); - argoffset = align(argoffset, t, Aarg2, nil); - } - gextern(sym, nodconst(bv->n), 0, 4); - symoffset = 4; - for(i = 0; i < bv->n; i += 32) { - gextern(sym, nodconst(bv->b[i/32]), symoffset, 4); - symoffset += 4; - } - free(bv); + // Dump the length of the bitmap array. This value is always one for + // functions written in C. + symoffset = 0; + gextern(sym, nodconst(1), symoffset, 4); + symoffset += 4; + argbytes = (argsize() + ewidth[TIND] - 1); + bv = bvalloc((argbytes / ewidth[TIND]) * BitsPerPointer); + argoffset = align(0, fn->link, Aarg0, nil); + if(argoffset > 0) { + // The C calling convention returns structs by copying them to a + // location pointed to by a hidden first argument. This first + // argument is a pointer. + if(argoffset != ewidth[TIND]) + yyerror("passbyptr arg not the right size"); + bvset(bv, 0); + } + for(t = fn->down; t != T; t = t->down) { + if(t->etype == TVOID) + continue; + argoffset = align(argoffset, t, Aarg1, nil); + walktype1(t, argoffset, bv, 1); + argoffset = align(argoffset, t, Aarg2, nil); + } + // Dump the length of the bitmap. + gextern(sym, nodconst(bv->n), symoffset, 4); + symoffset += 4; + // Dump the words of the bitmap. + for(i = 0; i < bv->n; i += 32) { + gextern(sym, nodconst(bv->b[i/32]), symoffset, 4); + symoffset += 4; } + free(bv); + // Finalize the gc symbol. sym->type = typ(0, T); sym->type->width = symoffset; } diff --git a/src/cmd/dist/build.c b/src/cmd/dist/build.c index e6e5f0cf7..e3b3c73af 100644 --- a/src/cmd/dist/build.c +++ b/src/cmd/dist/build.c @@ -452,7 +452,7 @@ static char *proto_gccargs[] = { // Fix available at http://patchwork.ozlabs.org/patch/64562/. "-O1", #else - "-O2", + "-O0", #endif }; @@ -500,6 +500,7 @@ static struct { {"cmd/gc", { "-cplx.c", "-pgen.c", + "-plive.c", "-popt.c", "-y1.tab.c", // makefile dreg "opnames.h", @@ -525,6 +526,7 @@ static struct { {"cmd/5g", { "../gc/cplx.c", "../gc/pgen.c", + "../gc/plive.c", "../gc/popt.c", "../gc/popt.h", "../5l/enam.c", @@ -533,6 +535,7 @@ static struct { {"cmd/6g", { "../gc/cplx.c", "../gc/pgen.c", + "../gc/plive.c", "../gc/popt.c", "../gc/popt.h", "../6l/enam.c", @@ -541,6 +544,7 @@ static struct { {"cmd/8g", { "../gc/cplx.c", "../gc/pgen.c", + "../gc/plive.c", "../gc/popt.c", "../gc/popt.h", "../8l/enam.c", diff --git a/src/cmd/gc/array.c b/src/cmd/gc/array.c new file mode 100644 index 000000000..5e53c1ff0 --- /dev/null +++ b/src/cmd/gc/array.c @@ -0,0 +1,129 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include <u.h> +#include <libc.h> +#include "go.h" + +enum { + DEFAULTCAPACITY = 16, +}; + +struct Array +{ + int32 length; // number of elements + int32 size; // element size + int32 capacity; // size of data in elements + char *data; // element storage +}; + +Array* +arraynew(int32 capacity, int32 size) +{ + Array *result; + + if(capacity < 0) + fatal("arraynew: capacity %d is not positive", capacity); + if(size < 0) + fatal("arraynew: size %d is not positive\n", size); + result = malloc(sizeof(*result)); + if(result == nil) + fatal("arraynew: malloc failed\n"); + result->length = 0; + result->size = size; + result->capacity = capacity == 0 ? DEFAULTCAPACITY : capacity; + result->data = malloc(result->capacity * result->size); + if(result->data == nil) + fatal("arraynew: malloc failed\n"); + return result; +} + +void +arrayfree(Array *array) +{ + if(array == nil) + return; + free(array->data); + free(array); +} + +int32 +arraylength(Array *array) +{ + return array->length; +} + +void* +arrayget(Array *array, int32 index) +{ + if(array == nil) + fatal("arrayget: array is nil\n"); + if(index < 0 || index >= array->length) + fatal("arrayget: index %d is out of bounds for length %d\n", index, array->length); + return array->data + index * array->size; +} + +void +arrayset(Array *array, int32 index, void *element) +{ + if(array == nil) + fatal("arrayset: array is nil\n"); + if(element == nil) + fatal("arrayset: element is nil\n"); + if(index < 0 || index >= array->length) + fatal("arrayget: index %d is out of bounds for length %d\n", index, array->length); + memmove(array->data + index * array->size, element, array->size); +} + +static void +ensurecapacity(Array *array, int32 capacity) +{ + int32 newcapacity; + char *newdata; + + if(array == nil) + fatal("ensurecapacity: array is nil\n"); + if(capacity < 0) + fatal("ensurecapacity: capacity %d is not positive", capacity); + if(capacity >= array->capacity) { + newcapacity = capacity + (capacity >> 1); + newdata = realloc(array->data, newcapacity * array->size); + if(newdata == nil) + fatal("ensurecapacity: realloc failed\n"); + array->capacity = newcapacity; + array->data = newdata; + } +} + +void +arrayadd(Array *array, void *element) +{ + if(array == nil) + fatal("arrayset: array is nil\n"); + if(element == nil) + fatal("arrayset: element is nil\n"); + ensurecapacity(array, array->length + 1); + array->length++; + arrayset(array, array->length - 1, element); +} + +int32 +arrayindexof(Array *array, void *element) +{ + void *p; + int32 i; + + for(i = 0; i < array->length; i++) { + p = arrayget(array, i); + if(memcmp(p, &element, array->size) == 0) + return i; + } + return -1; +} + +void +arraysort(Array *array, int (*cmp)(const void*, const void*)) +{ + qsort(array->data, array->length, array->size, cmp); +} diff --git a/src/cmd/gc/bv.c b/src/cmd/gc/bv.c index 92834a97b..847e777ee 100644 --- a/src/cmd/gc/bv.c +++ b/src/cmd/gc/bv.c @@ -11,12 +11,24 @@ enum { WORDBITS = 32, }; -uintptr +static uintptr bvsize(uintptr n) { return ((n + WORDBITS - 1) / WORDBITS) * WORDSIZE; } +int32 +bvbits(Bvec *bv) +{ + return bv->n; +} + +int32 +bvwords(Bvec *bv) +{ + return (bv->n + WORDBITS - 1) / WORDBITS; +} + Bvec* bvalloc(int32 n) { @@ -34,26 +46,49 @@ bvalloc(int32 n) return bv; } +/* difference */ void -bvset(Bvec *bv, int32 i) +bvandnot(Bvec *dst, Bvec *src1, Bvec *src2) { - uint32 mask; + int32 i, w; - if(i < 0 || i >= bv->n) - fatal("bvset: index %d is out of bounds with length %d\n", i, bv->n); - mask = 1U << (i % WORDBITS); - bv->b[i / WORDBITS] |= mask; + if(dst->n != src1->n || dst->n != src2->n) + fatal("bvand: lengths %d, %d, and %d are not equal", dst->n, src1->n, src2->n); + for(i = 0, w = 0; i < dst->n; i += WORDBITS, w++) + dst->b[w] = src1->b[w] & ~src2->b[w]; +} + +int +bvcmp(Bvec *bv1, Bvec *bv2) +{ + uintptr nbytes; + + if(bv1->n != bv2->n) + fatal("bvequal: lengths %d and %d are not equal", bv1->n, bv2->n); + nbytes = bvsize(bv1->n); + return memcmp(bv1->b, bv2->b, nbytes); } void -bvres(Bvec *bv, int32 i) +bvcopy(Bvec *dst, Bvec *src) { - uint32 mask; + memmove(dst->b, src->b, bvsize(dst->n)); +} - if(i < 0 || i >= bv->n) - fatal("bvres: index %d is out of bounds with length %d\n", i, bv->n); - mask = ~(1 << (i % WORDBITS)); - bv->b[i / WORDBITS] &= mask; +Bvec* +bvconcat(Bvec *src1, Bvec *src2) +{ + Bvec *dst; + int32 i; + + dst = bvalloc(src1->n + src2->n); + for(i = 0; i < src1->n; i++) + if(bvget(src1, i)) + bvset(dst, i); + for(i = 0; i < src2->n; i++) + if(bvget(src2, i)) + bvset(dst, i + src1->n); + return dst; } int @@ -78,3 +113,62 @@ bvisempty(Bvec *bv) return 0; return 1; } + +void +bvnot(Bvec *bv) +{ + int32 i, w; + + for(i = 0, w = 0; i < bv->n; i += WORDBITS, w++) + bv->b[w] = ~bv->b[w]; +} + +/* union */ +void +bvor(Bvec *dst, Bvec *src1, Bvec *src2) +{ + int32 i, w; + + if(dst->n != src1->n || dst->n != src2->n) + fatal("bvor: lengths %d, %d, and %d are not equal", dst->n, src1->n, src2->n); + for(i = 0, w = 0; i < dst->n; i += WORDBITS, w++) + dst->b[w] = src1->b[w] | src2->b[w]; +} + +void +bvprint(Bvec *bv) +{ + int32 i; + + print("#*"); + for(i = 0; i < bv->n; i++) + print("%d", bvget(bv, i)); +} + +void +bvreset(Bvec *bv, int32 i) +{ + uint32 mask; + + if(i < 0 || i >= bv->n) + fatal("bvreset: index %d is out of bounds with length %d\n", i, bv->n); + mask = ~(1 << (i % WORDBITS)); + bv->b[i / WORDBITS] &= mask; +} + +void +bvresetall(Bvec *bv) +{ + memset(bv->b, 0x00, bvsize(bv->n)); +} + +void +bvset(Bvec *bv, int32 i) +{ + uint32 mask; + + if(i < 0 || i >= bv->n) + fatal("bvset: index %d is out of bounds with length %d\n", i, bv->n); + mask = 1U << (i % WORDBITS); + bv->b[i / WORDBITS] |= mask; +} diff --git a/src/cmd/gc/gen.c b/src/cmd/gc/gen.c index ada16eacc..49b3f7a99 100644 --- a/src/cmd/gc/gen.c +++ b/src/cmd/gc/gen.c @@ -767,6 +767,8 @@ cgen_eface(Node *n, Node *res) * so it's important that it is done first */ Node dst; + + gfatvardef(res); dst = *res; dst.type = types[tptr]; dst.xoffset += widthptr; @@ -795,6 +797,8 @@ cgen_slice(Node *n, Node *res) if(n->list->next->next) offs = n->list->next->next->n; + gfatvardef(res); + // dst.len = hi [ - lo ] dst = *res; dst.xoffset += Array_nel; diff --git a/src/cmd/gc/go.h b/src/cmd/gc/go.h index 821f30492..d4afd35c9 100644 --- a/src/cmd/gc/go.h +++ b/src/cmd/gc/go.h @@ -129,6 +129,7 @@ struct Val } u; }; +typedef struct Array Array; typedef struct Bvec Bvec; typedef struct Pkg Pkg; typedef struct Sym Sym; @@ -1004,6 +1005,18 @@ vlong rnd(vlong o, vlong r); void typeinit(void); /* + * array.c + */ +Array* arraynew(int32 capacity, int32 size); +void arrayfree(Array *array); +int32 arraylength(Array *array); +void* arrayget(Array *array, int32 index); +void arrayset(Array *array, int32 index, void *element); +void arrayadd(Array *array, void *element); +int32 arrayindexof(Array* array, void *element); +void arraysort(Array* array, int (*cmp)(const void*, const void*)); + +/* * bits.c */ int Qconv(Fmt *fp); @@ -1021,11 +1034,18 @@ int bset(Bits a, uint n); * bv.c */ Bvec* bvalloc(int32 n); -void bvset(Bvec *bv, int32 i); -void bvres(Bvec *bv, int32 i); +void bvandnot(Bvec *dst, Bvec *src1, Bvec *src2); +int bvcmp(Bvec *bv1, Bvec *bv2); +void bvcopy(Bvec *dst, Bvec *src); +Bvec* bvconcat(Bvec *src1, Bvec *src2); int bvget(Bvec *bv, int32 i); int bvisempty(Bvec *bv); -int bvcmp(Bvec *bv1, Bvec *bv2); +void bvnot(Bvec *bv); +void bvor(Bvec *dst, Bvec *src1, Bvec *src2); +void bvprint(Bvec *bv); +void bvreset(Bvec *bv, int32 i); +void bvresetall(Bvec *bv); +void bvset(Bvec *bv, int32 i); /* * closure.c @@ -1439,7 +1459,7 @@ Node* conv(Node*, Type*); int candiscard(Node*); /* - * arch-specific ggen.c/gsubr.c/gobj.c/pgen.c + * arch-specific ggen.c/gsubr.c/gobj.c/pgen.c/plive.c */ #define P ((Prog*)0) @@ -1477,7 +1497,7 @@ void cgen_checknil(Node*); void cgen_ret(Node *n); void clearfat(Node *n); void compile(Node*); -void defframe(Prog*, Bvec*); +void defframe(Prog*); int dgostringptr(Sym*, int off, char *str); int dgostrlitptr(Sym*, int off, Strlit*); int dstringptr(Sym *s, int off, char *str); @@ -1491,10 +1511,12 @@ void gdatacomplex(Node*, Mpcplx*); void gdatastring(Node*, Strlit*); void ggloblnod(Node *nam); void ggloblsym(Sym *s, int32 width, int dupok, int rodata); +void gfatvardef(Node*); Prog* gjmp(Prog*); void gused(Node*); void movelarge(NodeList*); int isfat(Type*); +void liveness(Node*, Prog*, Sym*, Sym*, Sym*); void markautoused(Prog*); Plist* newplist(void); Node* nodarg(Type*, int); diff --git a/src/cmd/gc/pgen.c b/src/cmd/gc/pgen.c index 2d364529e..c9ba89397 100644 --- a/src/cmd/gc/pgen.c +++ b/src/cmd/gc/pgen.c @@ -12,26 +12,66 @@ #include "opt.h" #include "../../pkg/runtime/funcdata.h" -enum { BitsPerPointer = 2 }; - static void allocauto(Prog* p); -static void dumpgcargs(Node*, Sym*); -static Bvec* dumpgclocals(Node*, Sym*); + +static Sym* +makefuncdatasym(char *namefmt, int64 funcdatakind) +{ + Node nod; + Node *pnod; + Sym *sym; + static int32 nsym; + + snprint(namebuf, sizeof(namebuf), namefmt, nsym++); + sym = lookup(namebuf); + pnod = newname(sym); + pnod->class = PEXTERN; + nodconst(&nod, types[TINT32], funcdatakind); + gins(AFUNCDATA, &nod, pnod); + return sym; +} + +void +gfatvardef(Node *n) +{ + if(n == N || !isfat(n->type)) + fatal("gfatvardef: node is not fat"); + switch(n->class) { + case PAUTO: + case PPARAM: + case PPARAMOUT: + gins(AFATVARDEF, N, n); + } +} + +static void +removefatvardef(Prog *firstp) +{ + Prog *p; + + for(p = firstp; p != P; p = p->link) { + while(p->link != P && p->link->as == AFATVARDEF) + p->link = p->link->link; + if(p->to.type == D_BRANCH) + while(p->to.u.branch != P && p->to.u.branch->as == AFATVARDEF) + p->to.u.branch = p->to.u.branch->link; + } +} void compile(Node *fn) { - Bvec *bv; Plist *pl; - Node nod1, *n, *gcargsnod, *gclocalsnod; + Node nod1, *n; Prog *ptxt, *p, *p1; int32 lno; Type *t; Iter save; vlong oldstksize; NodeList *l; - Sym *gcargssym, *gclocalssym; - static int ngcargs, ngclocals; + Sym *gcargs; + Sym *gclocals; + Sym *gcdead; if(newproc == N) { newproc = sysfunc("newproc"); @@ -111,21 +151,9 @@ compile(Node *fn) ginit(); - snprint(namebuf, sizeof namebuf, "gcargs·%d", ngcargs++); - gcargssym = lookup(namebuf); - gcargsnod = newname(gcargssym); - gcargsnod->class = PEXTERN; - - nodconst(&nod1, types[TINT32], FUNCDATA_GCArgs); - gins(AFUNCDATA, &nod1, gcargsnod); - - snprint(namebuf, sizeof(namebuf), "gclocals·%d", ngclocals++); - gclocalssym = lookup(namebuf); - gclocalsnod = newname(gclocalssym); - gclocalsnod->class = PEXTERN; - - nodconst(&nod1, types[TINT32], FUNCDATA_GCLocals); - gins(AFUNCDATA, &nod1, gclocalsnod); + gcargs = makefuncdatasym("gcargs·%d", FUNCDATA_ArgsPointerMaps); + gclocals = makefuncdatasym("gclocals·%d", FUNCDATA_LocalsPointerMaps); + gcdead = makefuncdatasym("gcdead·%d", FUNCDATA_DeadPointerMaps); for(t=curfn->paramfld; t; t=t->down) gtrack(tracksym(t->type)); @@ -184,6 +212,7 @@ compile(Node *fn) pc->as = ARET; // overwrite AEND pc->lineno = lineno; + fixjmp(ptxt); if(!debug['N'] || debug['R'] || debug['P']) { regopt(ptxt); nilopt(ptxt); @@ -203,184 +232,19 @@ compile(Node *fn) } // Emit garbage collection symbols. - dumpgcargs(fn, gcargssym); - bv = dumpgclocals(curfn, gclocalssym); + liveness(curfn, ptxt, gcargs, gclocals, gcdead); - defframe(ptxt, bv); - free(bv); + defframe(ptxt); if(0) frame(0); + // Remove leftover instrumentation from the instruction stream. + removefatvardef(ptxt); ret: lineno = lno; } -static void -walktype1(Type *t, vlong *xoffset, Bvec *bv) -{ - vlong fieldoffset, i, o; - Type *t1; - - if(t->align > 0 && (*xoffset % t->align) != 0) - fatal("walktype1: invalid initial alignment, %T", t); - - switch(t->etype) { - case TINT8: - case TUINT8: - case TINT16: - case TUINT16: - case TINT32: - case TUINT32: - case TINT64: - case TUINT64: - case TINT: - case TUINT: - case TUINTPTR: - case TBOOL: - case TFLOAT32: - case TFLOAT64: - case TCOMPLEX64: - case TCOMPLEX128: - *xoffset += t->width; - break; - - case TPTR32: - case TPTR64: - case TUNSAFEPTR: - case TFUNC: - case TCHAN: - case TMAP: - if(*xoffset % widthptr != 0) - fatal("walktype1: invalid alignment, %T", t); - bvset(bv, (*xoffset / widthptr) * BitsPerPointer); - *xoffset += t->width; - break; - - case TSTRING: - // struct { byte *str; intgo len; } - if(*xoffset % widthptr != 0) - fatal("walktype1: invalid alignment, %T", t); - bvset(bv, (*xoffset / widthptr) * BitsPerPointer); - *xoffset += t->width; - break; - - case TINTER: - // struct { Itab* tab; union { void* ptr, uintptr val } data; } - // or, when isnilinter(t)==true: - // struct { Type* type; union { void* ptr, uintptr val } data; } - if(*xoffset % widthptr != 0) - fatal("walktype1: invalid alignment, %T", t); - bvset(bv, ((*xoffset / widthptr) * BitsPerPointer) + 1); - if(isnilinter(t)) - bvset(bv, ((*xoffset / widthptr) * BitsPerPointer)); - *xoffset += t->width; - break; - - case TARRAY: - // The value of t->bound is -1 for slices types and >0 for - // for fixed array types. All other values are invalid. - if(t->bound < -1) - fatal("walktype1: invalid bound, %T", t); - if(isslice(t)) { - // struct { byte* array; uintgo len; uintgo cap; } - if(*xoffset % widthptr != 0) - fatal("walktype1: invalid TARRAY alignment, %T", t); - bvset(bv, (*xoffset / widthptr) * BitsPerPointer); - *xoffset += t->width; - } else if(!haspointers(t->type)) - *xoffset += t->width; - else - for(i = 0; i < t->bound; ++i) - walktype1(t->type, xoffset, bv); - break; - - case TSTRUCT: - o = 0; - for(t1 = t->type; t1 != T; t1 = t1->down) { - fieldoffset = t1->width; - *xoffset += fieldoffset - o; - walktype1(t1->type, xoffset, bv); - o = fieldoffset + t1->type->width; - } - *xoffset += t->width - o; - break; - - default: - fatal("walktype1: unexpected type, %T", t); - } -} - -static void -walktype(Type *type, Bvec *bv) -{ - vlong xoffset; - - // Start the walk at offset 0. The correct offset will be - // filled in by the first type encountered during the walk. - xoffset = 0; - walktype1(type, &xoffset, bv); -} - -// Compute a bit vector to describe the pointer-containing locations -// in the in and out argument list and dump the bitvector length and -// data to the provided symbol. -static void -dumpgcargs(Node *fn, Sym *sym) -{ - Type *thistype, *inargtype, *outargtype; - Bvec *bv; - int32 i; - int off; - - thistype = getthisx(fn->type); - inargtype = getinargx(fn->type); - outargtype = getoutargx(fn->type); - bv = bvalloc((fn->type->argwid / widthptr) * BitsPerPointer); - if(thistype != nil) - walktype(thistype, bv); - if(inargtype != nil) - walktype(inargtype, bv); - if(outargtype != nil) - walktype(outargtype, bv); - off = duint32(sym, 0, bv->n); - for(i = 0; i < bv->n; i += 32) - off = duint32(sym, off, bv->b[i/32]); - free(bv); - ggloblsym(sym, off, 0, 1); -} - -// Compute a bit vector to describe the pointer-containing locations -// in local variables and dump the bitvector length and data out to -// the provided symbol. Return the vector for use and freeing by caller. -static Bvec* -dumpgclocals(Node* fn, Sym *sym) -{ - Bvec *bv; - NodeList *ll; - Node *node; - vlong xoffset; - int32 i; - int off; - - bv = bvalloc((stkptrsize / widthptr) * BitsPerPointer); - for(ll = fn->dcl; ll != nil; ll = ll->next) { - node = ll->n; - if(node->class == PAUTO && node->op == ONAME) { - if(haspointers(node->type)) { - xoffset = node->xoffset + stkptrsize; - walktype1(node->type, &xoffset, bv); - } - } - } - off = duint32(sym, 0, bv->n); - for(i = 0; i < bv->n; i += 32) { - off = duint32(sym, off, bv->b[i/32]); - } - ggloblsym(sym, off, 0, 1); - return bv; -} - // Sort the list of stack variables. Autos after anything else, // within autos, unused after used, within used, things with // pointers first, zeroed things first, and then decreasing size. diff --git a/src/cmd/gc/plive.c b/src/cmd/gc/plive.c new file mode 100644 index 000000000..a309db9f6 --- /dev/null +++ b/src/cmd/gc/plive.c @@ -0,0 +1,1489 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include <u.h> +#include <libc.h> +#include "gg.h" +#include "opt.h" +#include "../../pkg/runtime/funcdata.h" + +enum { BitsPerPointer = 2 }; + +enum { + UNVISITED = 0, + VISITED = 1, +}; + +// An ordinary basic block. +// +// Instructions are threaded together in a doubly-linked list. To iterate in +// program order follow the link pointer from the first node and stop after the +// last node has been visited +// +// for(p = bb->first;; p = p->link) { +// ... +// if(p == bb->last) +// break; +// } +// +// To iterate in reverse program order by following the opt pointer from the +// last node +// +// for(p = bb->last; p != nil; p = p->opt) { +// ... +// } +typedef struct BasicBlock BasicBlock; +struct BasicBlock { + // An array of preceding blocks. If the length of this array is 0 the + // block is probably the start block of the CFG. + Array *pred; + + // An array out succeeding blocks. If the length of this array is zero, + // the block probably ends in a return instruction. + Array *succ; + + // First instruction in the block. When part of a fully initialized + // control flow graph, the opt member will be nil. + Prog *first; + + // Last instruction in the basic block. + Prog *last; + + // The reverse post order number. This value is initialized to -1 and + // will be replaced by a non-negative value when the CFG is constructed. + // After CFG construction, if rpo is -1 this block is unreachable. + int rpo; + + // State to denote whether the block has been visited during a + // traversal. + int mark; +}; + +// A collection of global state used by liveness analysis. +typedef struct Liveness Liveness; +struct Liveness { + // A pointer to the node corresponding to the function being analyzed. + Node *fn; + + // A linked list of instructions for this function. + Prog *ptxt; + + // A list of arguments and local variables in this function. + Array *vars; + + // A list of basic blocks that are overlayed on the instruction list. + Array *cfg; + + // Summary sets of block effects. The upward exposed variables and + // variables killed sets are computed during the dataflow prologue. The + // live in and live out are solved for and serialized in the epilogue. + Bvec **uevar; + Bvec **varkill; + Bvec **livein; + Bvec **liveout; + + // An array with a bit vector for each safe point tracking live pointers + // in the arguments and locals area. + Array *argslivepointers; + Array *livepointers; + + // An array with a bit vector for each safe point tracking dead values + // pointers in the arguments and locals area. + Array *argsdeadvalues; + Array *deadvalues; +}; + +static int printnoise = 0; + +static void* +xmalloc(uintptr size) +{ + void *result; + + result = malloc(size); + if(result == nil) + fatal("malloc failed"); + return result; +} + +// Constructs a new basic block containing a single instruction. +static BasicBlock* +newblock(Prog *prog) +{ + BasicBlock *result; + + if(prog == nil) + fatal("newblock: prog cannot be nil"); + result = xmalloc(sizeof(*result)); + result->rpo = -1; + result->mark = UNVISITED; + result->first = prog; + result->last = prog; + result->pred = arraynew(2, sizeof(BasicBlock*)); + result->succ = arraynew(2, sizeof(BasicBlock*)); + return result; +} + +// Frees a basic block and all of its leaf data structures. +static void +freeblock(BasicBlock *bb) +{ + if(bb == nil) + fatal("freeblock: cannot free nil"); + arrayfree(bb->pred); + arrayfree(bb->succ); + free(bb); +} + +// Adds an edge between two basic blocks by making from a predecessor of to and +// to a successor of from. +static void +addedge(BasicBlock *from, BasicBlock *to) +{ + if(from == nil) + fatal("addedge: from is nil"); + if(to == nil) + fatal("addedge: to is nil"); + arrayadd(from->succ, &to); + arrayadd(to->pred, &from); +} + +// Inserts a new instruction ahead of an existing instruction in the instruction +// stream. Any control flow, such as branches or fall throughs, that target the +// existing instruction are adjusted to target the new instruction. +static void +splicebefore(Liveness *lv, BasicBlock *bb, Prog *prev, Prog *curr) +{ + Prog *p; + + prev->opt = curr->opt; + curr->opt = prev; + prev->link = curr; + if(prev->opt != nil) + ((Prog*)prev->opt)->link = prev; + else + bb->first = prev; + for(p = lv->ptxt; p != nil; p = p->link) { + if(p != prev) { + if(p->link == curr) + p->link = prev; + if(p->as != ACALL && p->to.type == D_BRANCH && p->to.u.branch == curr) + p->to.u.branch = prev; + } + } +} + +// A pretty printer for basic blocks. +static void +printblock(BasicBlock *bb) +{ + BasicBlock *pred; + BasicBlock *succ; + Prog *prog; + int i; + + print("basic block %d\n", bb->rpo); + print("\tpred:"); + for(i = 0; i < arraylength(bb->pred); i++) { + pred = *(BasicBlock**)arrayget(bb->pred, i); + print(" %d", pred->rpo); + } + print("\n"); + print("\tsucc:"); + for(i = 0; i < arraylength(bb->succ); i++) { + succ = *(BasicBlock**)arrayget(bb->succ, i); + print(" %d", succ->rpo); + } + print("\n"); + print("\tprog:\n"); + for(prog = bb->first;; prog=prog->link) { + print("\t\t%P\n", prog); + if(prog == bb->last) + break; + } +} + + +// Iterates over a basic block applying a callback to each instruction. There +// are two criteria for termination. If the end of basic block is reached a +// value of zero is returned. If the callback returns a non-zero value, the +// iteration is stopped and the value of the callback is returned. +static int +blockany(BasicBlock *bb, int (*callback)(Prog*)) +{ + Prog *p; + int result; + + for(p = bb->last; p != nil; p = p->opt) { + result = (*callback)(p); + if(result != 0) + return result; + } + return 0; +} + +// Collects and returns and array of Node*s for functions arguments and local +// variables. TODO(cshapiro): only return pointer containing nodes. +static Array* +getvariables(Node *fn) +{ + Array *result; + NodeList *ll; + + result = arraynew(0, sizeof(Node*)); + for(ll = fn->dcl; ll != nil; ll = ll->next) { + if(ll->n->op == ONAME) { + switch(ll->n->class & ~PHEAP) { + case PAUTO: + case PPARAM: + case PPARAMOUT: + arrayadd(result, &ll->n); + } + } + } + return result; +} + +// A pretty printer for control flow graphs. Takes an array of BasicBlock*s. +static void +printcfg(Array *cfg) +{ + BasicBlock *bb; + int32 i; + + for(i = 0; i < arraylength(cfg); i++) { + bb = *(BasicBlock**)arrayget(cfg, i); + printblock(bb); + } +} + +// Assigns a reverse post order number to each connected basic block using the +// standard algorithm. Unconnected blocks will not be affected. +static void +reversepostorder(BasicBlock *root, int32 *rpo) +{ + BasicBlock *bb; + int i; + + root->mark = VISITED; + for(i = 0; i < arraylength(root->succ); i++) { + bb = *(BasicBlock**)arrayget(root->succ, i); + if(bb->mark == UNVISITED) + reversepostorder(bb, rpo); + } + *rpo -= 1; + root->rpo = *rpo; +} + +// Comparison predicate used for sorting basic blocks by their rpo in ascending +// order. +static int +blockrpocmp(const void *p1, const void *p2) +{ + BasicBlock *bb1; + BasicBlock *bb2; + + bb1 = *(BasicBlock**)p1; + bb2 = *(BasicBlock**)p2; + if(bb1->rpo < bb2->rpo) + return -1; + if(bb1->rpo > bb2->rpo) + return 1; + return 0; +} + +// A pattern matcher for call instructions. Returns true when the instruction +// is a call to a specific package qualified function name. +static int +iscall(Prog *prog, Sym *name) +{ + if(prog == nil) + fatal("iscall: prog is nil"); + if(name == nil) + fatal("iscall: function name is nil"); + if(prog->as != ACALL) + return 0; + return name == prog->to.sym; +} + +// Returns true for instructions that call a runtime function implementing a +// select communication clause. +static int +isselectcommcasecall(Prog *prog) +{ + static Sym* names[5]; + int32 i; + + if(names[0] == nil) { + names[0] = pkglookup("selectsend", runtimepkg); + names[1] = pkglookup("selectrecv", runtimepkg); + names[2] = pkglookup("selectrecv2", runtimepkg); + names[3] = pkglookup("selectdefault", runtimepkg); + } + for(i = 0; names[i] != nil; i++) + if(iscall(prog, names[i])) + return 1; + return 0; +} + +// Returns true for call instructions that target runtime·newselect. +static int +isnewselect(Prog *prog) +{ + static Sym *sym; + + if(sym == nil) + sym = pkglookup("newselect", runtimepkg); + return iscall(prog, sym); +} + +// Returns true for call instructions that target runtime·selectgo. +static int +isselectgocall(Prog *prog) +{ + static Sym *sym; + + if(sym == nil) + sym = pkglookup("selectgo", runtimepkg); + return iscall(prog, sym); +} + +static int +isdeferreturn(Prog *prog) +{ + static Sym *sym; + + if(sym == nil) + sym = pkglookup("deferreturn", runtimepkg); + return iscall(prog, sym); +} + +// Walk backwards from a runtime·selectgo call up to its immediately dominating +// runtime·newselect call. Any successor nodes of communication clause nodes +// are implicit successors of the runtime·selectgo call node. The goal of this +// analysis is to add these missing edges to complete the control flow graph. +static void +addselectgosucc(BasicBlock *selectgo) +{ + BasicBlock *pred; + BasicBlock *succ; + + pred = selectgo; + for(;;) { + if(arraylength(pred->pred) == 0) + fatal("selectgo does not have a newselect"); + pred = *(BasicBlock**)arrayget(pred->pred, 0); + if(blockany(pred, isselectcommcasecall)) { + // A select comm case block should have exactly one + // successor. + if(arraylength(pred->succ) != 1) + fatal("select comm case has too many successors"); + succ = *(BasicBlock**)arrayget(pred->succ, 0); + // Its successor should have exactly two successors. + // The drop through should flow to the selectgo block + // and the branch should lead to the select case + // statements block. + if(arraylength(succ->succ) != 2) + fatal("select comm case successor has too many successors"); + // Add the block as a successor of the selectgo block. + addedge(selectgo, succ); + } + if(blockany(pred, isnewselect)) { + // Reached the matching newselect. + break; + } + } +} + +// The entry point for the missing selectgo control flow algorithm. Takes an +// array of BasicBlock*s containing selectgo calls. +static void +fixselectgo(Array *selectgo) +{ + BasicBlock *bb; + int32 i; + + for(i = 0; i < arraylength(selectgo); i++) { + bb = *(BasicBlock**)arrayget(selectgo, i); + addselectgosucc(bb); + } +} + +// Constructs a control flow graph from a sequence of instructions. This +// procedure is complicated by various sources of implicit control flow that are +// not accounted for using the standard cfg construction algorithm. Returns an +// array of BasicBlock*s in control flow graph form (basic blocks ordered by +// their RPO number). +static Array* +newcfg(Prog *firstp) +{ + Prog *p; + Prog *prev; + BasicBlock *bb; + Array *cfg; + Array *selectgo; + int32 i; + int32 rpo; + + // Reset the opt field of each prog to nil. In the first and second + // passes, instructions that are labels temporarily use the opt field to + // point to their basic block. In the third pass, the opt field reset + // to point to the predecessor of an instruction in its basic block. + for(p = firstp; p != P; p = p->link) + p->opt = nil; + + // Allocate an array to remember where we have seen selectgo calls. + // These blocks will be revisited to add successor control flow edges. + selectgo = arraynew(0, sizeof(BasicBlock*)); + + // Loop through all instructions identifying branch targets + // and fall-throughs and allocate basic blocks. + cfg = arraynew(0, sizeof(BasicBlock*)); + bb = newblock(firstp); + arrayadd(cfg, &bb); + for(p = firstp; p != P; p = p->link) { + if(p->to.type == D_BRANCH) { + if(p->to.u.branch == nil) + fatal("prog branch to nil"); + if(p->to.u.branch->opt == nil) { + p->to.u.branch->opt = newblock(p->to.u.branch); + arrayadd(cfg, &p->to.u.branch->opt); + } + if(p->as != AJMP && p->link != nil && p->link->opt == nil) { + p->link->opt = newblock(p->link); + arrayadd(cfg, &p->link->opt); + } + } else if(isselectcommcasecall(p) || isselectgocall(p)) { + // Accommodate implicit selectgo control flow. + if(p->link->opt == nil) { + p->link->opt = newblock(p->link); + arrayadd(cfg, &p->link->opt); + } + } + } + + // Loop through all basic blocks maximally growing the list of + // contained instructions until a label is reached. Add edges + // for branches and fall-through instructions. + for(i = 0; i < arraylength(cfg); i++) { + bb = *(BasicBlock**)arrayget(cfg, i); + for(p = bb->last; p != nil; p = p->link) { + if(p->opt != nil && p != bb->last) + break; + bb->last = p; + + // Pattern match an unconditional branch followed by a + // dead return instruction. This avoids a creating + // unreachable control flow nodes. + if(p->link != nil && p->link->link == nil) + if (p->as == AJMP && p->link->as == ARET && p->link->opt == nil) + break; + + // Collect basic blocks with selectgo calls. + if(isselectgocall(p)) + arrayadd(selectgo, &bb); + } + if(bb->last->to.type == D_BRANCH) + addedge(bb, bb->last->to.u.branch->opt); + if(bb->last->link != nil) { + // Add a fall-through when the instruction is + // not an unconditional control transfer. + switch(bb->last->as) { + case AJMP: + case ARET: + break; + default: + addedge(bb, bb->last->link->opt); + } + } + } + + // Add back links so the instructions in a basic block can be traversed + // backward. This is the final state of the instruction opt field. + for(i = 0; i < arraylength(cfg); i++) { + bb = *(BasicBlock**)arrayget(cfg, i); + p = bb->first; + prev = nil; + for(;;) { + p->opt = prev; + if(p == bb->last) + break; + prev = p; + p = p->link; + } + } + + // Add missing successor edges to the selectgo blocks. + if(arraylength(selectgo)) + fixselectgo(selectgo); + arrayfree(selectgo); + + // Find a depth-first order and assign a depth-first number to + // all basic blocks. + for(i = 0; i < arraylength(cfg); i++) { + bb = *(BasicBlock**)arrayget(cfg, i); + bb->mark = UNVISITED; + } + bb = *(BasicBlock**)arrayget(cfg, 0); + rpo = arraylength(cfg); + reversepostorder(bb, &rpo); + + // Sort the basic blocks by their depth first number. The + // array is now a depth-first spanning tree with the first + // node being the root. + arraysort(cfg, blockrpocmp); + bb = *(BasicBlock**)arrayget(cfg, 0); + + // Unreachable control flow nodes are indicated by a -1 in the rpo + // field. If we see these nodes something must have gone wrong in an + // upstream compilation phase. + if(bb->rpo == -1) + fatal("newcfg: unreferenced basic blocks"); + + return cfg; +} + +// Frees a control flow graph (an array of BasicBlock*s) and all of its leaf +// data structures. +static void +freecfg(Array *cfg) +{ + BasicBlock *bb; + BasicBlock *bb0; + Prog *p; + int32 i; + int32 len; + + len = arraylength(cfg); + if(len > 0) { + bb0 = *(BasicBlock**)arrayget(cfg, 0); + for(p = bb0->first; p != P; p = p->link) { + p->opt = nil; + } + for(i = 0; i < len; i++) { + bb = *(BasicBlock**)arrayget(cfg, i); + freeblock(bb); + } + } + arrayfree(cfg); +} + +// Returns true if the node names a variable that is otherwise uninteresting to +// the liveness computation. +static int +isfunny(Node *node) +{ + char *names[] = { ".fp", ".args", "_", nil }; + int i; + + if(node->sym != nil && node->sym->name != nil) + for(i = 0; names[i] != nil; i++) + if(strcmp(node->sym->name, names[i]) == 0) + return 1; + return 0; +} + +// Computes the upward exposure and kill effects of an instruction on a set of +// variables. The vars argument is an array of Node*s. +static void +progeffects(Prog *prog, Array *vars, Bvec *uevar, Bvec *varkill) +{ + ProgInfo info; + Adr *from; + Adr *to; + Node *node; + int32 i; + int32 pos; + + bvresetall(uevar); + bvresetall(varkill); + proginfo(&info, prog); + if(prog->as == ARET) { + // Return instructions implicitly read all the arguments. For + // the sake of correctness, out arguments must be read. For the + // sake of backtrace quality, we read in arguments as well. + for(i = 0; i < arraylength(vars); i++) { + node = *(Node**)arrayget(vars, i); + switch(node->class & ~PHEAP) { + case PPARAM: + case PPARAMOUT: + bvset(uevar, i); + break; + case PAUTO: + // Because the lifetime of stack variables + // that have their address taken is undecidable, + // we conservatively assume their lifetime + // extends to the return as well. + if(isfat(node->type) || node->addrtaken) + bvset(uevar, i); + } + } + return; + } + if(prog->as == ATEXT) { + // A text instruction marks the entry point to a function and + // the definition point of all in arguments. + for(i = 0; i < arraylength(vars); i++) { + node = *(Node**)arrayget(vars, i); + switch(node->class & ~PHEAP) { + case PPARAM: + bvset(varkill, i); + } + } + return; + } + if(info.flags & (LeftRead | LeftWrite | LeftAddr)) { + from = &prog->from; + if (from->node != nil && !isfunny(from->node) && from->sym != nil) { + switch(prog->from.node->class & ~PHEAP) { + case PAUTO: + case PPARAM: + case PPARAMOUT: + pos = arrayindexof(vars, from->node); + if(pos == -1) + fatal("progeffects: variable %N is unknown", prog->from.node); + if(info.flags & (LeftRead | LeftAddr)) + bvset(uevar, pos); + if(info.flags & LeftWrite) + if(from->node != nil && (!isfat(from->node->type) || prog->as == AFATVARDEF)) + bvset(varkill, pos); + } + } + } + if(info.flags & (RightRead | RightWrite | RightAddr)) { + to = &prog->to; + if (to->node != nil && to->sym != nil && !isfunny(to->node)) { + switch(prog->to.node->class & ~PHEAP) { + case PAUTO: + case PPARAM: + case PPARAMOUT: + pos = arrayindexof(vars, to->node); + if(pos == -1) + fatal("progeffects: variable %N is unknown", to->node); + if(info.flags & (RightRead | RightAddr)) + bvset(uevar, pos); + if(info.flags & RightWrite) + if(to->node != nil && (!isfat(to->node->type) || prog->as == AFATVARDEF)) + bvset(varkill, pos); + } + } + } +} + +// Constructs a new liveness structure used to hold the global state of the +// liveness computation. The cfg argument is an array of BasicBlock*s and the +// vars argument is an array of Node*s. +static Liveness* +newliveness(Node *fn, Prog *ptxt, Array *cfg, Array *vars) +{ + Liveness *result; + int32 i; + int32 nblocks; + int32 nvars; + + result = xmalloc(sizeof(*result)); + result->fn = fn; + result->ptxt = ptxt; + result->cfg = cfg; + result->vars = vars; + + nblocks = arraylength(cfg); + result->uevar = xmalloc(sizeof(Bvec*) * nblocks); + result->varkill = xmalloc(sizeof(Bvec*) * nblocks); + result->livein = xmalloc(sizeof(Bvec*) * nblocks); + result->liveout = xmalloc(sizeof(Bvec*) * nblocks); + + nvars = arraylength(vars); + for(i = 0; i < nblocks; i++) { + result->uevar[i] = bvalloc(nvars); + result->varkill[i] = bvalloc(nvars); + result->livein[i] = bvalloc(nvars); + result->liveout[i] = bvalloc(nvars); + } + + result->livepointers = arraynew(0, sizeof(Bvec*)); + result->argslivepointers = arraynew(0, sizeof(Bvec*)); + result->deadvalues = arraynew(0, sizeof(Bvec*)); + result->argsdeadvalues = arraynew(0, sizeof(Bvec*)); + return result; +} + +// Frees the liveness structure and all of its leaf data structures. +static void +freeliveness(Liveness *lv) +{ + int32 i; + + if(lv == nil) + fatal("freeliveness: cannot free nil"); + + for(i = 0; i < arraylength(lv->livepointers); i++) + free(*(Bvec**)arrayget(lv->livepointers, i)); + arrayfree(lv->livepointers); + + for(i = 0; i < arraylength(lv->argslivepointers); i++) + free(*(Bvec**)arrayget(lv->argslivepointers, i)); + arrayfree(lv->argslivepointers); + + for(i = 0; i < arraylength(lv->deadvalues); i++) + free(*(Bvec**)arrayget(lv->deadvalues, i)); + arrayfree(lv->deadvalues); + + for(i = 0; i < arraylength(lv->argsdeadvalues); i++) + free(*(Bvec**)arrayget(lv->argsdeadvalues, i)); + arrayfree(lv->argsdeadvalues); + + for(i = 0; i < arraylength(lv->cfg); i++) { + free(lv->uevar[i]); + free(lv->varkill[i]); + free(lv->livein[i]); + free(lv->liveout[i]); + } + + free(lv->uevar); + free(lv->varkill); + free(lv->livein); + free(lv->liveout); + + free(lv); +} + +static void +printeffects(Prog *p, Bvec *uevar, Bvec *varkill) +{ + print("effects of %P", p); + print("\nuevar: "); + bvprint(uevar); + print("\nvarkill: "); + bvprint(varkill); + print("\n"); +} + +// Pretty print a variable node. Uses Pascal like conventions for pointers and +// addresses to avoid confusing the C like conventions used in the node variable +// names. +static void +printnode(Node *node) +{ + char *p; + char *a; + + p = haspointers(node->type) ? "^" : ""; + a = node->addrtaken ? "@" : ""; + print(" %N%s%s", node, p, a); +} + +// Pretty print a list of variables. The vars argument is an array of Node*s. +static void +printvars(char *name, Bvec *bv, Array *vars) +{ + int32 i; + + print("%s:", name); + for(i = 0; i < arraylength(vars); i++) + if(bvget(bv, i)) + printnode(*(Node**)arrayget(vars, i)); + print("\n"); +} + +// Prints a basic block annotated with the information computed by liveness +// analysis. +static void +livenessprintblock(Liveness *lv, BasicBlock *bb) +{ + BasicBlock *pred; + BasicBlock *succ; + Prog *prog; + Bvec *live; + int i; + int32 pos; + + print("basic block %d\n", bb->rpo); + + print("\tpred:"); + for(i = 0; i < arraylength(bb->pred); i++) { + pred = *(BasicBlock**)arrayget(bb->pred, i); + print(" %d", pred->rpo); + } + print("\n"); + + print("\tsucc:"); + for(i = 0; i < arraylength(bb->succ); i++) { + succ = *(BasicBlock**)arrayget(bb->succ, i); + print(" %d", succ->rpo); + } + print("\n"); + + printvars("\tuevar", lv->uevar[bb->rpo], lv->vars); + printvars("\tvarkill", lv->varkill[bb->rpo], lv->vars); + printvars("\tlivein", lv->livein[bb->rpo], lv->vars); + printvars("\tliveout", lv->liveout[bb->rpo], lv->vars); + + print("\tprog:\n"); + for(prog = bb->first;; prog = prog->link) { + print("\t\t%P", prog); + if(prog->as == APCDATA && prog->from.offset == PCDATA_StackMapIndex) { + pos = prog->to.offset; + live = *(Bvec**)arrayget(lv->livepointers, pos); + print(" "); + bvprint(live); + } + print("\n"); + if(prog == bb->last) + break; + } +} + +// Prints a control flow graph annotated with any information computed by +// liveness analysis. +static void +livenessprintcfg(Liveness *lv) +{ + BasicBlock *bb; + int32 i; + + for(i = 0; i < arraylength(lv->cfg); i++) { + bb = *(BasicBlock**)arrayget(lv->cfg, i); + livenessprintblock(lv, bb); + } +} + +static void +checkauto(Node *fn, Prog *p, Node *n, char *where) +{ + NodeList *ll; + int found; + char *fnname; + char *nname; + + found = 0; + for(ll = fn->dcl; ll != nil; ll = ll->next) { + if(ll->n->op == ONAME && ll->n->class == PAUTO) { + if(n == ll->n) { + found = 1; + break; + } + } + } + if(found) + return; + fnname = fn->nname->sym->name ? fn->nname->sym->name : "<unknown>"; + nname = n->sym->name ? n->sym->name : "<unknown>"; + print("D_AUTO '%s' not found: name is '%s' function is '%s' class is %d\n", where, nname, fnname, n->class); + print("Here '%P'\nlooking for node %p\n", p, n); + for(ll = fn->dcl; ll != nil; ll = ll->next) + print("node=%lx, node->class=%d\n", (uintptr)ll->n, ll->n->class); + yyerror("checkauto: invariant lost"); +} + +static void +checkparam(Node *fn, Prog *p, Node *n, char *where) +{ + NodeList *ll; + int found; + char *fnname; + char *nname; + + if(isfunny(n)) + return; + found = 0; + for(ll = fn->dcl; ll != nil; ll = ll->next) { + if(ll->n->op == ONAME && ((ll->n->class & ~PHEAP) == PPARAM || + (ll->n->class & ~PHEAP) == PPARAMOUT)) { + if(n == ll->n) { + found = 1; + break; + } + } + } + if(found) + return; + if(n->sym) { + fnname = fn->nname->sym->name ? fn->nname->sym->name : "<unknown>"; + nname = n->sym->name ? n->sym->name : "<unknown>"; + print("D_PARAM '%s' not found: name='%s' function='%s' class is %d\n", where, nname, fnname, n->class); + print("Here '%P'\nlooking for node %p\n", p, n); + for(ll = fn->dcl; ll != nil; ll = ll->next) + print("node=%p, node->class=%d\n", ll->n, ll->n->class); + } + yyerror("checkparam: invariant lost"); +} + +static void +checkprog(Node *fn, Prog *p) +{ + if(p->from.type == D_AUTO) + checkauto(fn, p, p->from.node, "from"); + if(p->from.type == D_PARAM) + checkparam(fn, p, p->from.node, "from"); + if(p->to.type == D_AUTO) + checkauto(fn, p, p->to.node, "to"); + if(p->to.type == D_PARAM) + checkparam(fn, p, p->to.node, "to"); +} + +// Check instruction invariants. We assume that the nodes corresponding to the +// sources and destinations of memory operations will be declared in the +// function. This is not strictly true, as is the case for the so-called funny +// nodes and there are special cases to skip over that stuff. The analysis will +// fail if this invariant blindly changes. +static void +checkptxt(Node *fn, Prog *firstp) +{ + Prog *p; + + for(p = firstp; p != P; p = p->link) { + if(0) + print("analyzing '%P'\n", p); + switch(p->as) { + case ADATA: + case AGLOBL: + case ANAME: + case ASIGNAME: + case ATYPE: + continue; + } + checkprog(fn, p); + } +} + +static void +twobitwalktype1(Type *t, vlong *xoffset, Bvec *bv) +{ + vlong fieldoffset; + vlong i; + vlong o; + Type *t1; + + if(t->align > 0 && (*xoffset % t->align) != 0) + fatal("twobitwalktype1: invalid initial alignment, %T", t); + + switch(t->etype) { + case TINT8: + case TUINT8: + case TINT16: + case TUINT16: + case TINT32: + case TUINT32: + case TINT64: + case TUINT64: + case TINT: + case TUINT: + case TUINTPTR: + case TBOOL: + case TFLOAT32: + case TFLOAT64: + case TCOMPLEX64: + case TCOMPLEX128: + *xoffset += t->width; + break; + + case TPTR32: + case TPTR64: + case TUNSAFEPTR: + case TFUNC: + case TCHAN: + case TMAP: + if(*xoffset % widthptr != 0) + fatal("twobitwalktype1: invalid alignment, %T", t); + bvset(bv, (*xoffset / widthptr) * BitsPerPointer); + *xoffset += t->width; + break; + + case TSTRING: + // struct { byte *str; intgo len; } + if(*xoffset % widthptr != 0) + fatal("twobitwalktype1: invalid alignment, %T", t); + bvset(bv, (*xoffset / widthptr) * BitsPerPointer); + *xoffset += t->width; + break; + + case TINTER: + // struct { Itab *tab; union { void *ptr, uintptr val } data; } + // or, when isnilinter(t)==true: + // struct { Type *type; union { void *ptr, uintptr val } data; } + if(*xoffset % widthptr != 0) + fatal("twobitwalktype1: invalid alignment, %T", t); + bvset(bv, ((*xoffset / widthptr) * BitsPerPointer) + 1); + if(isnilinter(t)) + bvset(bv, ((*xoffset / widthptr) * BitsPerPointer)); + *xoffset += t->width; + break; + + case TARRAY: + // The value of t->bound is -1 for slices types and >0 for + // for fixed array types. All other values are invalid. + if(t->bound < -1) + fatal("twobitwalktype1: invalid bound, %T", t); + if(isslice(t)) { + // struct { byte *array; uintgo len; uintgo cap; } + if(*xoffset % widthptr != 0) + fatal("twobitwalktype1: invalid TARRAY alignment, %T", t); + bvset(bv, (*xoffset / widthptr) * BitsPerPointer); + *xoffset += t->width; + } else if(!haspointers(t->type)) + *xoffset += t->width; + else + for(i = 0; i < t->bound; i++) + twobitwalktype1(t->type, xoffset, bv); + break; + + case TSTRUCT: + o = 0; + for(t1 = t->type; t1 != T; t1 = t1->down) { + fieldoffset = t1->width; + *xoffset += fieldoffset - o; + twobitwalktype1(t1->type, xoffset, bv); + o = fieldoffset + t1->type->width; + } + *xoffset += t->width - o; + break; + + default: + fatal("twobitwalktype1: unexpected type, %T", t); + } +} + +// Returns the number of words of local variables. +static int32 +localswords(void) +{ + return stkptrsize / widthptr; +} + +// Returns the number of words of in and out arguments. +static int32 +argswords(void) +{ + return curfn->type->argwid / widthptr; +} + +// Generates live pointer value maps for arguments and local variables. The +// this argument and the in arguments are always assumed live. The vars +// argument is an array of Node*s. +static void +twobitlivepointermap(Liveness *lv, Bvec *liveout, Array *vars, Bvec *args, Bvec *locals) +{ + Node *node; + Type *thisargtype; + Type *inargtype; + vlong xoffset; + int32 i; + + for(i = 0; i < arraylength(vars); i++) { + node = *(Node**)arrayget(vars, i); + switch(node->class) { + case PAUTO: + if(bvget(liveout, i) && haspointers(node->type)) { + xoffset = node->xoffset + stkptrsize; + twobitwalktype1(node->type, &xoffset, locals); + } + break; + case PPARAM: + case PPARAMOUT: + if(bvget(liveout, i) && haspointers(node->type)) { + xoffset = node->xoffset; + twobitwalktype1(node->type, &xoffset, args); + } + break; + } + } + // In various and obscure circumstances, such as methods with an unused + // receiver, the this argument and in arguments are omitted from the + // node list. We must explicitly preserve these values to ensure that + // the addresses printed in backtraces are valid. + thisargtype = getthisx(lv->fn->type); + if(thisargtype != nil) { + xoffset = 0; + twobitwalktype1(thisargtype, &xoffset, args); + } + inargtype = getinargx(lv->fn->type); + if(inargtype != nil) { + xoffset = 0; + twobitwalktype1(inargtype, &xoffset, args); + } +} + + +// Generates dead value maps for arguments and local variables. Dead values of +// any type are tracked, not just pointers. The this argument and the in +// arguments are never assumed dead. The vars argument is an array of Node*s. +static void +twobitdeadvaluemap(Liveness *lv, Bvec *liveout, Array *vars, Bvec *args, Bvec *locals) +{ + Node *node; + /* + Type *thisargtype; + Type *inargtype; + */ + vlong xoffset; + int32 i; + + for(i = 0; i < arraylength(vars); i++) { + node = *(Node**)arrayget(vars, i); + switch(node->class) { + case PAUTO: + if(!bvget(liveout, i)) { + xoffset = node->xoffset + stkptrsize; + twobitwalktype1(node->type, &xoffset, locals); + } + break; + case PPARAM: + case PPARAMOUT: + if(!bvget(liveout, i)) { + xoffset = node->xoffset; + twobitwalktype1(node->type, &xoffset, args); + } + break; + } + } + USED(lv); + /* + thisargtype = getinargx(lv->fn->type); + if(thisargtype != nil) { + xoffset = 0; + twobitwalktype1(thisargtype, &xoffset, args); + } + inargtype = getinargx(lv->fn->type); + if(inargtype != nil) { + xoffset = 0; + twobitwalktype1(inargtype, &xoffset, args); + } + */ +} + +// Construct a disembodied instruction. +static Prog* +unlinkedprog(int as) +{ + Prog *p; + + p = mal(sizeof(*p)); + clearp(p); + p->as = as; + return p; +} + +// Construct a new PCDATA instruction associated with and for the purposes of +// covering an existing instruction. +static Prog* +newpcdataprog(Prog *prog, int32 index) +{ + Node from; + Node to; + Prog *pcdata; + + nodconst(&from, types[TINT32], PCDATA_StackMapIndex); + nodconst(&to, types[TINT32], index); + pcdata = unlinkedprog(APCDATA); + pcdata->lineno = prog->lineno; + naddr(&from, &pcdata->from, 0); + naddr(&to, &pcdata->to, 0); + return pcdata; +} + +// Returns true for instructions that are safe points that must be annotated +// with liveness information. +static int +issafepoint(Prog *prog) +{ + return prog->as == ATEXT || prog->as == ACALL; +} + +// Initializes the sets for solving the live variables. Visits all the +// instructions in each basic block to summarizes the information at each basic +// block +static void +livenessprologue(Liveness *lv) +{ + BasicBlock *bb; + Bvec *uevar; + Bvec *varkill; + Prog *prog; + int32 i; + int32 nvars; + + nvars = arraylength(lv->vars); + uevar = bvalloc(nvars); + varkill = bvalloc(nvars); + for(i = 0; i < arraylength(lv->cfg); i++) { + bb = *(BasicBlock**)arrayget(lv->cfg, i); + // Walk the block instructions backward and update the block + // effects with the each prog effects. + for(prog = bb->last; prog != nil; prog = prog->opt) { + progeffects(prog, lv->vars, uevar, varkill); + if(0) printeffects(prog, uevar, varkill); + bvor(lv->varkill[i], lv->varkill[i], varkill); + bvandnot(lv->uevar[i], lv->uevar[i], varkill); + bvor(lv->uevar[i], lv->uevar[i], uevar); + } + } + free(uevar); + free(varkill); +} + +// Solve the liveness dataflow equations. +static void +livenesssolve(Liveness *lv) +{ + BasicBlock *bb; + BasicBlock *succ; + Bvec *newlivein; + Bvec *newliveout; + int32 rpo; + int32 i; + int32 j; + int change; + + // These temporary bitvectors exist to avoid successive allocations and + // frees within the loop. + newlivein = bvalloc(arraylength(lv->vars)); + newliveout = bvalloc(arraylength(lv->vars)); + + // Iterate through the blocks in reverse round-robin fashion. A work + // queue might be slightly faster. As is, the number of iterations is + // so low that it hardly seems to be worth the complexity. + change = 1; + while(change != 0) { + change = 0; + // Walk blocks in the general direction of propagation. This + // improves convergence. + for(i = arraylength(lv->cfg) - 1; i >= 0; i--) { + // out[b] = \bigcup_{s \in succ[b]} in[s] + bb = *(BasicBlock**)arrayget(lv->cfg, i); + rpo = bb->rpo; + bvresetall(newliveout); + for(j = 0; j < arraylength(bb->succ); j++) { + succ = *(BasicBlock**)arrayget(bb->succ, j); + bvor(newliveout, newliveout, lv->livein[succ->rpo]); + } + if(bvcmp(lv->liveout[rpo], newliveout)) { + change = 1; + bvcopy(lv->liveout[rpo], newliveout); + } + // in[b] = uevar[b] \cup (out[b] \setminus varkill[b]) + bvandnot(newlivein, lv->liveout[rpo], lv->varkill[rpo]); + bvor(lv->livein[rpo], newlivein, lv->uevar[rpo]); + } + } + + free(newlivein); + free(newliveout); +} + +// Visits all instructions in a basic block and computes a bit vector of live +// variables at each safe point locations. +static void +livenessepilogue(Liveness *lv) +{ + BasicBlock *bb; + Bvec *livein; + Bvec *liveout; + Bvec *uevar; + Bvec *varkill; + Bvec *args; + Bvec *locals; + Prog *p; + int32 i; + int32 nvars; + int32 pos; + + nvars = arraylength(lv->vars); + livein = bvalloc(nvars); + liveout = bvalloc(nvars); + uevar = bvalloc(nvars); + varkill = bvalloc(nvars); + + for(i = 0; i < arraylength(lv->cfg); i++) { + bb = *(BasicBlock**)arrayget(lv->cfg, i); + bvcopy(livein, lv->liveout[bb->rpo]); + // Walk forward through the basic block instructions and + // allocate and empty map for those instructions that need them + for(p = bb->last; p != nil; p = p->opt) { + if(!issafepoint(p)) + continue; + + // Allocate a bit vector for each class and facet of + // value we are tracking. + + // Live stuff first. + args = bvalloc(argswords() * BitsPerPointer); + arrayadd(lv->argslivepointers, &args); + locals = bvalloc(localswords() * BitsPerPointer); + arrayadd(lv->livepointers, &locals); + + // Dead stuff second. + args = bvalloc(argswords() * BitsPerPointer); + arrayadd(lv->argsdeadvalues, &args); + locals = bvalloc(localswords() * BitsPerPointer); + arrayadd(lv->deadvalues, &locals); + } + + // walk backward, emit pcdata and populate the maps + pos = arraylength(lv->livepointers) - 1; + if(pos < 0) { + // the first block we encounter should have the ATEXT so + // at no point should pos ever be less than zero. + fatal("livenessepilogue"); + } + + for(p = bb->last; p != nil; p = p->opt) { + // Propagate liveness information + progeffects(p, lv->vars, uevar, varkill); + bvcopy(liveout, livein); + bvandnot(livein, liveout, varkill); + bvor(livein, livein, uevar); + if(printnoise){ + print("%P\n", p); + printvars("uevar", uevar, lv->vars); + printvars("varkill", varkill, lv->vars); + printvars("livein", livein, lv->vars); + printvars("liveout", liveout, lv->vars); + } + if(issafepoint(p)) { + // Found an interesting instruction, record the + // corresponding liveness information. Only + // CALL instructions need a PCDATA annotation. + // The TEXT instruction annotation is implicit. + if(p->as == ACALL) { + if(isdeferreturn(p)) { + // Because this runtime call + // modifies its return address + // to return back to itself, + // emitting a PCDATA before the + // call instruction will result + // in an off by one error during + // a stack walk. Fortunately, + // the compiler inserts a no-op + // instruction before this call + // so we can reliably anchor the + // PCDATA to that instruction. + splicebefore(lv, bb, newpcdataprog(p->opt, pos), p->opt); + } else { + splicebefore(lv, bb, newpcdataprog(p, pos), p); + } + } + + // Record live pointers. + args = *(Bvec**)arrayget(lv->argslivepointers, pos); + locals = *(Bvec**)arrayget(lv->livepointers, pos); + twobitlivepointermap(lv, liveout, lv->vars, args, locals); + + // Record dead values. + args = *(Bvec**)arrayget(lv->argsdeadvalues, pos); + locals = *(Bvec**)arrayget(lv->deadvalues, pos); + twobitdeadvaluemap(lv, liveout, lv->vars, args, locals); + + pos--; + } + } + } + + free(livein); + free(liveout); + free(uevar); + free(varkill); +} + +// Dumps an array of bitmaps to a symbol as a sequence of uint32 values. The +// first word dumped is the total number of bitmaps. The second word is the +// length of the bitmaps. All bitmaps are assumed to be of equal length. The +// words that are followed are the raw bitmap words. The arr argument is an +// array of Node*s. +static void +twobitwritesymbol(Array *arr, Sym *sym, Bvec *check) +{ + Bvec *bv; + int off; + uint32 bit; + uint32 word; + uint32 checkword; + int32 i; + int32 j; + int32 len; + int32 pos; + + len = arraylength(arr); + // Dump the length of the bitmap array. + off = duint32(sym, 0, len); + for(i = 0; i < len; i++) { + bv = *(Bvec**)arrayget(arr, i); + // If we have been provided a check bitmap we can use it + // to confirm that the bitmap we are dumping is a subset + // of the check bitmap. + if(check != nil) { + for(j = 0; j < bv->n; j += 32) { + word = bv->b[j/32]; + checkword = check->b[j/32]; + if(word != checkword) { + // Found a mismatched word, find + // the mismatched bit. + for(pos = 0; pos < 32; pos++) { + bit = 1 << pos; + if((word & bit) && !(checkword & bit)) { + print("twobitwritesymbol: expected %032b to be a subset of %032b\n", word, checkword); + fatal("mismatch at bit position %d\n", pos); + } + } + } + } + } + // Dump the length of the bitmap. + off = duint32(sym, off, bv->n); + // Dump the words of the bitmap. + for(j = 0; j < bv->n; j += 32) { + word = bv->b[j/32]; + off = duint32(sym, off, word); + } + } + ggloblsym(sym, off, 0, 1); +} + +static void +printprog(Prog *p) +{ + while(p != nil) { + print("%P\n", p); + p = p->link; + } +} + +// Entry pointer for liveness analysis. Constructs a complete CFG, solves for +// the liveness of pointer variables in the function, and emits a runtime data +// structure read by the garbage collector. +void +liveness(Node *fn, Prog *firstp, Sym *argssym, Sym *livesym, Sym *deadsym) +{ + Array *cfg; + Array *vars; + Liveness *lv; + + if(0) print("curfn->nname->sym->name is %s\n", curfn->nname->sym->name); + if(0) printprog(firstp); + checkptxt(fn, firstp); + + // Construct the global liveness state. + cfg = newcfg(firstp); + if(0) printcfg(cfg); + vars = getvariables(fn); + lv = newliveness(fn, firstp, cfg, vars); + + // Run the dataflow framework. + livenessprologue(lv); + if(0) livenessprintcfg(lv); + livenesssolve(lv); + if(0) livenessprintcfg(lv); + livenessepilogue(lv); + + // Emit the map data structures + twobitwritesymbol(lv->livepointers, livesym, nil); + twobitwritesymbol(lv->argslivepointers, argssym, nil); + if(deadsym != nil) + twobitwritesymbol(lv->deadvalues, deadsym, nil); + + // Free everything. + freeliveness(lv); + arrayfree(vars); + freecfg(cfg); +} diff --git a/src/pkg/runtime/funcdata.h b/src/pkg/runtime/funcdata.h index 166263ef9..04766b9da 100644 --- a/src/pkg/runtime/funcdata.h +++ b/src/pkg/runtime/funcdata.h @@ -8,9 +8,11 @@ // as well as the compilers. #define PCDATA_ArgSize 0 /* argument size at CALL instruction */ +#define PCDATA_StackMapIndex 1 -#define FUNCDATA_GCArgs 0 /* garbage collector blocks */ -#define FUNCDATA_GCLocals 1 +#define FUNCDATA_ArgsPointerMaps 2 /* garbage collector blocks */ +#define FUNCDATA_LocalsPointerMaps 3 +#define FUNCDATA_DeadPointerMaps 4 // To be used in assembly. #define ARGSIZE(n) PCDATA $PCDATA_ArgSize, $n diff --git a/src/pkg/runtime/mgc0.c b/src/pkg/runtime/mgc0.c index 8275af504..f0ac6dcb8 100644 --- a/src/pkg/runtime/mgc0.c +++ b/src/pkg/runtime/mgc0.c @@ -1367,6 +1367,33 @@ struct BitVector uint32 data[]; }; +typedef struct StackMap StackMap; +struct StackMap +{ + int32 n; + uint32 data[]; +}; + +static BitVector* +stackmapdata(StackMap *stackmap, int32 n) +{ + BitVector *bv; + uint32 *ptr; + uint32 words; + int32 i; + + if(n < 0 || n >= stackmap->n) { + runtime·throw("stackmapdata: index out of range"); + } + ptr = stackmap->data; + for(i = 0; i < n; i++) { + bv = (BitVector*)ptr; + words = ((bv->n + 31) / 32) + 1; + ptr += words; + } + return (BitVector*)ptr; +} + // Scans an interface data value when the interface type indicates // that it is a pointer. static void @@ -1422,101 +1449,69 @@ scanbitvector(byte *scanp, BitVector *bv, bool afterprologue, Scanbuf *sbuf) } } -static void -addstackroots(G *gp) -{ - M *mp; - int32 n; - Stktop *stk; - uintptr sp, guard; - void *base; - uintptr size; - - if(gp == g) - runtime·throw("can't scan our own stack"); - if((mp = gp->m) != nil && mp->helpgc) - runtime·throw("can't scan gchelper stack"); - if(gp->syscallstack != (uintptr)nil) { - // Scanning another goroutine that is about to enter or might - // have just exited a system call. It may be executing code such - // as schedlock and may have needed to start a new stack segment. - // Use the stack segment and stack pointer at the time of - // the system call instead, since that won't change underfoot. - sp = gp->syscallsp; - stk = (Stktop*)gp->syscallstack; - guard = gp->syscallguard; - } else { - // Scanning another goroutine's stack. - // The goroutine is usually asleep (the world is stopped). - sp = gp->sched.sp; - stk = (Stktop*)gp->stackbase; - guard = gp->stackguard; - // For function about to start, context argument is a root too. - if(gp->sched.ctxt != 0 && runtime·mlookup(gp->sched.ctxt, &base, &size, nil)) - addroot((Obj){base, size, 0}); - } - if(ScanStackByFrames) { - USED(sp); - USED(stk); - USED(guard); - addroot((Obj){(byte*)gp, PtrSize, (uintptr)gptrProg}); - } else { - n = 0; - while(stk) { - if(sp < guard-StackGuard || (uintptr)stk < sp) { - runtime·printf("scanstack inconsistent: g%D#%d sp=%p not in [%p,%p]\n", gp->goid, n, sp, guard-StackGuard, stk); - runtime·throw("scanstack"); - } - addroot((Obj){(byte*)sp, (uintptr)stk - sp, (uintptr)defaultProg | PRECISE | LOOP}); - sp = stk->gobuf.sp; - guard = stk->stackguard; - stk = (Stktop*)stk->stackbase; - n++; - } - } -} - +// Scan a stack frame: local variables and function arguments/results. static void scanframe(Stkframe *frame, void *arg) { - BitVector *args, *locals; + Func *f; Scanbuf *sbuf; + StackMap *stackmap; + BitVector *bv; uintptr size; + uintptr targetpc; + int32 pcdata; bool afterprologue; + f = frame->fn; + targetpc = frame->pc; + if(targetpc != f->entry) + targetpc--; + pcdata = runtime·pcdatavalue(f, PCDATA_StackMapIndex, targetpc); + if(pcdata == -1) { + // We do not have a valid pcdata value but there might be a + // stackmap for this function. It is likely that we are looking + // at the function prologue, assume so and hope for the best. + pcdata = 0; + } + sbuf = arg; // Scan local variables if stack frame has been allocated. // Use pointer information if known. afterprologue = (frame->varp > (byte*)frame->sp); if(afterprologue) { - locals = runtime·funcdata(frame->fn, FUNCDATA_GCLocals); - if(locals == nil) { + stackmap = runtime·funcdata(f, FUNCDATA_LocalsPointerMaps); + if(stackmap == nil) { // No locals information, scan everything. size = frame->varp - (byte*)frame->sp; *sbuf->obj.pos++ = (Obj){frame->varp - size, size, 0}; if(sbuf->obj.pos == sbuf->obj.end) flushobjbuf(sbuf); - } else if(locals->n < 0) { - // Locals size information, scan just the - // locals. - size = -locals->n; + } else if(stackmap->n < 0) { + // Locals size information, scan just the locals. + size = -stackmap->n; *sbuf->obj.pos++ = (Obj){frame->varp - size, size, 0}; if(sbuf->obj.pos == sbuf->obj.end) - flushobjbuf(sbuf); - } else if(locals->n > 0) { - // Locals bitmap information, scan just the - // pointers in locals. - size = (locals->n*PtrSize) / BitsPerPointer; - scanbitvector(frame->varp - size, locals, afterprologue, sbuf); + flushobjbuf(sbuf); } else if(stackmap->n > 0) { + // Locals bitmap information, scan just the pointers in + // locals. + if(pcdata < 0 || pcdata >= stackmap->n) { + // don't know where we are + runtime·printf("pcdata is %d and %d stack map entries\n", pcdata, stackmap->n); + runtime·throw("addframeroots: bad symbol table"); + } + bv = stackmapdata(stackmap, pcdata); + size = (bv->n * PtrSize) / BitsPerPointer; + scanbitvector(frame->varp - size, bv, afterprologue, sbuf); } } // Scan arguments. // Use pointer information if known. - args = runtime·funcdata(frame->fn, FUNCDATA_GCArgs); - if(args != nil && args->n > 0) - scanbitvector(frame->argp, args, false, sbuf); - else { + stackmap = runtime·funcdata(f, FUNCDATA_ArgsPointerMaps); + if(stackmap != nil) { + bv = stackmapdata(stackmap, pcdata); + scanbitvector(frame->argp, bv, false, sbuf); + } else { *sbuf->obj.pos++ = (Obj){frame->argp, frame->arglen, 0}; if(sbuf->obj.pos == sbuf->obj.end) flushobjbuf(sbuf); @@ -1550,6 +1545,60 @@ scanstack(G* gp, void *scanbuf) } static void +addstackroots(G *gp) +{ + M *mp; + int32 n; + Stktop *stk; + uintptr sp, guard; + void *base; + uintptr size; + + if(gp == g) + runtime·throw("can't scan our own stack"); + if((mp = gp->m) != nil && mp->helpgc) + runtime·throw("can't scan gchelper stack"); + if(gp->syscallstack != (uintptr)nil) { + // Scanning another goroutine that is about to enter or might + // have just exited a system call. It may be executing code such + // as schedlock and may have needed to start a new stack segment. + // Use the stack segment and stack pointer at the time of + // the system call instead, since that won't change underfoot. + sp = gp->syscallsp; + stk = (Stktop*)gp->syscallstack; + guard = gp->syscallguard; + } else { + // Scanning another goroutine's stack. + // The goroutine is usually asleep (the world is stopped). + sp = gp->sched.sp; + stk = (Stktop*)gp->stackbase; + guard = gp->stackguard; + // For function about to start, context argument is a root too. + if(gp->sched.ctxt != 0 && runtime·mlookup(gp->sched.ctxt, &base, &size, nil)) + addroot((Obj){base, size, 0}); + } + if(ScanStackByFrames) { + USED(sp); + USED(stk); + USED(guard); + addroot((Obj){(byte*)gp, PtrSize, (uintptr)gptrProg}); + } else { + n = 0; + while(stk) { + if(sp < guard-StackGuard || (uintptr)stk < sp) { + runtime·printf("scanstack inconsistent: g%D#%d sp=%p not in [%p,%p]\n", gp->goid, n, sp, guard-StackGuard, stk); + runtime·throw("scanstack"); + } + addroot((Obj){(byte*)sp, (uintptr)stk - sp, (uintptr)defaultProg | PRECISE | LOOP}); + sp = stk->gobuf.sp; + guard = stk->stackguard; + stk = (Stktop*)stk->stackbase; + n++; + } + } +} + +static void addfinroots(void *v) { uintptr size; |