author    Austin Clements <austin@google.com>    2014-11-13 13:51:44 -0500
committer Austin Clements <austin@google.com>    2014-11-13 13:51:44 -0500
commit    9cabb766eb4acbe2c11ad0084659710919f40c0d (patch)
tree      55332117bae9812bea83b9fae47908eaad80d1c4
parent    f2627f236f22bc9f623fa6c6d63c10aa73ea533f (diff)
download  go-9cabb766eb4acbe2c11ad0084659710919f40c0d.tar.gz
[dev.power64] 9g: implement regopt
This adds registerization support to 9g equivalent to what the other
compilers have.

LGTM=rsc
R=rsc, dave
CC=golang-codereviews
https://codereview.appspot.com/174980043
-rw-r--r--  src/cmd/9g/opt.h    49
-rw-r--r--  src/cmd/9g/reg.c  1212
2 files changed, 1232 insertions, 29 deletions
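Before the diff itself, a note on the central representation change: Reg.regu widens from int32 to uint64 because 9g models all 32 general and all 32 floating-point registers as regopt pseudo-variables (NREGVAR = 64), so a "registers used" bitmap needs 64 bits. The following minimal standalone sketch is illustrative only; rtob/ftob and the register numbers are stand-ins, not the tree's real RtoB/FtoB or D_R0/D_F0.

/* Illustrative only: pack 32 general + 32 float registers into one uint64,
 * mirroring why Reg.regu becomes 64 bits wide in this change. */
#include <stdio.h>
#include <stdint.h>

static uint64_t rtob(int r) { return 1ULL << r; }        /* R0..R31 -> bits 0..31  */
static uint64_t ftob(int f) { return 1ULL << (32 + f); } /* F0..F31 -> bits 32..63 */

int main(void)
{
	uint64_t regu = 0;

	regu |= rtob(3);   /* some instruction uses R3 */
	regu |= ftob(29);  /* ...and F29               */
	printf("regu = %#llx\n", (unsigned long long)regu); /* bits 3 and 61 set */
	return 0;
}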
diff --git a/src/cmd/9g/opt.h b/src/cmd/9g/opt.h
index d3cbcb957..7f15b5a69 100644
--- a/src/cmd/9g/opt.h
+++ b/src/cmd/9g/opt.h
@@ -70,24 +70,40 @@ struct Reg
{
Flow f;
- Bits set; // variables written by this instruction.
- Bits use1; // variables read by prog->from.
- Bits use2; // variables read by prog->to.
+ Bits set; // regopt variables written by this instruction.
+ Bits use1; // regopt variables read by prog->from.
+ Bits use2; // regopt variables read by prog->to.
+ // refahead/refbehind are the regopt variables whose current
+ // value may be used in the following/preceding instructions
+ // up to a CALL (or the value is clobbered).
Bits refbehind;
Bits refahead;
+ // calahead/calbehind are similar, but for variables in
+ // instructions that are reachable after hitting at least one
+ // CALL.
Bits calbehind;
Bits calahead;
Bits regdiff;
Bits act;
- int32 regu; // register used bitmap
+ uint64 regu; // register used bitmap
};
#define R ((Reg*)0)
/*c2go extern Reg *R; */
#define NRGN 600
/*c2go enum { NRGN = 600 }; */
+
+// A Rgn represents a single regopt variable over a region of code
+// where a register could potentially be dedicated to that variable.
+// The code encompassed by a Rgn is defined by the flow graph,
+// starting at enter, flood-filling forward while varno is refahead
+// and backward while varno is refbehind, and following branches. A
+// single variable may be represented by multiple disjoint Rgns and
+// each Rgn may choose a different register for that variable.
+// Registers are allocated to regions greedily in order of descending
+// cost.
struct Rgn
{
Reg* enter;
@@ -104,7 +120,7 @@ EXTERN Rgn* rgp;
EXTERN int nregion;
EXTERN int nvar;
EXTERN int32 regbits;
-EXTERN int32 exregbits;
+EXTERN int32 exregbits; // TODO(austin) not used; remove
EXTERN Bits externs;
EXTERN Bits params;
EXTERN Bits consts;
@@ -118,10 +134,8 @@ EXTERN struct
{
int32 ncvtreg;
int32 nspill;
- int32 nreload;
int32 ndelmov;
int32 nvar;
- int32 naddr;
} ostats;
/*
@@ -133,10 +147,10 @@ void addmove(Reg*, int, int, int);
Bits mkvar(Reg*, Adr*);
void prop(Reg*, Bits, Bits);
void synch(Reg*, Bits);
-uint32 allreg(uint32, Rgn*);
+uint64 allreg(uint64, Rgn*);
void paint1(Reg*, int);
-uint32 paint2(Reg*, int);
-void paint3(Reg*, int, int32, int);
+uint64 paint2(Reg*, int, int);
+void paint3(Reg*, int, uint64, int);
void addreg(Adr*, int);
void dumpone(Flow*, int);
void dumpit(char*, Flow*, int);
@@ -160,8 +174,8 @@ typedef struct ProgInfo ProgInfo;
struct ProgInfo
{
uint32 flags; // the bits below
- uint64 reguse; // required registers used by this instruction
- uint64 regset; // required registers set by this instruction
+ uint64 reguse; // registers implicitly used by this instruction
+ uint64 regset; // registers implicitly set by this instruction
uint64 regindex; // registers used by addressing mode
};
@@ -182,20 +196,21 @@ enum
SizeF = 1<<7, // float aka float32
SizeD = 1<<8, // double aka float64
- // Left side: address taken, read, write.
+ // Left side (Prog.from): address taken, read, write.
LeftAddr = 1<<9,
LeftRead = 1<<10,
LeftWrite = 1<<11,
-
- // Register in middle; never written.
+
+ // Register in middle (Prog.reg); only ever read.
RegRead = 1<<12,
CanRegRead = 1<<13,
-
- // Right side: address taken, read, write.
+
+ // Right side (Prog.to): address taken, read, write.
RightAddr = 1<<14,
RightRead = 1<<15,
RightWrite = 1<<16,
+ // Instruction updates whichever of from/to is type D_OREG
PostInc = 1<<17,
// Instruction kinds
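As context for the flags above: pass 1 of the new regopt (in reg.c below) ORs the variable bits returned by mkvar for p->from and p->to into each instruction's use1/use2/set according to LeftRead, RightRead and RightWrite. The sketch below is a simplified standalone approximation, not the compiler's code; the flag values, the UseSet struct and accumulate() are stand-ins, and the real code also handles LeftAddr/RightAddr, the middle register, and the implicit register sets from ProgInfo.

/* Simplified sketch: how proginfo-style flags drive the per-instruction
 * use/set bitmaps built in regopt's pass 1. */
#include <stdio.h>
#include <stdint.h>

enum {
	LeftRead   = 1 << 10,
	RightRead  = 1 << 15,
	RightWrite = 1 << 16,
};

typedef struct {
	uint64_t use1; /* variables read via from */
	uint64_t use2; /* variables read via to   */
	uint64_t set;  /* variables written       */
} UseSet;

/* fromvar/tovar stand in for the Bits returned by mkvar(r, &p->from/&p->to). */
static void
accumulate(UseSet *r, uint32_t flags, uint64_t fromvar, uint64_t tovar)
{
	if(flags & LeftRead)
		r->use1 |= fromvar;
	if(flags & RightRead)
		r->use2 |= tovar;
	if(flags & RightWrite)
		r->set |= tovar;
}

int main(void)
{
	UseSet r = {0, 0, 0};

	/* e.g. MOVD x, y: reads the "x" variable bit, writes the "y" bit */
	accumulate(&r, LeftRead|RightWrite, 1ULL<<0, 1ULL<<1);
	printf("use1=%#llx use2=%#llx set=%#llx\n",
		(unsigned long long)r.use1,
		(unsigned long long)r.use2,
		(unsigned long long)r.set);
	return 0;
}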
diff --git a/src/cmd/9g/reg.c b/src/cmd/9g/reg.c
index bbebf3fe0..b911a2399 100644
--- a/src/cmd/9g/reg.c
+++ b/src/cmd/9g/reg.c
@@ -33,14 +33,1197 @@
#include "gg.h"
#include "opt.h"
+#define NREGVAR 64 /* 32 general + 32 floating */
+#define REGBITS ((uint64)0xffffffffffffffffull)
+/*c2go enum {
+ NREGVAR = 64,
+ REGBITS = 0xffffffffffffffff,
+};
+*/
+
+static Reg* firstr;
+static int first = 1;
+
+int
+rcmp(const void *a1, const void *a2)
+{
+ Rgn *p1, *p2;
+ int c1, c2;
+
+ p1 = (Rgn*)a1;
+ p2 = (Rgn*)a2;
+ c1 = p2->cost;
+ c2 = p1->cost;
+ if(c1 -= c2)
+ return c1;
+ return p2->varno - p1->varno;
+}
+
+static void
+setaddrs(Bits bit)
+{
+ int i, n;
+ Var *v;
+ Node *node;
+
+ while(bany(&bit)) {
+ // convert each bit to a variable
+ i = bnum(bit);
+ node = var[i].node;
+ n = var[i].name;
+ biclr(&bit, i);
+
+ // disable all pieces of that variable
+ for(i=0; i<nvar; i++) {
+ v = var+i;
+ if(v->node == node && v->name == n)
+ v->addr = 2;
+ }
+ }
+}
+
+static char* regname[] = {
+ ".R0",
+ ".R1",
+ ".R2",
+ ".R3",
+ ".R4",
+ ".R5",
+ ".R6",
+ ".R7",
+ ".R8",
+ ".R9",
+ ".R10",
+ ".R11",
+ ".R12",
+ ".R13",
+ ".R14",
+ ".R15",
+ ".R16",
+ ".R17",
+ ".R18",
+ ".R19",
+ ".R20",
+ ".R21",
+ ".R22",
+ ".R23",
+ ".R24",
+ ".R25",
+ ".R26",
+ ".R27",
+ ".R28",
+ ".R29",
+ ".R30",
+ ".R31",
+ ".F0",
+ ".F1",
+ ".F2",
+ ".F3",
+ ".F4",
+ ".F5",
+ ".F6",
+ ".F7",
+ ".F8",
+ ".F9",
+ ".F10",
+ ".F11",
+ ".F12",
+ ".F13",
+ ".F14",
+ ".F15",
+ ".F16",
+ ".F17",
+ ".F18",
+ ".F19",
+ ".F20",
+ ".F21",
+ ".F22",
+ ".F23",
+ ".F24",
+ ".F25",
+ ".F26",
+ ".F27",
+ ".F28",
+ ".F29",
+ ".F30",
+ ".F31",
+};
+
+static Node* regnodes[NREGVAR];
+
+static void walkvardef(Node *n, Reg *r, int active);
+
void
-regopt(Prog *p)
+regopt(Prog *firstp)
{
- USED(p);
- // TODO(minux)
+ Reg *r, *r1;
+ Prog *p;
+ Graph *g;
+ ProgInfo info;
+ int i, z, active;
+ uint64 vreg, usedreg;
+ Bits bit;
+
+ if(first) {
+ fmtinstall('Q', Qconv);
+ first = 0;
+ }
+
+ mergetemp(firstp);
+
+ /*
+ * control flow is more complicated in generated go code
+ * than in generated c code. define pseudo-variables for
+ * registers, so we have complete register usage information.
+ */
+ nvar = NREGVAR;
+ memset(var, 0, NREGVAR*sizeof var[0]);
+ for(i=0; i<NREGVAR; i++) {
+ if(regnodes[i] == N)
+ regnodes[i] = newname(lookup(regname[i]));
+ var[i].node = regnodes[i];
+ }
+
+ // Exclude registers with fixed functions
+ regbits = (1<<D_R0)|RtoB(REGSP)|RtoB(REGG);
+ // Also exclude floating point registers with fixed constants
+ regbits |= FtoB(D_F0+27)|FtoB(D_F0+28)|FtoB(D_F0+29)|FtoB(D_F0+30)|FtoB(D_F0+31);
+ externs = zbits;
+ params = zbits;
+ consts = zbits;
+ addrs = zbits;
+ ivar = zbits;
+ ovar = zbits;
+
+ /*
+ * pass 1
+ * build aux data structure
+ * allocate pcs
+ * find use and set of variables
+ */
+ g = flowstart(firstp, sizeof(Reg));
+ if(g == nil) {
+ for(i=0; i<nvar; i++)
+ var[i].node->opt = nil;
+ return;
+ }
+
+ firstr = (Reg*)g->start;
+
+ for(r = firstr; r != R; r = (Reg*)r->f.link) {
+ p = r->f.prog;
+ if(p->as == AVARDEF || p->as == AVARKILL)
+ continue;
+ proginfo(&info, p);
+
+ // Avoid making variables for direct-called functions.
+ if(p->as == ABL && p->to.name == D_EXTERN)
+ continue;
+
+ // from vs to doesn't matter for registers
+ r->use1.b[0] |= info.reguse | info.regindex;
+ r->set.b[0] |= info.regset;
+
+ // Compute used register for from
+ bit = mkvar(r, &p->from);
+ if(info.flags & LeftAddr)
+ setaddrs(bit);
+ if(info.flags & LeftRead)
+ for(z=0; z<BITS; z++)
+ r->use1.b[z] |= bit.b[z];
+
+ // Compute used register for reg
+ if(info.flags & RegRead) {
+ if(p->from.type != D_FREG)
+ r->use1.b[0] |= RtoB(p->reg);
+ else
+ r->use1.b[0] |= FtoB(D_F0+p->reg);
+ }
+
+ // Currently we never generate three register forms.
+ // If we do, this will need to change.
+ if(p->from3.type != D_NONE)
+ fatal("regopt not implemented for from3");
+
+ // Compute used register for to
+ bit = mkvar(r, &p->to);
+ if(info.flags & RightAddr)
+ setaddrs(bit);
+ if(info.flags & RightRead)
+ for(z=0; z<BITS; z++)
+ r->use2.b[z] |= bit.b[z];
+ if(info.flags & RightWrite)
+ for(z=0; z<BITS; z++)
+ r->set.b[z] |= bit.b[z];
+ }
+
+ for(i=0; i<nvar; i++) {
+ Var *v = var+i;
+ if(v->addr) {
+ bit = blsh(i);
+ for(z=0; z<BITS; z++)
+ addrs.b[z] |= bit.b[z];
+ }
+
+ if(debug['R'] && debug['v'])
+ print("bit=%2d addr=%d et=%-6E w=%-2d s=%N + %lld\n",
+ i, v->addr, v->etype, v->width, v->node, v->offset);
+ }
+
+ if(debug['R'] && debug['v'])
+ dumpit("pass1", &firstr->f, 1);
+
+ /*
+ * pass 2
+ * find looping structure
+ */
+ flowrpo(g);
+
+ if(debug['R'] && debug['v'])
+ dumpit("pass2", &firstr->f, 1);
+
+ /*
+ * pass 2.5
+ * iterate propagating fat vardef covering forward
+ * r->act records vars with a VARDEF since the last CALL.
+ * (r->act will be reused in pass 5 for something else,
+ * but we'll be done with it by then.)
+ */
+ active = 0;
+ for(r = firstr; r != R; r = (Reg*)r->f.link) {
+ r->f.active = 0;
+ r->act = zbits;
+ }
+ for(r = firstr; r != R; r = (Reg*)r->f.link) {
+ p = r->f.prog;
+ if(p->as == AVARDEF && isfat(p->to.node->type) && p->to.node->opt != nil) {
+ active++;
+ walkvardef(p->to.node, r, active);
+ }
+ }
+
+ /*
+ * pass 3
+ * iterate propagating usage
+ * back until flow graph is complete
+ */
+loop1:
+ change = 0;
+ for(r = firstr; r != R; r = (Reg*)r->f.link)
+ r->f.active = 0;
+ for(r = firstr; r != R; r = (Reg*)r->f.link)
+ if(r->f.prog->as == ARET)
+ prop(r, zbits, zbits);
+loop11:
+ /* pick up unreachable code */
+ i = 0;
+ for(r = firstr; r != R; r = r1) {
+ r1 = (Reg*)r->f.link;
+ if(r1 && r1->f.active && !r->f.active) {
+ prop(r, zbits, zbits);
+ i = 1;
+ }
+ }
+ if(i)
+ goto loop11;
+ if(change)
+ goto loop1;
+
+ if(debug['R'] && debug['v'])
+ dumpit("pass3", &firstr->f, 1);
+
+ /*
+ * pass 4
+ * iterate propagating register/variable synchrony
+ * forward until graph is complete
+ */
+loop2:
+ change = 0;
+ for(r = firstr; r != R; r = (Reg*)r->f.link)
+ r->f.active = 0;
+ synch(firstr, zbits);
+ if(change)
+ goto loop2;
+
+ if(debug['R'] && debug['v'])
+ dumpit("pass4", &firstr->f, 1);
+
+ /*
+ * pass 4.5
+ * move register pseudo-variables into regu.
+ */
+ for(r = firstr; r != R; r = (Reg*)r->f.link) {
+ r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
+
+ r->set.b[0] &= ~REGBITS;
+ r->use1.b[0] &= ~REGBITS;
+ r->use2.b[0] &= ~REGBITS;
+ r->refbehind.b[0] &= ~REGBITS;
+ r->refahead.b[0] &= ~REGBITS;
+ r->calbehind.b[0] &= ~REGBITS;
+ r->calahead.b[0] &= ~REGBITS;
+ r->regdiff.b[0] &= ~REGBITS;
+ r->act.b[0] &= ~REGBITS;
+ }
+
+ if(debug['R'] && debug['v'])
+ dumpit("pass4.5", &firstr->f, 1);
+
+ /*
+ * pass 5
+ * isolate regions
+ * calculate costs (paint1)
+ */
+ r = firstr;
+ if(r) {
+ for(z=0; z<BITS; z++)
+ bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
+ ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
+ if(bany(&bit) && !r->f.refset) {
+ // should never happen - all variables are preset
+ if(debug['w'])
+ print("%L: used and not set: %Q\n", r->f.prog->lineno, bit);
+ r->f.refset = 1;
+ }
+ }
+ for(r = firstr; r != R; r = (Reg*)r->f.link)
+ r->act = zbits;
+ rgp = region;
+ nregion = 0;
+ for(r = firstr; r != R; r = (Reg*)r->f.link) {
+ for(z=0; z<BITS; z++)
+ bit.b[z] = r->set.b[z] &
+ ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
+ if(bany(&bit) && !r->f.refset) {
+ if(debug['w'])
+ print("%L: set and not used: %Q\n", r->f.prog->lineno, bit);
+ r->f.refset = 1;
+ excise(&r->f);
+ }
+ for(z=0; z<BITS; z++)
+ bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
+ while(bany(&bit)) {
+ i = bnum(bit);
+ rgp->enter = r;
+ rgp->varno = i;
+ change = 0;
+ paint1(r, i);
+ biclr(&bit, i);
+ if(change <= 0)
+ continue;
+ rgp->cost = change;
+ nregion++;
+ if(nregion >= NRGN) {
+ if(debug['R'] && debug['v'])
+ print("too many regions\n");
+ goto brk;
+ }
+ rgp++;
+ }
+ }
+brk:
+ qsort(region, nregion, sizeof(region[0]), rcmp);
+
+ if(debug['R'] && debug['v'])
+ dumpit("pass5", &firstr->f, 1);
+
+ /*
+ * pass 6
+ * determine used registers (paint2)
+ * replace code (paint3)
+ */
+ rgp = region;
+ if(debug['R'] && debug['v'])
+ print("\nregisterizing\n");
+ for(i=0; i<nregion; i++) {
+ if(debug['R'] && debug['v'])
+ print("region %d: cost %d varno %d enter %d\n", i, rgp->cost, rgp->varno, rgp->enter->f.prog->pc);
+ bit = blsh(rgp->varno);
+ usedreg = paint2(rgp->enter, rgp->varno, 0);
+ vreg = allreg(usedreg, rgp);
+ if(rgp->regno != 0) {
+ if(debug['R'] && debug['v']) {
+ Var *v;
+
+ v = var + rgp->varno;
+ print("registerize %N+%lld (bit=%2d et=%2E) in %R usedreg=%llx vreg=%llx\n",
+ v->node, v->offset, rgp->varno, v->etype, rgp->regno, usedreg, vreg);
+ }
+ paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
+ }
+ rgp++;
+ }
+
+ /*
+ * free aux structures. peep allocates new ones.
+ */
+ for(i=0; i<nvar; i++)
+ var[i].node->opt = nil;
+ flowend(g);
+ firstr = R;
+
+ if(debug['R'] && debug['v']) {
+ // Rebuild flow graph, since we inserted instructions
+ g = flowstart(firstp, sizeof(Reg));
+ firstr = (Reg*)g->start;
+ dumpit("pass6", &firstr->f, 1);
+ flowend(g);
+ firstr = R;
+ }
+
+ /*
+ * pass 7
+ * peep-hole on basic block
+ */
+ if(!debug['R'] || debug['P'])
+ peep(firstp);
+
+ /*
+ * eliminate nops
+ */
+ for(p=firstp; p!=P; p=p->link) {
+ while(p->link != P && p->link->as == ANOP)
+ p->link = p->link->link;
+ if(p->to.type == D_BRANCH)
+ while(p->to.u.branch != P && p->to.u.branch->as == ANOP)
+ p->to.u.branch = p->to.u.branch->link;
+ }
+
+ if(debug['R']) {
+ if(ostats.ncvtreg ||
+ ostats.nspill ||
+ ostats.ndelmov ||
+ ostats.nvar ||
+ 0)
+ print("\nstats\n");
+
+ if(ostats.ncvtreg)
+ print(" %4d cvtreg\n", ostats.ncvtreg);
+ if(ostats.nspill)
+ print(" %4d spill\n", ostats.nspill);
+ if(ostats.ndelmov)
+ print(" %4d delmov\n", ostats.ndelmov);
+ if(ostats.nvar)
+ print(" %4d var\n", ostats.nvar);
+
+ memset(&ostats, 0, sizeof(ostats));
+ }
+
return;
}
+static void
+walkvardef(Node *n, Reg *r, int active)
+{
+ Reg *r1, *r2;
+ int bn;
+ Var *v;
+
+ for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) {
+ if(r1->f.active == active)
+ break;
+ r1->f.active = active;
+ if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n)
+ break;
+ for(v=n->opt; v!=nil; v=v->nextinnode) {
+ bn = v - var;
+ biset(&r1->act, bn);
+ }
+ if(r1->f.prog->as == ABL)
+ break;
+ }
+
+ for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1)
+ if(r2->f.s2 != nil)
+ walkvardef(n, (Reg*)r2->f.s2, active);
+}
+
+/*
+ * add mov b,rn
+ * just after r
+ */
+void
+addmove(Reg *r, int bn, int rn, int f)
+{
+ Prog *p, *p1, *p2;
+ Adr *a;
+ Var *v;
+
+ p1 = mal(sizeof(*p1));
+ *p1 = zprog;
+ p = r->f.prog;
+
+ // If there's a stack fixup coming (ADD $n,R1 after BL newproc or BL deferproc),
+ // delay the load until after the fixup.
+ p2 = p->link;
+ if(p2 && p2->as == AADD && p2->to.reg == REGSP && p2->to.type == D_REG)
+ p = p2;
+
+ p1->link = p->link;
+ p->link = p1;
+ p1->lineno = p->lineno;
+
+ v = var + bn;
+
+ a = &p1->to;
+ a->name = v->name;
+ a->node = v->node;
+ a->sym = linksym(v->node->sym);
+ a->offset = v->offset;
+ a->etype = v->etype;
+ a->type = D_OREG;
+ if(a->etype == TARRAY || a->sym == nil)
+ a->type = D_CONST;
+
+ if(v->addr)
+ fatal("addmove: shouldn't be doing this %A\n", a);
+
+ switch(v->etype) {
+ default:
+ print("What is this %E\n", v->etype);
+
+ case TINT8:
+ p1->as = AMOVB;
+ break;
+ case TBOOL:
+ case TUINT8:
+//print("movbu %E %d %S\n", v->etype, bn, v->sym);
+ p1->as = AMOVBZ;
+ break;
+ case TINT16:
+ p1->as = AMOVH;
+ break;
+ case TUINT16:
+ p1->as = AMOVHZ;
+ break;
+ case TINT32:
+ p1->as = AMOVW;
+ break;
+ case TUINT32:
+ case TPTR32:
+ p1->as = AMOVWZ;
+ break;
+ case TINT64:
+ case TUINT64:
+ case TPTR64:
+ p1->as = AMOVD;
+ break;
+ case TFLOAT32:
+ p1->as = AFMOVS;
+ break;
+ case TFLOAT64:
+ p1->as = AFMOVD;
+ break;
+ }
+
+ p1->from.type = D_REG;
+ p1->from.reg = rn;
+ if(rn >= NREG) {
+ p1->from.type = D_FREG;
+ p1->from.reg = rn-NREG;
+ }
+ if(!f) {
+ p1->from = *a;
+ *a = zprog.from;
+ a->type = D_REG;
+ a->reg = rn;
+ if(rn >= NREG) {
+ a->type = D_FREG;
+ a->reg = rn-NREG;
+ }
+ if(v->etype == TUINT8 || v->etype == TBOOL)
+ p1->as = AMOVBZ;
+ if(v->etype == TUINT16)
+ p1->as = AMOVHZ;
+ }
+ if(debug['R'])
+ print("%P\t.a%P\n", p, p1);
+ ostats.nspill++;
+}
+
+static int
+overlap(int64 o1, int w1, int64 o2, int w2)
+{
+ int64 t1, t2;
+
+ t1 = o1+w1;
+ t2 = o2+w2;
+
+ if(!(t1 > o2 && t2 > o1))
+ return 0;
+
+ return 1;
+}
+
+Bits
+mkvar(Reg *r, Adr *a)
+{
+ Var *v;
+ int i, t, n, et, z, flag;
+ int64 w;
+ int64 o;
+ Bits bit;
+ Node *node;
+ USED(r);
+
+ // mark registers used
+ t = a->type;
+ switch(t) {
+ default:
+ print("type %d %d %D\n", t, a->name, a);
+ goto none;
+
+ case D_NONE:
+ goto none;
+
+ case D_BRANCH:
+ case D_CONST:
+ case D_FCONST:
+ case D_SCONST:
+ case D_SPR:
+ case D_OREG:
+ break;
+
+ case D_REG:
+ if(a->reg != NREG) {
+ bit = zbits;
+ bit.b[0] = RtoB(a->reg);
+ return bit;
+ }
+ break;
+
+ case D_FREG:
+ if(a->reg != NREG) {
+ bit = zbits;
+ bit.b[0] = FtoB(D_F0+a->reg);
+ return bit;
+ }
+ break;
+ }
+
+ switch(a->name) {
+ default:
+ goto none;
+
+ case D_EXTERN:
+ case D_STATIC:
+ case D_AUTO:
+ case D_PARAM:
+ n = a->name;
+ break;
+ }
+
+ node = a->node;
+ if(node == N || node->op != ONAME || node->orig == N)
+ goto none;
+ node = node->orig;
+ if(node->orig != node)
+ fatal("%D: bad node", a);
+ if(node->sym == S || node->sym->name[0] == '.')
+ goto none;
+ et = a->etype;
+ o = a->offset;
+ w = a->width;
+ if(w < 0)
+ fatal("bad width %lld for %D", w, a);
+
+ flag = 0;
+ for(i=0; i<nvar; i++) {
+ v = var+i;
+ if(v->node == node && v->name == n) {
+ if(v->offset == o)
+ if(v->etype == et)
+ if(v->width == w)
+ return blsh(i);
+
+ // if they overlap, disable both
+ if(overlap(v->offset, v->width, o, w)) {
+ v->addr = 1;
+ flag = 1;
+ }
+ }
+ }
+
+ switch(et) {
+ case 0:
+ case TFUNC:
+ goto none;
+ }
+
+ if(nvar >= NVAR) {
+ if(debug['w'] > 1 && node != N)
+ fatal("variable not optimized: %#N", node);
+
+ // If we're not tracking a word in a variable, mark the rest as
+ // having its address taken, so that we keep the whole thing
+ // live at all calls. otherwise we might optimize away part of
+ // a variable but not all of it.
+ for(i=0; i<nvar; i++) {
+ v = var+i;
+ if(v->node == node)
+ v->addr = 1;
+ }
+ goto none;
+ }
+
+ i = nvar;
+ nvar++;
+ v = var+i;
+ v->offset = o;
+ v->name = n;
+ v->etype = et;
+ v->width = w;
+ v->addr = flag; // funny punning
+ v->node = node;
+
+ // node->opt is the head of a linked list
+ // of Vars within the given Node, so that
+ // we can start at a Var and find all the other
+ // Vars in the same Go variable.
+ v->nextinnode = node->opt;
+ node->opt = v;
+
+ bit = blsh(i);
+ if(n == D_EXTERN || n == D_STATIC)
+ for(z=0; z<BITS; z++)
+ externs.b[z] |= bit.b[z];
+ if(n == D_PARAM)
+ for(z=0; z<BITS; z++)
+ params.b[z] |= bit.b[z];
+
+ if(node->class == PPARAM)
+ for(z=0; z<BITS; z++)
+ ivar.b[z] |= bit.b[z];
+ if(node->class == PPARAMOUT)
+ for(z=0; z<BITS; z++)
+ ovar.b[z] |= bit.b[z];
+
+ // Treat values with their address taken as live at calls,
+ // because the garbage collector's liveness analysis in ../gc/plive.c does.
+ // These must be consistent or else we will elide stores and the garbage
+ // collector will see uninitialized data.
+ // The typical case where our own analysis is out of sync is when the
+ // node appears to have its address taken but that code doesn't actually
+ // get generated and therefore doesn't show up as an address being
+ // taken when we analyze the instruction stream.
+ // One instance of this case is when a closure uses the same name as
+ // an outer variable for one of its own variables declared with :=.
+ // The parser flags the outer variable as possibly shared, and therefore
+ // sets addrtaken, even though it ends up not being actually shared.
+ // If we were better about _ elision, _ = &x would suffice too.
+ // The broader := in a closure problem is mentioned in a comment in
+ // closure.c:/^typecheckclosure and dcl.c:/^oldname.
+ if(node->addrtaken)
+ v->addr = 1;
+
+ // Disable registerization for globals, because:
+ // (1) we might panic at any time and we want the recovery code
+ // to see the latest values (issue 1304).
+ // (2) we don't know what pointers might point at them and we want
+ // loads via those pointers to see updated values and vice versa (issue 7995).
+ //
+ // Disable registerization for results if using defer, because the deferred func
+ // might recover and return, causing the current values to be used.
+ if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT))
+ v->addr = 1;
+
+ if(debug['R'])
+ print("bit=%2d et=%2E w=%lld+%lld %#N %D flag=%d\n", i, et, o, w, node, a, v->addr);
+ ostats.nvar++;
+
+ return bit;
+
+none:
+ return zbits;
+}
+
+void
+prop(Reg *r, Bits ref, Bits cal)
+{
+ Reg *r1, *r2;
+ int z, i, j;
+ Var *v, *v1;
+
+ for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) {
+ for(z=0; z<BITS; z++) {
+ ref.b[z] |= r1->refahead.b[z];
+ if(ref.b[z] != r1->refahead.b[z]) {
+ r1->refahead.b[z] = ref.b[z];
+ change++;
+ }
+ cal.b[z] |= r1->calahead.b[z];
+ if(cal.b[z] != r1->calahead.b[z]) {
+ r1->calahead.b[z] = cal.b[z];
+ change++;
+ }
+ }
+ switch(r1->f.prog->as) {
+ case ABL:
+ if(noreturn(r1->f.prog))
+ break;
+
+ // Mark all input variables (ivar) as used, because that's what the
+ // liveness bitmaps say. The liveness bitmaps say that so that a
+ // panic will not show stale values in the parameter dump.
+ // Mark variables with a recent VARDEF (r1->act) as used,
+ // so that the optimizer flushes initializations to memory,
+ // so that if a garbage collection happens during this CALL,
+ // the collector will see initialized memory. Again this is to
+ // match what the liveness bitmaps say.
+ for(z=0; z<BITS; z++) {
+ cal.b[z] |= ref.b[z] | externs.b[z] | ivar.b[z] | r1->act.b[z];
+ ref.b[z] = 0;
+ }
+
+ // cal.b is the current approximation of what's live across the call.
+ // Every bit in cal.b is a single stack word. For each such word,
+ // find all the other tracked stack words in the same Go variable
+ // (struct/slice/string/interface) and mark them live too.
+ // This is necessary because the liveness analysis for the garbage
+ // collector works at variable granularity, not at word granularity.
+ // It is fundamental for slice/string/interface: the garbage collector
+ // needs the whole value, not just some of the words, in order to
+ // interpret the other bits correctly. Specifically, slice needs a consistent
+ // ptr and cap, string needs a consistent ptr and len, and interface
+ // needs a consistent type word and data word.
+ for(z=0; z<BITS; z++) {
+ if(cal.b[z] == 0)
+ continue;
+ for(i=0; i<64; i++) {
+ if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0)
+ continue;
+ v = var+z*64+i;
+ if(v->node->opt == nil) // v represents fixed register, not Go variable
+ continue;
+
+ // v->node->opt is the head of a linked list of Vars
+ // corresponding to tracked words from the Go variable v->node.
+ // Walk the list and set all the bits.
+ // For a large struct this could end up being quadratic:
+ // after the first setting, the outer loop (for z, i) would see a 1 bit
+ // for all of the remaining words in the struct, and for each such
+ // word would go through and turn on all the bits again.
+ // To avoid the quadratic behavior, we only turn on the bits if
+ // v is the head of the list or if the head's bit is not yet turned on.
+ // This will set the bits at most twice, keeping the overall loop linear.
+ v1 = v->node->opt;
+ j = v1 - var;
+ if(v == v1 || !btest(&cal, j)) {
+ for(; v1 != nil; v1 = v1->nextinnode) {
+ j = v1 - var;
+ biset(&cal, j);
+ }
+ }
+ }
+ }
+ break;
+
+ case ATEXT:
+ for(z=0; z<BITS; z++) {
+ cal.b[z] = 0;
+ ref.b[z] = 0;
+ }
+ break;
+
+ case ARET:
+ for(z=0; z<BITS; z++) {
+ cal.b[z] = externs.b[z] | ovar.b[z];
+ ref.b[z] = 0;
+ }
+ break;
+ }
+ for(z=0; z<BITS; z++) {
+ ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
+ r1->use1.b[z] | r1->use2.b[z];
+ cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
+ r1->refbehind.b[z] = ref.b[z];
+ r1->calbehind.b[z] = cal.b[z];
+ }
+ if(r1->f.active)
+ break;
+ r1->f.active = 1;
+ }
+ for(; r != r1; r = (Reg*)r->f.p1)
+ for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link)
+ prop(r2, r->refbehind, r->calbehind);
+}
+
+void
+synch(Reg *r, Bits dif)
+{
+ Reg *r1;
+ int z;
+
+ for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) {
+ for(z=0; z<BITS; z++) {
+ dif.b[z] = (dif.b[z] &
+ ~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
+ r1->set.b[z] | r1->regdiff.b[z];
+ if(dif.b[z] != r1->regdiff.b[z]) {
+ r1->regdiff.b[z] = dif.b[z];
+ change++;
+ }
+ }
+ if(r1->f.active)
+ break;
+ r1->f.active = 1;
+ for(z=0; z<BITS; z++)
+ dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
+ if(r1->f.s2 != nil)
+ synch((Reg*)r1->f.s2, dif);
+ }
+}
+
+uint64
+allreg(uint64 b, Rgn *r)
+{
+ Var *v;
+ int i;
+
+ v = var + r->varno;
+ r->regno = 0;
+ switch(v->etype) {
+
+ default:
+ fatal("unknown etype %d/%E", bitno(b), v->etype);
+ break;
+
+ case TINT8:
+ case TUINT8:
+ case TINT16:
+ case TUINT16:
+ case TINT32:
+ case TUINT32:
+ case TINT64:
+ case TUINT64:
+ case TINT:
+ case TUINT:
+ case TUINTPTR:
+ case TBOOL:
+ case TPTR32:
+ case TPTR64:
+ i = BtoR(~b);
+ if(i && r->cost > 0) {
+ r->regno = i;
+ return RtoB(i);
+ }
+ break;
+
+ case TFLOAT32:
+ case TFLOAT64:
+ i = BtoF(~b);
+ if(i && r->cost > 0) {
+ r->regno = i;
+ return FtoB(i);
+ }
+ break;
+ }
+ return 0;
+}
+
+void
+paint1(Reg *r, int bn)
+{
+ Reg *r1;
+ int z;
+ uint64 bb;
+
+ z = bn/64;
+ bb = 1LL<<(bn%64);
+ if(r->act.b[z] & bb)
+ return;
+ for(;;) {
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ r1 = (Reg*)r->f.p1;
+ if(r1 == R)
+ break;
+ if(!(r1->refahead.b[z] & bb))
+ break;
+ if(r1->act.b[z] & bb)
+ break;
+ r = r1;
+ }
+
+ if(LOAD(r) & ~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) {
+ change -= CLOAD * r->f.loop;
+ }
+ for(;;) {
+ r->act.b[z] |= bb;
+
+ if(r->f.prog->as != ANOP) { // don't give credit for NOPs
+ if(r->use1.b[z] & bb)
+ change += CREF * r->f.loop;
+ if((r->use2.b[z]|r->set.b[z]) & bb)
+ change += CREF * r->f.loop;
+ }
+
+ if(STORE(r) & r->regdiff.b[z] & bb) {
+ change -= CLOAD * r->f.loop;
+ }
+
+ if(r->refbehind.b[z] & bb)
+ for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
+ if(r1->refahead.b[z] & bb)
+ paint1(r1, bn);
+
+ if(!(r->refahead.b[z] & bb))
+ break;
+ r1 = (Reg*)r->f.s2;
+ if(r1 != R)
+ if(r1->refbehind.b[z] & bb)
+ paint1(r1, bn);
+ r = (Reg*)r->f.s1;
+ if(r == R)
+ break;
+ if(r->act.b[z] & bb)
+ break;
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ }
+}
+
+uint64
+paint2(Reg *r, int bn, int depth)
+{
+ Reg *r1;
+ int z;
+ uint64 bb, vreg;
+
+ z = bn/64;
+ bb = 1LL << (bn%64);
+ vreg = regbits;
+ if(!(r->act.b[z] & bb))
+ return vreg;
+ for(;;) {
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ r1 = (Reg*)r->f.p1;
+ if(r1 == R)
+ break;
+ if(!(r1->refahead.b[z] & bb))
+ break;
+ if(!(r1->act.b[z] & bb))
+ break;
+ r = r1;
+ }
+ for(;;) {
+ if(debug['R'] && debug['v'])
+ print(" paint2 %d %P\n", depth, r->f.prog);
+
+ r->act.b[z] &= ~bb;
+
+ vreg |= r->regu;
+
+ if(r->refbehind.b[z] & bb)
+ for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
+ if(r1->refahead.b[z] & bb)
+ vreg |= paint2(r1, bn, depth+1);
+
+ if(!(r->refahead.b[z] & bb))
+ break;
+ r1 = (Reg*)r->f.s2;
+ if(r1 != R)
+ if(r1->refbehind.b[z] & bb)
+ vreg |= paint2(r1, bn, depth+1);
+ r = (Reg*)r->f.s1;
+ if(r == R)
+ break;
+ if(!(r->act.b[z] & bb))
+ break;
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ }
+ return vreg;
+}
+
+void
+paint3(Reg *r, int bn, uint64 rb, int rn)
+{
+ Reg *r1;
+ Prog *p;
+ int z;
+ uint64 bb;
+
+ z = bn/64;
+ bb = 1LL << (bn%64);
+ if(r->act.b[z] & bb)
+ return;
+ for(;;) {
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ r1 = (Reg*)r->f.p1;
+ if(r1 == R)
+ break;
+ if(!(r1->refahead.b[z] & bb))
+ break;
+ if(r1->act.b[z] & bb)
+ break;
+ r = r1;
+ }
+
+ if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
+ addmove(r, bn, rn, 0);
+ for(;;) {
+ r->act.b[z] |= bb;
+ p = r->f.prog;
+
+ if(r->use1.b[z] & bb) {
+ if(debug['R'] && debug['v'])
+ print("%P", p);
+ addreg(&p->from, rn);
+ if(debug['R'] && debug['v'])
+ print(" ===change== %P\n", p);
+ }
+ if((r->use2.b[z]|r->set.b[z]) & bb) {
+ if(debug['R'] && debug['v'])
+ print("%P", p);
+ addreg(&p->to, rn);
+ if(debug['R'] && debug['v'])
+ print(" ===change== %P\n", p);
+ }
+
+ if(STORE(r) & r->regdiff.b[z] & bb)
+ addmove(r, bn, rn, 1);
+ r->regu |= rb;
+
+ if(r->refbehind.b[z] & bb)
+ for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link)
+ if(r1->refahead.b[z] & bb)
+ paint3(r1, bn, rb, rn);
+
+ if(!(r->refahead.b[z] & bb))
+ break;
+ r1 = (Reg*)r->f.s2;
+ if(r1 != R)
+ if(r1->refbehind.b[z] & bb)
+ paint3(r1, bn, rb, rn);
+ r = (Reg*)r->f.s1;
+ if(r == R)
+ break;
+ if(r->act.b[z] & bb)
+ break;
+ if(!(r->refbehind.b[z] & bb))
+ break;
+ }
+}
+
+void
+addreg(Adr *a, int rn)
+{
+ a->sym = nil;
+ a->node = nil;
+ a->name = D_NONE;
+ a->type = D_REG;
+ a->reg = rn;
+ if(rn >= NREG) {
+ a->type = D_FREG;
+ a->reg = rn-NREG;
+ }
+
+ ostats.ncvtreg++;
+}
+
/*
* track register variables including external registers:
* bit reg
@@ -56,7 +1239,7 @@ regopt(Prog *p)
uint64
RtoB(int r)
{
- if(r >= D_R0 && r <= D_R0+31)
+ if(r > D_R0 && r <= D_R0+31)
return 1ULL << (r - D_R0);
return 0;
}
@@ -64,7 +1247,7 @@ RtoB(int r)
int
BtoR(uint64 b)
{
- b &= 0xffffffff;
+ b &= 0xffffffffull;
if(b == 0)
return 0;
return bitno(b) + D_R0;
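The RtoB change above means R0 no longer maps to any bit, which keeps it out of the allocatable set; BtoR keeps looking only at the low 32 bits, where the general registers live. The following small standalone check of the intended round trip is an assumption-laden sketch: R0 and bitno here are stand-ins for the tree's D_R0 constant and bitno() helper.

/* Sketch only: R1..R31 round-trip through the bitmap; R0 maps to no bit. */
#include <assert.h>
#include <stdint.h>

enum { R0 = 0 }; /* stand-in for D_R0 */

static uint64_t rtob(int r)
{
	if(r > R0 && r <= R0+31)
		return 1ULL << (r - R0);
	return 0;
}

static int bitno(uint64_t b)
{
	for(int i = 0; i < 64; i++)
		if(b & (1ULL << i))
			return i;
	return -1;
}

static int btor(uint64_t b)
{
	b &= 0xffffffffull; /* general registers live in the low word */
	if(b == 0)
		return 0;
	return bitno(b) + R0;
}

int main(void)
{
	assert(rtob(R0) == 0); /* R0 is never registerized */
	for(int r = R0+1; r <= R0+31; r++)
		assert(btor(rtob(r)) == r); /* round trip for R1..R31 */
	return 0;
}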
@@ -139,6 +1322,7 @@ void
dumpit(char *str, Flow *r0, int isreg)
{
Flow *r, *r1;
+ int s1v, s2v;
print("\n%s\n", str);
for(r = r0; r != nil; r = r->link) {
@@ -150,12 +1334,16 @@ dumpit(char *str, Flow *r0, int isreg)
print(" %.4ud", (int)r1->prog->pc);
print("\n");
}
-// r1 = r->s1;
-// if(r1 != R) {
-// print(" succ:");
-// for(; r1 != R; r1 = r1->s1)
-// print(" %.4ud", (int)r1->prog->pc);
-// print("\n");
-// }
+ // If at least one successor is "interesting", print both
+ s1v = (r->s1 != nil) && (r->s1->prog != r->prog->link);
+ s2v = (r->s2 != nil) && (r->s2->prog != r->prog->link);
+ if(s1v || s2v) {
+ print(" succ:");
+ if(r->s1 != nil)
+ print(" %.4ud", (int)r->s1->prog->pc);
+ if(r->s2 != nil)
+ print(" %.4ud", (int)r->s2->prog->pc);
+ print("\n");
+ }
}
}
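As a closing illustration of the allocation order used in passes 5 and 6 above: candidate regions are sorted by rcmp so that registers are handed out greedily to the most profitable regions first, as the Rgn comment in opt.h describes. The sketch below is a minimal standalone illustration of that ordering with a stand-in Region struct and made-up costs, not the compiler's Rgn or region array.

/* Sketch only: sort regions by descending cost, ties broken on varno,
 * matching the ordering rcmp() establishes before register assignment. */
#include <stdio.h>
#include <stdlib.h>

typedef struct {
	int cost;
	int varno;
} Region;

static int
bycost(const void *a, const void *b)
{
	const Region *p1 = a, *p2 = b;

	if(p2->cost != p1->cost)
		return p2->cost - p1->cost; /* higher cost first */
	return p2->varno - p1->varno;
}

int main(void)
{
	Region r[] = {{3, 0}, {10, 1}, {7, 2}};

	qsort(r, 3, sizeof r[0], bycost);
	for(int i = 0; i < 3; i++)
		printf("varno=%d cost=%d\n", r[i].varno, r[i].cost);
	return 0;
}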