From 5c78ce286c9360ebad7519958794865084ab3fbd Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Mon, 24 Nov 2014 11:42:15 -0500 Subject: [dev.cc] 9g: peephole optimizer This was based on the 9c peephole optimizer, modified to work with code generated by gc and use the proginfo infrastructure in gc. LGTM=rsc R=rsc, bradfitz, minux CC=golang-codereviews https://codereview.appspot.com/179190043 --- src/cmd/9g/peep.c | 896 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 892 insertions(+), 4 deletions(-) diff --git a/src/cmd/9g/peep.c b/src/cmd/9g/peep.c index ec314d633..1f430220e 100644 --- a/src/cmd/9g/peep.c +++ b/src/cmd/9g/peep.c @@ -33,12 +33,295 @@ #include "gg.h" #include "opt.h" +static int regzer(Addr *a); +static int subprop(Flow*); +static int copyprop(Flow*); +static int copy1(Addr*, Addr*, Flow*, int); +static int copyas(Addr*, Addr*); +static int copyau(Addr*, Addr*); +static int copysub(Addr*, Addr*, Addr*, int); +static int copysub1(Prog*, Addr*, Addr*, int); +static int copyau1(Prog *p, Addr *v); + +static uint32 gactive; + void -peep(Prog *p) +peep(Prog *firstp) { - USED(p); - // TODO(minux) - return; + Graph *g; + Flow *r, *r1; + Prog *p, *p1; + int t; + + g = flowstart(firstp, sizeof(Flow)); + if(g == nil) + return; + gactive = 0; + +loop1: + if(debug['P'] && debug['v']) + dumpit("loop1", g->start, 0); + + t = 0; + for(r=g->start; r!=nil; r=r->link) { + p = r->prog; + // TODO(austin) Handle smaller moves. arm and amd64 + // distinguish between moves that moves that *must* + // sign/zero extend and moves that don't care so they + // can eliminate moves that don't care without + // breaking moves that do care. This might let us + // simplify or remove the next peep loop, too. + if(p->as == AMOVD || p->as == AFMOVD) + if(regtyp(&p->to)) { + // Try to eliminate reg->reg moves + if(regtyp(&p->from)) + if(p->from.type == p->to.type) { + if(copyprop(r)) { + excise(r); + t++; + } else + if(subprop(r) && copyprop(r)) { + excise(r); + t++; + } + } + // Convert uses to $0 to uses of R0 and + // propagate R0 + if(regzer(&p->from)) + if(p->to.type == D_REG) { + p->from.type = D_REG; + p->from.reg = REGZERO; + if(copyprop(r)) { + excise(r); + t++; + } else + if(subprop(r) && copyprop(r)) { + excise(r); + t++; + } + } + } + } + if(t) + goto loop1; + + /* + * look for MOVB x,R; MOVB R,R (for small MOVs not handled above) + */ + for(r=g->start; r!=nil; r=r->link) { + p = r->prog; + switch(p->as) { + default: + continue; + case AMOVH: + case AMOVHZ: + case AMOVB: + case AMOVBZ: + case AMOVW: + case AMOVWZ: + if(p->to.type != D_REG) + continue; + break; + } + r1 = r->link; + if(r1 == nil) + continue; + p1 = r1->prog; + if(p1->as != p->as) + continue; + if(p1->from.type != D_REG || p1->from.reg != p->to.reg) + continue; + if(p1->to.type != D_REG || p1->to.reg != p->to.reg) + continue; + excise(r1); + } + + if(debug['D'] > 1) + goto ret; /* allow following code improvement to be suppressed */ + + /* + * look for OP x,y,R; CMP R, $0 -> OPCC x,y,R + * when OP can set condition codes correctly + */ + for(r=g->start; r!=nil; r=r->link) { + p = r->prog; + switch(p->as) { + case ACMP: + case ACMPW: /* always safe? */ + if(!regzer(&p->to)) + continue; + r1 = r->s1; + if(r1 == nil) + continue; + switch(r1->prog->as) { + default: + continue; + case ABCL: + case ABC: + /* the conditions can be complex and these are currently little used */ + continue; + case ABEQ: + case ABGE: + case ABGT: + case ABLE: + case ABLT: + case ABNE: + case ABVC: + case ABVS: + break; + } + r1 = r; + do + r1 = uniqp(r1); + while (r1 != nil && r1->prog->as == ANOP); + if(r1 == nil) + continue; + p1 = r1->prog; + if(p1->to.type != D_REG || p1->to.reg != p->from.reg) + continue; + switch(p1->as) { + case ASUB: + case AADD: + case AXOR: + case AOR: + /* irregular instructions */ + if(p1->from.type == D_CONST) + continue; + break; + } + switch(p1->as) { + default: + continue; + case AMOVW: + case AMOVD: + if(p1->from.type != D_REG) + continue; + continue; + case AANDCC: + case AANDNCC: + case AORCC: + case AORNCC: + case AXORCC: + case ASUBCC: + case ASUBECC: + case ASUBMECC: + case ASUBZECC: + case AADDCC: + case AADDCCC: + case AADDECC: + case AADDMECC: + case AADDZECC: + case ARLWMICC: + case ARLWNMCC: + /* don't deal with floating point instructions for now */ +/* + case AFABS: + case AFADD: + case AFADDS: + case AFCTIW: + case AFCTIWZ: + case AFDIV: + case AFDIVS: + case AFMADD: + case AFMADDS: + case AFMOVD: + case AFMSUB: + case AFMSUBS: + case AFMUL: + case AFMULS: + case AFNABS: + case AFNEG: + case AFNMADD: + case AFNMADDS: + case AFNMSUB: + case AFNMSUBS: + case AFRSP: + case AFSUB: + case AFSUBS: + case ACNTLZW: + case AMTFSB0: + case AMTFSB1: +*/ + case AADD: + case AADDV: + case AADDC: + case AADDCV: + case AADDME: + case AADDMEV: + case AADDE: + case AADDEV: + case AADDZE: + case AADDZEV: + case AAND: + case AANDN: + case ADIVW: + case ADIVWV: + case ADIVWU: + case ADIVWUV: + case ADIVD: + case ADIVDV: + case ADIVDU: + case ADIVDUV: + case AEQV: + case AEXTSB: + case AEXTSH: + case AEXTSW: + case AMULHW: + case AMULHWU: + case AMULLW: + case AMULLWV: + case AMULHD: + case AMULHDU: + case AMULLD: + case AMULLDV: + case ANAND: + case ANEG: + case ANEGV: + case ANOR: + case AOR: + case AORN: + case AREM: + case AREMV: + case AREMU: + case AREMUV: + case AREMD: + case AREMDV: + case AREMDU: + case AREMDUV: + case ARLWMI: + case ARLWNM: + case ASLW: + case ASRAW: + case ASRW: + case ASLD: + case ASRAD: + case ASRD: + case ASUB: + case ASUBV: + case ASUBC: + case ASUBCV: + case ASUBME: + case ASUBMEV: + case ASUBE: + case ASUBEV: + case ASUBZE: + case ASUBZEV: + case AXOR: + t = variant2as(p1->as, as2variant(p1->as) | V_CC); + break; + } + if(debug['D']) + print("cmp %P; %P -> ", p1, p); + p1->as = t; + if(debug['D']) + print("%P\n", p1); + excise(r); + continue; + } + } + +ret: + flowend(g); } void @@ -56,6 +339,22 @@ excise(Flow *r) ostats.ndelmov++; } +/* + * regzer returns 1 if a's value is 0 (a is R0 or $0) + */ +static int +regzer(Addr *a) +{ + if(a->type == D_CONST) + if(a->sym == nil && a->reg == NREG) + if(a->offset == 0) + return 1; + if(a->type == D_REG) + if(a->reg == REGZERO) + return 1; + return 0; +} + int regtyp(Adr *a) { @@ -63,11 +362,600 @@ regtyp(Adr *a) default: return 0; case D_REG: + if(a->reg == REGZERO) + return 0; case D_FREG: return 1; } } +/* + * the idea is to substitute + * one register for another + * from one MOV to another + * MOV a, R1 + * ADD b, R1 / no use of R2 + * MOV R1, R2 + * would be converted to + * MOV a, R2 + * ADD b, R2 + * MOV R2, R1 + * hopefully, then the former or latter MOV + * will be eliminated by copy propagation. + * + * r0 (the argument, not the register) is the MOV at the end of the + * above sequences. This returns 1 if it modified any instructions. + */ +static int +subprop(Flow *r0) +{ + Prog *p; + Addr *v1, *v2; + Flow *r; + int t; + ProgInfo info; + + p = r0->prog; + v1 = &p->from; + if(!regtyp(v1)) + return 0; + v2 = &p->to; + if(!regtyp(v2)) + return 0; + for(r=uniqp(r0); r!=nil; r=uniqp(r)) { + if(uniqs(r) == nil) + break; + p = r->prog; + if(p->as == AVARDEF || p->as == AVARKILL) + continue; + proginfo(&info, p); + if(info.flags & Call) + return 0; + + if((info.flags & (RightRead|RightWrite)) == RightWrite) { + if(p->to.type == v1->type) + if(p->to.reg == v1->reg) + goto gotit; + } + + if(copyau(&p->from, v2) || + copyau1(p, v2) || + copyau(&p->to, v2)) + break; + if(copysub(&p->from, v1, v2, 0) || + copysub1(p, v1, v2, 0) || + copysub(&p->to, v1, v2, 0)) + break; + } + return 0; + +gotit: + copysub(&p->to, v1, v2, 1); + if(debug['P']) { + print("gotit: %D->%D\n%P", v1, v2, r->prog); + if(p->from.type == v2->type) + print(" excise"); + print("\n"); + } + for(r=uniqs(r); r!=r0; r=uniqs(r)) { + p = r->prog; + copysub(&p->from, v1, v2, 1); + copysub1(p, v1, v2, 1); + copysub(&p->to, v1, v2, 1); + if(debug['P']) + print("%P\n", r->prog); + } + t = v1->reg; + v1->reg = v2->reg; + v2->reg = t; + if(debug['P']) + print("%P last\n", r->prog); + return 1; +} + +/* + * The idea is to remove redundant copies. + * v1->v2 F=0 + * (use v2 s/v2/v1/)* + * set v1 F=1 + * use v2 return fail (v1->v2 move must remain) + * ----------------- + * v1->v2 F=0 + * (use v2 s/v2/v1/)* + * set v1 F=1 + * set v2 return success (caller can remove v1->v2 move) + */ +static int +copyprop(Flow *r0) +{ + Prog *p; + Addr *v1, *v2; + + p = r0->prog; + v1 = &p->from; + v2 = &p->to; + if(copyas(v1, v2)) { + if(debug['P']) + print("eliminating self-move\n", r0->prog); + return 1; + } + gactive++; + if(debug['P']) + print("trying to eliminate %D->%D move from:\n%P\n", v1, v2, r0->prog); + return copy1(v1, v2, r0->s1, 0); +} + +// copy1 replaces uses of v2 with v1 starting at r and returns 1 if +// all uses were rewritten. +static int +copy1(Addr *v1, Addr *v2, Flow *r, int f) +{ + int t; + Prog *p; + + if(r->active == gactive) { + if(debug['P']) + print("act set; return 1\n"); + return 1; + } + r->active = gactive; + if(debug['P']) + print("copy1 replace %D with %D f=%d\n", v2, v1, f); + for(; r != nil; r = r->s1) { + p = r->prog; + if(debug['P']) + print("%P", p); + if(!f && uniqp(r) == nil) { + // Multiple predecessors; conservatively + // assume v1 was set on other path + f = 1; + if(debug['P']) + print("; merge; f=%d", f); + } + t = copyu(p, v2, nil); + switch(t) { + case 2: /* rar, can't split */ + if(debug['P']) + print("; %D rar; return 0\n", v2); + return 0; + + case 3: /* set */ + if(debug['P']) + print("; %D set; return 1\n", v2); + return 1; + + case 1: /* used, substitute */ + case 4: /* use and set */ + if(f) { + if(!debug['P']) + return 0; + if(t == 4) + print("; %D used+set and f=%d; return 0\n", v2, f); + else + print("; %D used and f=%d; return 0\n", v2, f); + return 0; + } + if(copyu(p, v2, v1)) { + if(debug['P']) + print("; sub fail; return 0\n"); + return 0; + } + if(debug['P']) + print("; sub %D->%D\n => %P", v2, v1, p); + if(t == 4) { + if(debug['P']) + print("; %D used+set; return 1\n", v2); + return 1; + } + break; + } + if(!f) { + t = copyu(p, v1, nil); + if(!f && (t == 2 || t == 3 || t == 4)) { + f = 1; + if(debug['P']) + print("; %D set and !f; f=%d", v1, f); + } + } + if(debug['P']) + print("\n"); + if(r->s2) + if(!copy1(v1, v2, r->s2, f)) + return 0; + } + return 1; +} + +// If s==nil, copyu returns the set/use of v in p; otherwise, it +// modifies p to replace reads of v with reads of s and returns 0 for +// success or non-zero for failure. +// +// If s==nil, copy returns one of the following values: +// 1 if v only used +// 2 if v is set and used in one address (read-alter-rewrite; +// can't substitute) +// 3 if v is only set +// 4 if v is set in one address and used in another (so addresses +// can be rewritten independently) +// 0 otherwise (not touched) +int +copyu(Prog *p, Addr *v, Addr *s) +{ + if(p->from3.type != D_NONE) + // 9g never generates a from3 + print("copyu: from3 (%D) not implemented\n", p->from3); + + switch(p->as) { + + default: + print("copyu: can't find %A\n", p->as); + return 2; + + case ANOP: /* read p->from, write p->to */ + case AMOVH: + case AMOVHZ: + case AMOVB: + case AMOVBZ: + case AMOVW: + case AMOVWZ: + case AMOVD: + + case ANEG: + case ANEGCC: + case AADDME: + case AADDMECC: + case AADDZE: + case AADDZECC: + case ASUBME: + case ASUBMECC: + case ASUBZE: + case ASUBZECC: + + case AFCTIW: + case AFCTIWZ: + case AFCTID: + case AFCTIDZ: + case AFCFID: + case AFCFIDCC: + case AFMOVS: + case AFMOVD: + case AFRSP: + case AFNEG: + case AFNEGCC: + if(s != nil) { + if(copysub(&p->from, v, s, 1)) + return 1; + // Update only indirect uses of v in p->to + if(!copyas(&p->to, v)) + if(copysub(&p->to, v, s, 1)) + return 1; + return 0; + } + if(copyas(&p->to, v)) { + // Fix up implicit from + if(p->from.type == D_NONE) + p->from = p->to; + if(copyau(&p->from, v)) + return 4; + return 3; + } + if(copyau(&p->from, v)) + return 1; + if(copyau(&p->to, v)) + // p->to only indirectly uses v + return 1; + return 0; + + case AMOVBU: /* rar p->from, write p->to or read p->from, rar p->to */ + case AMOVBZU: + case AMOVHU: + case AMOVHZU: + case AMOVWZU: + case AMOVDU: + if(p->from.type == D_OREG) { + if(copyas(&p->from, v)) + // No s!=nil check; need to fail + // anyway in that case + return 2; + if(s != nil) { + if(copysub(&p->to, v, s, 1)) + return 1; + return 0; + } + if(copyas(&p->to, v)) + return 3; + } else if (p->to.type == D_OREG) { + if(copyas(&p->to, v)) + return 2; + if(s != nil) { + if(copysub(&p->from, v, s, 1)) + return 1; + return 0; + } + if(copyau(&p->from, v)) + return 1; + } else { + print("copyu: bad %P\n", p); + } + return 0; + + case ARLWMI: /* read p->from, read p->reg, rar p->to */ + case ARLWMICC: + if(copyas(&p->to, v)) + return 2; + /* fall through */ + + case AADD: /* read p->from, read p->reg, write p->to */ + case AADDC: + case AADDE: + case ASUB: + case ASLW: + case ASRW: + case ASRAW: + case ASLD: + case ASRD: + case ASRAD: + case AOR: + case AORCC: + case AORN: + case AORNCC: + case AAND: + case AANDCC: + case AANDN: + case AANDNCC: + case ANAND: + case ANANDCC: + case ANOR: + case ANORCC: + case AXOR: + case AMULHW: + case AMULHWU: + case AMULLW: + case AMULLD: + case ADIVW: + case ADIVD: + case ADIVWU: + case ADIVDU: + case AREM: + case AREMU: + case AREMD: + case AREMDU: + case ARLWNM: + case ARLWNMCC: + + case AFADDS: + case AFADD: + case AFSUBS: + case AFSUB: + case AFMULS: + case AFMUL: + case AFDIVS: + case AFDIV: + if(s != nil) { + if(copysub(&p->from, v, s, 1)) + return 1; + if(copysub1(p, v, s, 1)) + return 1; + // Update only indirect uses of v in p->to + if(!copyas(&p->to, v)) + if(copysub(&p->to, v, s, 1)) + return 1; + return 0; + } + if(copyas(&p->to, v)) { + if(p->reg == NREG) + // Fix up implicit reg (e.g., ADD + // R3,R4 -> ADD R3,R4,R4) so we can + // update reg and to separately. + p->reg = p->to.reg; + if(copyau(&p->from, v)) + return 4; + if(copyau1(p, v)) + return 4; + return 3; + } + if(copyau(&p->from, v)) + return 1; + if(copyau1(p, v)) + return 1; + if(copyau(&p->to, v)) + return 1; + return 0; + + case ABEQ: + case ABGT: + case ABGE: + case ABLT: + case ABLE: + case ABNE: + case ABVC: + case ABVS: + return 0; + + case ACHECKNIL: /* read p->from */ + case ACMP: /* read p->from, read p->to */ + case ACMPU: + case ACMPW: + case ACMPWU: + case AFCMPO: + case AFCMPU: + if(s != nil) { + if(copysub(&p->from, v, s, 1)) + return 1; + return copysub(&p->to, v, s, 1); + } + if(copyau(&p->from, v)) + return 1; + if(copyau(&p->to, v)) + return 1; + return 0; + + case ABR: /* read p->to */ + // 9g never generates a branch to a GPR (this isn't + // even a normal instruction; liblink turns it in to a + // mov and a branch). + if(s != nil) { + if(copysub(&p->to, v, s, 1)) + return 1; + return 0; + } + if(copyau(&p->to, v)) + return 1; + return 0; + + case ARETURN: /* funny */ + if(s != nil) + return 0; + // All registers die at this point, so claim + // everything is set (and not used). + return 3; + + case ABL: /* funny */ + if(v->type == D_REG) { + if(v->reg <= REGEXT && v->reg > exregoffset) + return 2; + if(v->reg == REGARG) + return 2; + } + if(v->type == D_FREG) { + if(v->reg <= FREGEXT && v->reg > exfregoffset) + return 2; + } + if(p->from.type == D_REG && v->type == D_REG && p->from.reg == v->reg) + return 2; + + if(s != nil) { + if(copysub(&p->to, v, s, 1)) + return 1; + return 0; + } + if(copyau(&p->to, v)) + return 4; + return 3; + + case ADUFFZERO: + // R0 is zero, used by DUFFZERO, cannot be substituted. + // R3 is ptr to memory, used and set, cannot be substituted. + if(v->type == D_REG) { + if(v->reg == 0) + return 1; + if(v->reg == 3) + return 2; + } + return 0; + + case ADUFFCOPY: + // R3, R4 are ptr to src, dst, used and set, cannot be substituted. + // R5 is scratch, set by DUFFCOPY, cannot be substituted. + if(v->type == D_REG) { + if(v->reg == 3 || v->reg == 4) + return 2; + if(v->reg == 5) + return 3; + } + return 0; + + case ATEXT: /* funny */ + if(v->type == D_REG) + if(v->reg == REGARG) + return 3; + return 0; + + case APCDATA: + case AFUNCDATA: + case AVARDEF: + case AVARKILL: + return 0; + } +} + +int +a2type(Prog *p) +{ + ProgInfo info; + proginfo(&info, p); + if(info.flags & (SizeB|SizeW|SizeL|SizeQ)) + return D_REG; + if(info.flags & (SizeF|SizeD)) + return D_FREG; + return D_NONE; +} + +// copyas returns 1 if a and v address the same register. +// +// If a is the from operand, this means this operation reads the +// register in v. If a is the to operand, this means this operation +// writes the register in v. +static int +copyas(Addr *a, Addr *v) +{ + if(regtyp(v)) + if(a->type == v->type) + if(a->reg == v->reg) + return 1; + return 0; +} + +// copyau returns 1 if a either directly or indirectly addresses the +// same register as v. +// +// If a is the from operand, this means this operation reads the +// register in v. If a is the to operand, this means the operation +// either reads or writes the register in v (if !copyas(a, v), then +// the operation reads the register in v). +static int +copyau(Addr *a, Addr *v) +{ + if(copyas(a, v)) + return 1; + if(v->type == D_REG) + if(a->type == D_OREG || (a->type == D_CONST && a->reg != NREG)) + if(v->reg == a->reg) + return 1; + return 0; +} + +// copyau1 returns 1 if p->reg references the same register as v and v +// is a direct reference. +static int +copyau1(Prog *p, Addr *v) +{ + if(regtyp(v)) + if(p->from.type == v->type || p->to.type == v->type) + if(p->reg == v->reg) { + // Whether p->reg is a GPR or an FPR is + // implied by the instruction (both are + // numbered from 0). But the type should + // match v->type. Sanity check this. + if(a2type(p) != v->type) + print("botch a2type %P\n", p); + return 1; + } + return 0; +} + +// copysub replaces v with s in a if f!=0 or indicates it if could if f==0. +// Returns 1 on failure to substitute (it always succeeds on power64). +static int +copysub(Addr *a, Addr *v, Addr *s, int f) +{ + if(f) + if(copyau(a, v)) + a->reg = s->reg; + return 0; +} + +// copysub1 replaces v with s in p1->reg if f!=0 or indicates if it could if f==0. +// Returns 1 on failure to substitute (it always succeeds on power64). +static int +copysub1(Prog *p1, Addr *v, Addr *s, int f) +{ + if(f) + if(copyau1(p1, v)) + p1->reg = s->reg; + return 0; +} + int sameaddr(Addr *a, Addr *v) { -- cgit v1.2.1