diff options
Diffstat (limited to 'src/liblink/sched9.c')
-rw-r--r-- | src/liblink/sched9.c | 835 |
1 files changed, 835 insertions, 0 deletions
diff --git a/src/liblink/sched9.c b/src/liblink/sched9.c new file mode 100644 index 000000000..a9083df9b --- /dev/null +++ b/src/liblink/sched9.c @@ -0,0 +1,835 @@ +// cmd/9l/sched.c from Vita Nuova. +// +// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. +// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net) +// Portions Copyright © 1997-1999 Vita Nuova Limited +// Portions Copyright © 2000-2008 Vita Nuova Holdings Limited (www.vitanuova.com) +// Portions Copyright © 2004,2006 Bruce Ellis +// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net) +// Revisions Copyright © 2000-2008 Lucent Technologies Inc. and others +// Portions Copyright © 2009 The Go Authors. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +// +build ignore + +#include "l.h" + +enum +{ + E_ICC = 1<<0, + E_FCC = 1<<1, + E_MEM = 1<<2, + E_MEMSP = 1<<3, /* uses offset and size */ + E_MEMSB = 1<<4, /* uses offset and size */ + E_LR = 1<<5, + E_CR = 1<<6, + E_CTR = 1<<7, + E_XER = 1<<8, + + E_CR0 = 0xF<<0, + E_CR1 = 0xF<<4, + + ANYMEM = E_MEM|E_MEMSP|E_MEMSB, + ALL = ~0, +}; + +typedef struct Sch Sch; +typedef struct Dep Dep; + +struct Dep +{ + ulong ireg; + ulong freg; + ulong cc; + ulong cr; +}; +struct Sch +{ + Prog p; + Dep set; + Dep used; + long soffset; + char size; + char comp; +}; + +void regused(Sch*, Prog*); +int depend(Sch*, Sch*); +int conflict(Sch*, Sch*); +int offoverlap(Sch*, Sch*); +void dumpbits(Sch*, Dep*); + +void +sched(Prog *p0, Prog *pe) +{ + Prog *p, *q; + Sch sch[NSCHED], *s, *t, *u, *se, stmp; + + if(!debug['Q']) + return; + /* + * build side structure + */ + s = sch; + for(p=p0;; p=p->link) { + memset(s, 0, sizeof(*s)); + s->p = *p; + regused(s, p); + if(debug['X']) { + Bprint(&bso, "%P\tset", &s->p); + dumpbits(s, &s->set); + Bprint(&bso, "; used"); + dumpbits(s, &s->used); + if(s->comp) + Bprint(&bso, "; compound"); + if(s->p.mark & LOAD) + Bprint(&bso, "; load"); + if(s->p.mark & BRANCH) + Bprint(&bso, "; branch"); + if(s->p.mark & FCMP) + Bprint(&bso, "; fcmp"); + Bprint(&bso, "\n"); + } + s++; + if(p == pe) + break; + } + se = s; + + for(s=se-1; s>=sch; s--) { + + /* + * load delay. interlocked. + */ + if(s->p.mark & LOAD) { + if(s >= se-1) + continue; + if(!conflict(s, (s+1))) + continue; + /* + * s is load, s+1 is immediate use of result + * t is the trial instruction to insert between s and s+1 + */ + for(t=s-1; t>=sch; t--) { + if(t->p.mark & BRANCH) + goto no2; + if(t->p.mark & FCMP) + if((s+1)->p.mark & BRANCH) + goto no2; + if(t->p.mark & LOAD) + if(conflict(t, (s+1))) + goto no2; + for(u=t+1; u<=s; u++) + if(depend(u, t)) + goto no2; + goto out2; + no2:; + } + if(debug['X']) + Bprint(&bso, "?l%P\n", &s->p); + continue; + out2: + if(debug['X']) { + Bprint(&bso, "!l%P\n", &t->p); + Bprint(&bso, "%P\n", &s->p); + } + stmp = *t; + memmove(t, t+1, (uchar*)s - (uchar*)t); + *s = stmp; + s--; + continue; + } + + /* + * fop2 delay. + */ + if(s->p.mark & FCMP) { + if(s >= se-1) + continue; + if(!((s+1)->p.mark & BRANCH)) + continue; + /* t is the trial instruction to use */ + for(t=s-1; t>=sch; t--) { + for(u=t+1; u<=s; u++) + if(depend(u, t)) + goto no3; + goto out3; + no3:; + } + if(debug['X']) + Bprint(&bso, "?f%P\n", &s->p); + continue; + out3: + if(debug['X']) { + Bprint(&bso, "!f%P\n", &t->p); + Bprint(&bso, "%P\n", &s->p); + } + stmp = *t; + memmove(t, t+1, (uchar*)s - (uchar*)t); + *s = stmp; + s--; + continue; + } + } + + /* + * put it all back + */ + for(s=sch, p=p0; s<se; s++, p=q) { + q = p->link; + if(q != s->p.link) { + *p = s->p; + p->link = q; + } + } + if(debug['X']) + Bprint(&bso, "\n"); +} + +void +regused(Sch *s, Prog *realp) +{ + int c, ar, ad, ld, sz, nr, upd; + ulong m; + Prog *p; + + p = &s->p; + s->comp = compound(p); + if(s->comp) { + s->set.ireg |= 1<<REGTMP; + s->used.ireg |= 1<<REGTMP; + } + ar = 0; /* dest is really reference */ + ad = 0; /* source/dest is really address */ + ld = 0; /* opcode is load instruction */ + sz = 32*4; /* size of load/store for overlap computation */ + nr = 0; /* source/dest is not really reg */ + upd = 0; /* move with update; changes reg */ + +/* + * flags based on opcode + */ + switch(p->as) { + case ATEXT: + curtext = realp; + autosize = p->to.offset + 8; + ad = 1; + break; + case ABL: + s->set.cc |= E_LR; + ar = 1; + ad = 1; + break; + case ABR: + ar = 1; + ad = 1; + break; + case ACMP: + case ACMPU: + case ACMPW: + case ACMPWU: + s->set.cc |= E_ICC; + if(p->reg == 0) + s->set.cr |= E_CR0; + else + s->set.cr |= (0xF<<((p->reg&7)*4)); + ar = 1; + break; + case AFCMPO: + case AFCMPU: + s->set.cc |= E_FCC; + if(p->reg == 0) + s->set.cr |= E_CR0; + else + s->set.cr |= (0xF<<((p->reg&7)*4)); + ar = 1; + break; + case ACRAND: + case ACRANDN: + case ACREQV: + case ACRNAND: + case ACRNOR: + case ACROR: + case ACRORN: + case ACRXOR: + s->used.cr |= 1<<p->from.reg; + s->set.cr |= 1<<p->to.reg; + nr = 1; + break; + case ABCL: /* tricky */ + s->used.cc |= E_FCC|E_ICC; + s->used.cr = ALL; + s->set.cc |= E_LR; + ar = 1; + break; + case ABC: /* tricky */ + s->used.cc |= E_FCC|E_ICC; + s->used.cr = ALL; + ar = 1; + break; + case ABEQ: + case ABGE: + case ABGT: + case ABLE: + case ABLT: + case ABNE: + case ABVC: + case ABVS: + s->used.cc |= E_ICC; + s->used.cr |= E_CR0; + ar = 1; + break; + case ALSW: + case AMOVMW: + /* could do better */ + sz = 32*4; + ld = 1; + break; + case AMOVBU: + case AMOVBZU: + upd = 1; + sz = 1; + ld = 1; + break; + case AMOVB: + case AMOVBZ: + sz = 1; + ld = 1; + break; + case AMOVHU: + case AMOVHZU: + upd = 1; + sz = 2; + ld = 1; + break; + case AMOVH: + case AMOVHBR: + case AMOVHZ: + sz = 2; + ld = 1; + break; + case AFMOVSU: + case AMOVWU: + case AMOVWZU: + upd = 1; + sz = 4; + ld = 1; + break; + case AFMOVS: + case AMOVW: + case AMOVWZ: + case AMOVWBR: + case ALWAR: + sz = 4; + ld = 1; + break; + case AFMOVDU: + upd = 1; + sz = 8; + ld = 1; + break; + case AFMOVD: + sz = 8; + ld = 1; + break; + case AFMOVDCC: + sz = 8; + ld = 1; + s->set.cc |= E_FCC; + s->set.cr |= E_CR1; + break; + case AMOVFL: + case AMOVCRFS: + case AMTFSB0: + case AMTFSB0CC: + case AMTFSB1: + case AMTFSB1CC: + s->set.ireg = ALL; + s->set.freg = ALL; + s->set.cc = ALL; + s->set.cr = ALL; + break; + case AADDCC: + case AADDVCC: + case AADDCCC: + case AADDCVCC: + case AADDMECC: + case AADDMEVCC: + case AADDECC: + case AADDEVCC: + case AADDZECC: + case AADDZEVCC: + case AANDCC: + case AANDNCC: + case ACNTLZWCC: + case ADIVWCC: + case ADIVWVCC: + case ADIVWUCC: + case ADIVWUVCC: + case AEQVCC: + case AEXTSBCC: + case AEXTSHCC: + case AMULHWCC: + case AMULHWUCC: + case AMULLWCC: + case AMULLWVCC: + case ANANDCC: + case ANEGCC: + case ANEGVCC: + case ANORCC: + case AORCC: + case AORNCC: + case AREMCC: + case AREMVCC: + case AREMUCC: + case AREMUVCC: + case ARLWMICC: + case ARLWNMCC: + case ASLWCC: + case ASRAWCC: + case ASRWCC: + case ASTWCCC: + case ASUBCC: + case ASUBVCC: + case ASUBCCC: + case ASUBCVCC: + case ASUBMECC: + case ASUBMEVCC: + case ASUBECC: + case ASUBEVCC: + case ASUBZECC: + case ASUBZEVCC: + case AXORCC: + s->set.cc |= E_ICC; + s->set.cr |= E_CR0; + break; + case AFABSCC: + case AFADDCC: + case AFADDSCC: + case AFCTIWCC: + case AFCTIWZCC: + case AFDIVCC: + case AFDIVSCC: + case AFMADDCC: + case AFMADDSCC: + case AFMSUBCC: + case AFMSUBSCC: + case AFMULCC: + case AFMULSCC: + case AFNABSCC: + case AFNEGCC: + case AFNMADDCC: + case AFNMADDSCC: + case AFNMSUBCC: + case AFNMSUBSCC: + case AFRSPCC: + case AFSUBCC: + case AFSUBSCC: + s->set.cc |= E_FCC; + s->set.cr |= E_CR1; + break; + } + +/* + * flags based on 'to' field + */ + c = p->to.class; + if(c == 0) { + c = aclass(&p->to) + 1; + p->to.class = c; + } + c--; + switch(c) { + default: + print("unknown class %d %D\n", c, &p->to); + + case C_NONE: + case C_ZCON: + case C_SCON: + case C_UCON: + case C_LCON: + case C_ADDCON: + case C_ANDCON: + case C_SBRA: + case C_LBRA: + break; + case C_CREG: + c = p->to.reg; + if(c == NREG) + s->set.cr = ALL; + else + s->set.cr |= (0xF << ((p->from.reg&7)*4)); + s->set.cc = ALL; + break; + case C_SPR: + case C_FPSCR: + case C_MSR: + case C_XER: + s->set.ireg = ALL; + s->set.freg = ALL; + s->set.cc = ALL; + s->set.cr = ALL; + break; + case C_LR: + s->set.cc |= E_LR; + break; + case C_CTR: + s->set.cc |= E_CTR; + break; + case C_ZOREG: + case C_SOREG: + case C_LOREG: + c = p->to.reg; + s->used.ireg |= 1<<c; + if(upd) + s->set.ireg |= 1<<c; + if(ad) + break; + s->size = sz; + s->soffset = regoff(&p->to); + + m = ANYMEM; + if(c == REGSB) + m = E_MEMSB; + if(c == REGSP) + m = E_MEMSP; + + if(ar) + s->used.cc |= m; + else + s->set.cc |= m; + break; + case C_SACON: + case C_LACON: + s->used.ireg |= 1<<REGSP; + if(upd) + s->set.ireg |= 1<<c; + break; + case C_SECON: + case C_LECON: + s->used.ireg |= 1<<REGSB; + if(upd) + s->set.ireg |= 1<<c; + break; + case C_REG: + if(nr) + break; + if(ar) + s->used.ireg |= 1<<p->to.reg; + else + s->set.ireg |= 1<<p->to.reg; + break; + case C_FREG: + if(ar) + s->used.freg |= 1<<p->to.reg; + else + s->set.freg |= 1<<p->to.reg; + break; + case C_SAUTO: + case C_LAUTO: + s->used.ireg |= 1<<REGSP; + if(upd) + s->set.ireg |= 1<<c; + if(ad) + break; + s->size = sz; + s->soffset = regoff(&p->to); + + if(ar) + s->used.cc |= E_MEMSP; + else + s->set.cc |= E_MEMSP; + break; + case C_SEXT: + case C_LEXT: + s->used.ireg |= 1<<REGSB; + if(upd) + s->set.ireg |= 1<<c; + if(ad) + break; + s->size = sz; + s->soffset = regoff(&p->to); + + if(ar) + s->used.cc |= E_MEMSB; + else + s->set.cc |= E_MEMSB; + break; + } + +/* + * flags based on 'from' field + */ + c = p->from.class; + if(c == 0) { + c = aclass(&p->from) + 1; + p->from.class = c; + } + c--; + switch(c) { + default: + print("unknown class %d %D\n", c, &p->from); + + case C_NONE: + case C_ZCON: + case C_SCON: + case C_UCON: + case C_LCON: + case C_ADDCON: + case C_ANDCON: + case C_SBRA: + case C_LBRA: + c = p->from.reg; + if(c != NREG) + s->used.ireg |= 1<<c; + break; + case C_CREG: + c = p->from.reg; + if(c == NREG) + s->used.cr = ALL; + else + s->used.cr |= (0xF << ((p->from.reg&7)*4)); + s->used.cc = ALL; + break; + case C_SPR: + case C_FPSCR: + case C_MSR: + case C_XER: + s->set.ireg = ALL; + s->set.freg = ALL; + s->set.cc = ALL; + s->set.cr = ALL; + break; + case C_LR: + s->used.cc |= E_LR; + break; + case C_CTR: + s->used.cc |= E_CTR; + break; + case C_ZOREG: + case C_SOREG: + case C_LOREG: + c = p->from.reg; + s->used.ireg |= 1<<c; + if(ld) + p->mark |= LOAD; + if(ad) + break; + s->size = sz; + s->soffset = regoff(&p->from); + + m = ANYMEM; + if(c == REGSB) + m = E_MEMSB; + if(c == REGSP) + m = E_MEMSP; + + s->used.cc |= m; + break; + case C_SACON: + case C_LACON: + s->used.ireg |= 1<<REGSP; + break; + case C_SECON: + case C_LECON: + s->used.ireg |= 1<<REGSB; + break; + case C_REG: + if(nr) + break; + s->used.ireg |= 1<<p->from.reg; + break; + case C_FREG: + s->used.freg |= 1<<p->from.reg; + break; + case C_SAUTO: + case C_LAUTO: + s->used.ireg |= 1<<REGSP; + if(ld) + p->mark |= LOAD; + if(ad) + break; + s->size = sz; + s->soffset = regoff(&p->from); + + s->used.cc |= E_MEMSP; + break; + case C_SEXT: + case C_LEXT: + s->used.ireg |= 1<<REGSB; + if(ld) + p->mark |= LOAD; + if(ad) + break; + s->size = sz; + s->soffset = regoff(&p->from); + + s->used.cc |= E_MEMSB; + break; + } + + c = p->reg; + if(c != NREG) { + if(p->from.type == D_FREG || p->to.type == D_FREG) + s->used.freg |= 1<<c; + else + s->used.ireg |= 1<<c; + } +} + +/* + * test to see if 2 instrictions can be + * interchanged without changing semantics + */ +int +depend(Sch *sa, Sch *sb) +{ + ulong x; + + if(sa->set.ireg & (sb->set.ireg|sb->used.ireg)) + return 1; + if(sb->set.ireg & sa->used.ireg) + return 1; + + if(sa->set.freg & (sb->set.freg|sb->used.freg)) + return 1; + if(sb->set.freg & sa->used.freg) + return 1; + + if(sa->set.cr & (sb->set.cr|sb->used.cr)) + return 1; + if(sb->set.cr & sa->used.cr) + return 1; + + + x = (sa->set.cc & (sb->set.cc|sb->used.cc)) | + (sb->set.cc & sa->used.cc); + if(x) { + /* + * allow SB and SP to pass each other. + * allow SB to pass SB iff doffsets are ok + * anything else conflicts + */ + if(x != E_MEMSP && x != E_MEMSB) + return 1; + x = sa->set.cc | sb->set.cc | + sa->used.cc | sb->used.cc; + if(x & E_MEM) + return 1; + if(offoverlap(sa, sb)) + return 1; + } + + return 0; +} + +int +offoverlap(Sch *sa, Sch *sb) +{ + + if(sa->soffset < sb->soffset) { + if(sa->soffset+sa->size > sb->soffset) + return 1; + return 0; + } + if(sb->soffset+sb->size > sa->soffset) + return 1; + return 0; +} + +/* + * test 2 adjacent instructions + * and find out if inserted instructions + * are desired to prevent stalls. + * first instruction is a load instruction. + */ +int +conflict(Sch *sa, Sch *sb) +{ + + if(sa->set.ireg & sb->used.ireg) + return 1; + if(sa->set.freg & sb->used.freg) + return 1; + if(sa->set.cr & sb->used.cr) + return 1; + return 0; +} + +int +compound(Prog *p) +{ + Optab *o; + + o = oplook(p); + if(o->size != 4) + return 1; + if(p->to.type == D_REG && p->to.reg == REGSB) + return 1; + return 0; +} + +void +dumpbits(Sch *s, Dep *d) +{ + int i; + + for(i=0; i<32; i++) + if(d->ireg & (1<<i)) + Bprint(&bso, " R%d", i); + for(i=0; i<32; i++) + if(d->freg & (1<<i)) + Bprint(&bso, " F%d", i); + for(i=0; i<32; i++) + if(d->cr & (1<<i)) + Bprint(&bso, " C%d", i); + for(i=0; i<32; i++) + switch(d->cc & (1<<i)) { + default: + break; + case E_ICC: + Bprint(&bso, " ICC"); + break; + case E_FCC: + Bprint(&bso, " FCC"); + break; + case E_LR: + Bprint(&bso, " LR"); + break; + case E_CR: + Bprint(&bso, " CR"); + break; + case E_CTR: + Bprint(&bso, " CTR"); + break; + case E_XER: + Bprint(&bso, " XER"); + break; + case E_MEM: + Bprint(&bso, " MEM%d", s->size); + break; + case E_MEMSB: + Bprint(&bso, " SB%d", s->size); + break; + case E_MEMSP: + Bprint(&bso, " SP%d", s->size); + break; + } +} |