summaryrefslogtreecommitdiff
path: root/src/cmd/9g/ggen.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/cmd/9g/ggen.c')
-rw-r--r--src/cmd/9g/ggen.c1034
1 files changed, 1034 insertions, 0 deletions
diff --git a/src/cmd/9g/ggen.c b/src/cmd/9g/ggen.c
new file mode 100644
index 000000000..c41d8eb41
--- /dev/null
+++ b/src/cmd/9g/ggen.c
@@ -0,0 +1,1034 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#undef EXTERN
+#define EXTERN
+#include <u.h>
+#include <libc.h>
+#include "gg.h"
+#include "opt.h"
+
+static Prog *appendpp(Prog *p, int as, int ftype, int freg, vlong foffset, int ttype, int treg, vlong toffset);
+static Prog *zerorange(Prog *p, vlong frame, vlong lo, vlong hi);
+
+// defframe finishes the TEXT instruction ptxt of the current function and
+// inserts frame-zeroing code directly after it.
+//
+// ptxt->to.offset is packed as (rnd(argwid, widthptr) << 32) | frame, where
+// frame is rnd(stksize+maxarg, widthreg).
+//
+// Every declaration in curfn->dcl with needzero set must be a PAUTO whose
+// width and offset are non-zero multiples of widthptr (fatal otherwise).
+// Neighbouring variables are coalesced into one [lo, hi) range and each
+// range is cleared with zerorange.
+void
+defframe(Prog *ptxt)
+{
+	uint32 frame;
+	Prog *last;
+	vlong lo, hi, top;
+	NodeList *dcl;
+	Node *v;
+
+	// argument size in the upper word ...
+	ptxt->to.offset = rnd(curfn->type->argwid, widthptr);
+	ptxt->to.offset <<= 32;
+
+	// ... final stack size in the lower word
+	frame = rnd(stksize+maxarg, widthreg);
+	ptxt->to.offset |= frame;
+
+	// Zero ambiguously live variables so that the garbage collector
+	// only sees initialized values when it looks for pointers.
+	// The declarations are sorted in decreasing xoffset order, so a
+	// pending range can only grow downwards.
+	last = ptxt;
+	lo = 0;
+	hi = 0;
+	dcl = curfn->dcl;
+	while(dcl != nil) {
+		v = dcl->n;
+		dcl = dcl->next;
+		if(!v->needzero)
+			continue;
+		if(v->class != PAUTO)
+			fatal("needzero class %d", v->class);
+		if(v->type->width == 0 || v->type->width % widthptr != 0 || v->xoffset % widthptr != 0)
+			fatal("var %lN has size %d offset %d", v, (int)v->type->width, (int)v->xoffset);
+
+		top = v->xoffset + v->type->width;
+		if(lo == hi || top < lo - 2*widthreg) {
+			// No pending range, or the gap is too large to merge:
+			// flush the pending range and open a new one ending at top.
+			last = zerorange(last, frame, lo, hi);
+			hi = top;
+		}
+		// In both cases the range now starts at this variable.
+		lo = v->xoffset;
+	}
+
+	// flush whatever range is still pending
+	zerorange(last, frame, lo, hi);
+}
+
+// zerorange appends, after p, instructions that zero the frame bytes
+// [lo, hi) and returns the last instruction appended (p itself when the
+// range is empty). Addresses are formed as 8+frame+lo relative to REGSP.
+//
+// Three strategies are used depending on the byte count:
+//   fewer than 4 pointer words:  one MOVD from REGZERO per word;
+//   up to 128 pointer words:     a DUFFZERO call entered at offset
+//                                4*(128-words), with REGRT1 = start-8;
+//   larger:                      a MOVDU/CMP/BNE loop using REGRT1 as the
+//                                cursor and REGRT2 as the end marker.
+static Prog*
+zerorange(Prog *p, vlong frame, vlong lo, vlong hi)
+{
+	vlong nbytes, off, base;
+	Prog *top;
+	Node *duff;
+
+	nbytes = hi - lo;
+	if(nbytes == 0)
+		return p;
+
+	base = 8+frame+lo;
+
+	if(nbytes > 128*widthptr) {
+		// REGRT1 = SP + base - 8 (via REGTMP)
+		p = appendpp(p, AMOVD, D_CONST, NREG, base-8, D_REG, REGTMP, 0);
+		p = appendpp(p, AADD, D_REG, REGTMP, 0, D_REG, REGRT1, 0);
+		p->reg = REGSP;
+		// REGRT2 = REGRT1 + nbytes (via REGTMP)
+		p = appendpp(p, AMOVD, D_CONST, NREG, nbytes, D_REG, REGTMP, 0);
+		p = appendpp(p, AADD, D_REG, REGTMP, 0, D_REG, REGRT2, 0);
+		p->reg = REGRT1;
+		// loop: store zero at widthptr(REGRT1) with update until REGRT1 == REGRT2
+		p = appendpp(p, AMOVDU, D_REG, REGZERO, 0, D_OREG, REGRT1, widthptr);
+		top = p;
+		p = appendpp(p, ACMP, D_REG, REGRT1, 0, D_REG, REGRT2, 0);
+		p = appendpp(p, ABNE, D_NONE, NREG, 0, D_BRANCH, NREG, 0);
+		patch(p, top);
+		return p;
+	}
+
+	if(nbytes >= 4*widthptr) {
+		// REGRT1 = SP + base - 8, then jump into duffzero
+		p = appendpp(p, AADD, D_CONST, NREG, base-8, D_REG, REGRT1, 0);
+		p->reg = REGSP;
+		p = appendpp(p, ADUFFZERO, D_NONE, NREG, 0, D_OREG, NREG, 0);
+		duff = sysfunc("duffzero");
+		naddr(duff, &p->to, 1);
+		afunclit(&p->to, duff);
+		p->to.offset = 4*(128-nbytes/widthptr);
+		return p;
+	}
+
+	// short range: one store per word
+	off = 0;
+	while(off < nbytes) {
+		p = appendpp(p, AMOVD, D_REG, REGZERO, 0, D_OREG, REGSP, base+off);
+		off += widthptr;
+	}
+	return p;
+}
+
+// appendpp allocates a new instruction with opcode as, source operand
+// (ftype, freg, foffset) and destination operand (ttype, treg, toffset),
+// links it immediately after p, and returns it. The new instruction
+// inherits p's line number.
+static Prog*
+appendpp(Prog *p, int as, int ftype, int freg, vlong foffset, int ttype, int treg, vlong toffset)
+{
+	Prog *ins;
+
+	ins = mal(sizeof(*ins));
+	clearp(ins);
+
+	ins->as = as;
+	ins->lineno = p->lineno;
+
+	// source operand
+	ins->from.type = ftype;
+	ins->from.reg = freg;
+	ins->from.offset = foffset;
+
+	// destination operand
+	ins->to.type = ttype;
+	ins->to.reg = treg;
+	ins->to.offset = toffset;
+
+	// splice in after p
+	ins->link = p->link;
+	p->link = ins;
+	return ins;
+}
+
+// markautoused walks the instruction list starting at p and sets the
+// used flag on every node referenced by an operand. TYPE, VARDEF and
+// VARKILL instructions do not count as uses and are skipped.
+void
+markautoused(Prog *p)
+{
+	while(p) {
+		if(p->as != ATYPE && p->as != AVARDEF && p->as != AVARKILL) {
+			if(p->from.node)
+				p->from.node->used = 1;
+			if(p->to.node)
+				p->to.node->used = 1;
+		}
+		p = p->link;
+	}
+}
+
+// fixautoused repairs the instruction list after allocauto (formerly
+// compactframe) has moved all autos around:
+//   - TYPE instructions describing an unused D_AUTO node are unlinked;
+//   - VARDEF/VARKILL instructions for unused nodes become NOPs;
+//   - D_AUTO operand offsets are shifted by their node's stkdelta.
+void
+fixautoused(Prog *p)
+{
+	Prog **slot;
+
+	slot = &p;
+	for(;;) {
+		p = *slot;
+		if(p == P)
+			break;
+
+		if(p->as == ATYPE) {
+			if(p->from.node && p->from.name == D_AUTO && !p->from.node->used) {
+				// unlink; slot now refers to the successor
+				*slot = p->link;
+				continue;
+			}
+		} else if(p->as == AVARDEF || p->as == AVARKILL) {
+			if(p->to.node && !p->to.node->used) {
+				// Cannot remove a VARDEF instruction: unlike TYPE, VARDEFs
+				// are interspersed with other code and a jump might be
+				// using one as its target. Turn it into a no-op instead;
+				// a later pass will remove the no-ops. The instruction
+				// then goes through the offset fixups below like any other.
+				p->to.type = D_NONE;
+				p->to.node = N;
+				p->as = ANOP;
+			}
+		}
+
+		if(p->from.name == D_AUTO && p->from.node)
+			p->from.offset += p->from.node->stkdelta;
+		if(p->to.name == D_AUTO && p->to.node)
+			p->to.offset += p->to.node->stkdelta;
+
+		slot = &p->link;
+	}
+}
+
+/*
+ * generate: BL reg, f
+ * where both reg and f are registers.
+ * On power, f must be moved to CTR first, so two instructions are
+ * emitted: MOVD f, CTR followed by BL reg, CTR.
+ */
+static void
+ginsBL(Node *reg, Node *f)
+{
+	Prog *mov, *call;
+
+	// MOVD f, CTR
+	mov = gins(AMOVD, f, N);
+	mov->to.type = D_SPR;
+	mov->to.offset = D_CTR;
+
+	// BL reg, CTR
+	call = gins(ABL, reg, N);
+	call->to.type = D_SPR;
+	call->to.offset = D_CTR;
+}
+
+/*
+ * generate:
+ * call f
+ * proc=-1 normal call but no return
+ * proc=0 normal call
+ * proc=1 goroutine run in new proc
+ * proc=2 defer call save away stack
+ * proc=3 normal call to C pointer (not Go func value)
+ *
+ * Any other value of proc is a fatal error.
+ * For proc=1 and proc=2 the argument size and the function value are
+ * stored in a temporary 3-word area below SP and newproc/deferproc is
+ * called in place of f.
+ */
+void
+ginscall(Node *f, int proc)
+{
+ Prog *p;
+ Node reg, con, reg2;
+ Node r1;
+
+ if(f->type != T)
+ setmaxarg(f->type);
+
+ switch(proc) {
+ default:
+ fatal("ginscall: bad proc %d", proc);
+ break;
+
+ case 0: // normal call
+ case -1: // normal call but no return
+ // Direct call: f names a function, so branch straight to it.
+ if(f->op == ONAME && f->class == PFUNC) {
+ if(f == deferreturn) {
+ // Deferred calls will appear to be returning to
+ // the CALL deferreturn(SB) that we are about to emit.
+ // However, the stack trace code will show the line
+ // of the instruction byte before the return PC.
+ // To avoid that being an unrelated instruction,
+ // insert a Power64 NOP that will have the right line number.
+ // Power64 NOP is really or r0, r0, r0; use that description
+ // because the NOP pseudo-instruction would be removed by
+ // the linker.
+ nodreg(&reg, types[TINT], D_R0);
+ gins(AOR, &reg, &reg);
+ }
+ p = gins(ABL, N, f);
+ afunclit(&p->to, f);
+ // Follow a call that does not return with UNDEF.
+ if(proc == -1 || noreturn(p))
+ gins(AUNDEF, N, N);
+ break;
+ }
+ // Indirect call: move f into register REGENV, load the word it
+ // points at (reg viewed as OINDREG) into R3, then call through
+ // ginsBL with reg restored to a plain register operand.
+ nodreg(&reg, types[tptr], D_R0+REGENV);
+ nodreg(&r1, types[tptr], D_R0+3);
+ gmove(f, &reg);
+ reg.op = OINDREG;
+ gmove(&reg, &r1);
+ reg.op = OREGISTER;
+ ginsBL(&reg, &r1);
+ break;
+
+ case 3: // normal call of c function pointer
+ ginsBL(N, f);
+ break;
+
+ case 1: // call in new proc (go)
+ case 2: // deferred call (defer)
+ // con = argsize(f->type); R3 = f; R4 is used for the size below.
+ nodconst(&con, types[TINT64], argsize(f->type));
+ nodreg(&reg, types[TINT64], D_R0+3);
+ nodreg(&reg2, types[TINT64], D_R0+4);
+ gmove(f, &reg);
+
+ // SP -= 3*8: open a temporary 3-word area
+ p = gins(ASUB, N, N);
+ p->from.type = D_CONST;
+ p->from.offset = 3 * 8;
+ p->to.type = D_REG;
+ p->to.reg = REGSP;
+
+ // 8(SP) = argument size (stored with MOVW)
+ gmove(&con, &reg2);
+ p = gins(AMOVW, &reg2, N);
+ p->to.type = D_OREG;
+ p->to.reg = REGSP;
+ p->to.offset = 8;
+
+ // 16(SP) = function value
+ p = gins(AMOVD, &reg, N);
+ p->to.type = D_OREG;
+ p->to.reg = REGSP;
+ p->to.offset = 16;
+
+ if(proc == 1)
+ ginscall(newproc, 0);
+ else {
+ if(!hasdefer)
+ fatal("hasdefer=0 but has defer");
+ ginscall(deferproc, 0);
+ }
+
+ // SP += 3*8: release the temporary area
+ p = gins(AADD, N, N);
+ p->from.type = D_CONST;
+ p->from.offset = 3 * 8;
+ p->to.type = D_REG;
+ p->to.reg = REGSP;
+
+ if(proc == 2) {
+ // After deferproc: compare R3 with R0; when they are equal,
+ // branch over the return sequence emitted by cgen_ret(N).
+ nodreg(&reg, types[TINT64], D_R0+3);
+ p = gins(ACMP, &reg, N);
+ p->to.type = D_REG;
+ p->to.reg = D_R0;
+ p = gbranch(ABEQ, T, +1);
+ cgen_ret(N);
+ patch(p, pc);
+ }
+ break;
+ }
+}
+
+/*
+ * n is call to interface method.
+ * generate res = n.
+ *
+ * n->left must be an ODOTINTER whose right operand is an ONAME (the
+ * method field) and whose left operand is the interface value.
+ * proc is passed on to ginscall, except that a plain call (proc=0)
+ * is turned into a call through a C function pointer (proc=3).
+ */
+void
+cgen_callinter(Node *n, Node *res, int proc)
+{
+ Node *i, *f;
+ Node tmpi, nodi, nodo, nodr, nodsp;
+ Prog *p;
+
+ i = n->left;
+ if(i->op != ODOTINTER)
+ fatal("cgen_callinter: not ODOTINTER %O", i->op);
+
+ f = i->right; // field
+ if(f->op != ONAME)
+ fatal("cgen_callinter: not ONAME %O", f->op);
+
+ i = i->left; // interface
+
+ // Evaluate a non-addressable interface value into a temporary first.
+ if(!i->addable) {
+ tempname(&tmpi, i->type);
+ cgen(i, &tmpi);
+ i = &tmpi;
+ }
+
+ genlist(n->list); // assign the args
+
+ // i is now addable, prepare an indirected
+ // register to hold its address.
+ igen(i, &nodi, res); // REG = &inter
+
+ // Copy the second word of the interface to widthptr(SP).
+ nodindreg(&nodsp, types[tptr], D_R0+REGSP);
+ nodsp.xoffset = widthptr;
+ nodi.type = types[tptr];
+ nodi.xoffset += widthptr;
+ cgen(&nodi, &nodsp); // widthptr(SP) = widthptr(REG) -- i.data
+
+ // Load the first word of the interface into a fresh register.
+ regalloc(&nodo, types[tptr], res);
+ nodi.type = types[tptr];
+ nodi.xoffset -= widthptr;
+ cgen(&nodi, &nodo); // REG = 0(REG) -- i.tab
+ regfree(&nodi);
+
+ regalloc(&nodr, types[tptr], &nodo);
+ if(n->left->xoffset == BADWIDTH)
+ fatal("cgen_callinter: badwidth");
+ cgen_checknil(&nodo); // in case offset is huge
+ // Address the method slot at 3*widthptr+8+offset from i.tab.
+ nodo.op = OINDREG;
+ nodo.xoffset = n->left->xoffset + 3*widthptr + 8;
+ if(proc == 0) {
+ // plain call: use direct c function pointer - more efficient
+ cgen(&nodo, &nodr); // REG = (3*widthptr+8+offset)(REG) -- i.tab->fun[f]
+ proc = 3;
+ } else {
+ // go/defer. generate go func value.
+ p = gins(AMOVD, &nodo, &nodr); // REG = &((3*widthptr+8+offset)(REG)) -- i.tab->fun[f]
+ p->from.type = D_CONST; // take the address rather than load through it
+ }
+
+ nodr.type = n->left->type;
+ ginscall(&nodr, proc);
+
+ regfree(&nodr);
+ regfree(&nodo);
+}
+
+/*
+ * generate function call;
+ * proc=0 normal call
+ * proc=1 goroutine run in new proc
+ * proc=2 defer call save away stack
+ *
+ * A nil n generates nothing. The callee n->left is called
+ *   - through a temporary, when computing it involves a function call
+ *     (ullman >= UINF): its value is saved before the arguments are
+ *     assigned;
+ *   - through a register, when it is not a named function;
+ *   - directly otherwise.
+ */
+void
+cgen_call(Node *n, int proc)
+{
+	Type *ftype;
+	Node fnreg, saved;
+	Node *callee, *src;
+
+	if(n == N)
+		return;
+
+	callee = n->left;
+	if(callee->ullman >= UINF) {
+		// if name involves a fn call
+		// precompute the address of the fn
+		tempname(&saved, types[tptr]);
+		cgen(callee, &saved);
+	}
+
+	genlist(n->list);	// assign the args
+	ftype = callee->type;
+
+	if(callee->ullman >= UINF) {
+		// call the pointer saved in the temporary
+		src = &saved;
+	} else if(callee->op != ONAME || callee->class != PFUNC) {
+		// call through a pointer expression
+		src = callee;
+	} else {
+		// call direct
+		callee->method = 1;
+		ginscall(callee, proc);
+		return;
+	}
+
+	// indirect call: load the function value into a register
+	regalloc(&fnreg, types[tptr], N);
+	cgen_as(&fnreg, src);
+	fnreg.type = ftype;
+	ginscall(&fnreg, proc);
+	regfree(&fnreg);
+}
+
+/*
+ * call to n has already been generated.
+ * generate:
+ * res = return value from call.
+ *
+ * Only the first output parameter is copied; it is read from the
+ * stack at its field offset plus widthptr. A function type without
+ * output parameters is a fatal error.
+ */
+void
+cgen_callret(Node *n, Node *res)
+{
+	Node slot;
+	Type *ft, *first;
+	Iter it;
+
+	// look through a pointer-to-function type
+	ft = n->left->type;
+	if(ft->etype == TPTR32 || ft->etype == TPTR64)
+		ft = ft->type;
+
+	first = structfirst(&it, getoutarg(ft));
+	if(first == T)
+		fatal("cgen_callret: nil");
+
+	// build an addressable SP-relative operand for the result
+	memset(&slot, 0, sizeof(slot));
+	slot.addable = 1;
+	slot.op = OINDREG;
+	slot.val.u.reg = D_R0+REGSP;
+	slot.type = first->type;
+	slot.xoffset = widthptr + first->width;	// widthptr: saved LR at 0(R1)
+
+	cgen_as(res, &slot);
+}
+
+/*
+ * call to n has already been generated.
+ * generate:
+ * res = &return value from call.
+ *
+ * The address taken is that of the first output parameter, located
+ * on the stack at its field offset plus widthptr. When res is not a
+ * register the address is computed in a scratch register first.
+ */
+void
+cgen_aret(Node *n, Node *res)
+{
+	Node slot, scratch;
+	Type *ft, *first;
+	Iter it;
+
+	// look through a pointer-to-function type
+	ft = n->left->type;
+	if(isptr[ft->etype])
+		ft = ft->type;
+
+	first = structfirst(&it, getoutarg(ft));
+	if(first == T)
+		fatal("cgen_aret: nil");
+
+	// SP-relative operand naming the result slot
+	memset(&slot, 0, sizeof(slot));
+	slot.addable = 1;
+	slot.op = OINDREG;
+	slot.val.u.reg = D_R0 + REGSP;
+	slot.type = first->type;
+	slot.xoffset = widthptr + first->width;	// widthptr: saved lr at 0(SP)
+
+	if(res->op == OREGISTER) {
+		agen(&slot, res);
+		return;
+	}
+
+	regalloc(&scratch, types[tptr], res);
+	agen(&slot, &scratch);
+	gins(AMOVD, &scratch, res);
+	regfree(&scratch);
+}
+
+/*
+ * generate return.
+ * n->list is assignments to return values; n may be nil.
+ *
+ * Emits, in order: the result assignments, a call to deferreturn when
+ * the function has defers, the function's exit list, and RET. For an
+ * ORETJMP node the RET is given the symbol of n->left as its target.
+ */
+void
+cgen_ret(Node *n)
+{
+	Prog *ret;
+
+	if(n != N)
+		genlist(n->list);	// copy out args
+	if(hasdefer)
+		ginscall(deferreturn, 0);
+	genlist(curfn->exit);
+
+	ret = gins(ARET, N, N);
+	if(n == N || n->op != ORETJMP)
+		return;
+
+	// return by jumping to another function
+	ret->to.name = D_EXTERN;
+	ret->to.type = D_CONST;
+	ret->to.sym = linksym(n->left->sym);
+}
+
+void
+cgen_asop(Node *n)
+{
+ USED(n);
+ fatal("cgen_asop"); // no longer used
+}
+
+// samereg reports (1 or 0) whether a and b are both non-nil OREGISTER
+// nodes that name the same register.
+int
+samereg(Node *a, Node *b)
+{
+	return a != N && b != N
+		&& a->op == OREGISTER
+		&& b->op == OREGISTER
+		&& a->val.u.reg == b->val.u.reg;
+}
+
+/*
+ * generate division.
+ * generates one of:
+ * res = nl / nr
+ * res = nl % nr
+ * according to op.
+ *
+ * The generated code panics (panicdivide) when the divisor is zero and,
+ * for signed 64-bit operands that may hit it, handles division by -1
+ * without using the divide instruction.
+ */
+void
+dodiv(int op, Node *nl, Node *nr, Node *res)
+{
+ int a, check;
+ Type *t, *t0;
+ Node tl, tr, tl2, tr2, nm1, nz, tm;
+ Prog *p1, *p2;
+
+ // Have to be careful about handling
+ // most negative int divided by -1 correctly.
+ // The hardware will generate undefined result.
+ // Also need to explicitly trap on division by zero,
+ // the hardware will silently generate undefined result.
+ // DIVW will leave unpredictable result in higher 32-bit,
+ // so always use DIVD/DIVDU.
+ t = nl->type;
+ t0 = t;
+ check = 0;
+ if(issigned[t->etype]) {
+ // The -1 check is needed unless a constant operand rules out
+ // the (most negative) / (-1) combination.
+ check = 1;
+ if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -(1ULL<<(t->width*8-1)))
+ check = 0;
+ else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1)
+ check = 0;
+ }
+ if(t->width < 8) {
+ // Narrow operands are divided as 64-bit values; no -1 check is
+ // generated in that case.
+ if(issigned[t->etype])
+ t = types[TINT64];
+ else
+ t = types[TUINT64];
+ check = 0;
+ }
+
+ a = optoas(ODIV, t);
+
+ // Evaluate both operands into registers of the original type,
+ // the one with the larger ullman number first.
+ regalloc(&tl, t0, N);
+ regalloc(&tr, t0, N);
+ if(nl->ullman >= nr->ullman) {
+ cgen(nl, &tl);
+ cgen(nr, &tr);
+ } else {
+ cgen(nr, &tr);
+ cgen(nl, &tl);
+ }
+ if(t != t0) {
+ // Convert
+ // (in place: same registers, retyped to the 64-bit type t)
+ tl2 = tl;
+ tr2 = tr;
+ tl.type = t;
+ tr.type = t;
+ gmove(&tl2, &tl);
+ gmove(&tr2, &tr);
+ }
+
+ // Handle divide-by-zero panic.
+ // Compare the divisor with REGZERO and skip the panic call when
+ // it is non-zero.
+ p1 = gins(optoas(OCMP, t), &tr, N);
+ p1->to.type = D_REG;
+ p1->to.reg = REGZERO;
+ p1 = gbranch(optoas(ONE, t), T, +1);
+ if(panicdiv == N)
+ panicdiv = sysfunc("panicdivide");
+ ginscall(panicdiv, -1);
+ patch(p1, pc);
+
+ if(check) {
+ // Divisor == -1: produce the result directly, then jump (p2)
+ // past the hardware divide; p2 is patched at the very end.
+ nodconst(&nm1, t, -1);
+ gins(optoas(OCMP, t), &tr, &nm1);
+ p1 = gbranch(optoas(ONE, t), T, +1);
+ if(op == ODIV) {
+ // a / (-1) is -a.
+ gins(optoas(OMINUS, t), N, &tl);
+ gmove(&tl, res);
+ } else {
+ // a % (-1) is 0.
+ nodconst(&nz, t, 0);
+ gmove(&nz, res);
+ }
+ p2 = gbranch(AJMP, T, 0);
+ patch(p1, pc);
+ }
+ p1 = gins(a, &tr, &tl);
+ if(op == ODIV) {
+ regfree(&tr);
+ gmove(&tl, res);
+ } else {
+ // A%B = A-(A/B*B)
+ regalloc(&tm, t, N);
+ // patch div to use the 3 register form
+ // TODO(minux): add gins3?
+ // (the quotient goes to tm so that tl keeps the dividend)
+ p1->reg = p1->to.reg;
+ p1->to.reg = tm.val.u.reg;
+ gins(optoas(OMUL, t), &tr, &tm);
+ regfree(&tr);
+ gins(optoas(OSUB, t), &tm, &tl);
+ regfree(&tm);
+ gmove(&tl, res);
+ }
+ regfree(&tl);
+ // p2 is only assigned (and only used) when check is set.
+ if(check)
+ patch(p2, pc);
+}
+
+/*
+ * generate division according to op, one of:
+ * res = nl / nr
+ * res = nl % nr
+ *
+ * NOTE: the test guarding the first "goto longdiv" is commented out, so
+ * the jump is unconditional and every call ends up in dodiv. The
+ * magic-multiply code and the longmod path below are currently
+ * unreachable.
+ */
+void
+cgen_div(int op, Node *nl, Node *nr, Node *res)
+{
+ Node n1, n2, n3;
+ int w, a;
+ Magic m;
+
+ // TODO(minux): enable division by magic multiply (also need to fix longmod below)
+ //if(nr->op != OLITERAL)
+ goto longdiv;
+ // ---- unreachable until the TODO above is resolved ----
+ w = nl->type->width*8;
+
+ // Front end handled 32-bit division. We only need to handle 64-bit.
+ // try to do division by multiply by (2^w)/d
+ // see hacker's delight chapter 10
+ switch(simtype[nl->type->etype]) {
+ default:
+ goto longdiv;
+
+ case TUINT64:
+ // unsigned: compute the magic multiplier; fall back to longdiv
+ // (after the switch) when umagic marks it bad.
+ m.w = w;
+ m.ud = mpgetfix(nr->val.u.xval);
+ umagic(&m);
+ if(m.bad)
+ break;
+ if(op == OMOD)
+ goto longmod;
+
+ cgenr(nl, &n1, N);
+ nodconst(&n2, nl->type, m.um);
+ regalloc(&n3, nl->type, res);
+ cgen_hmul(&n1, &n2, &n3);
+
+ if(m.ua) {
+ // need to add numerator accounting for overflow
+ gins(optoas(OADD, nl->type), &n1, &n3);
+ nodconst(&n2, nl->type, 1);
+ gins(optoas(ORROTC, nl->type), &n2, &n3);
+ nodconst(&n2, nl->type, m.s-1);
+ gins(optoas(ORSH, nl->type), &n2, &n3);
+ } else {
+ nodconst(&n2, nl->type, m.s);
+ gins(optoas(ORSH, nl->type), &n2, &n3); // shift dx
+ }
+
+ gmove(&n3, res);
+ regfree(&n1);
+ regfree(&n3);
+ return;
+
+ case TINT64:
+ // signed: same scheme with smagic
+ m.w = w;
+ m.sd = mpgetfix(nr->val.u.xval);
+ smagic(&m);
+ if(m.bad)
+ break;
+ if(op == OMOD)
+ goto longmod;
+
+ cgenr(nl, &n1, res);
+ nodconst(&n2, nl->type, m.sm);
+ regalloc(&n3, nl->type, N);
+ cgen_hmul(&n1, &n2, &n3);
+
+ if(m.sm < 0) {
+ // need to add numerator
+ gins(optoas(OADD, nl->type), &n1, &n3);
+ }
+
+ nodconst(&n2, nl->type, m.s);
+ gins(optoas(ORSH, nl->type), &n2, &n3); // shift n3
+
+ nodconst(&n2, nl->type, w-1);
+ gins(optoas(ORSH, nl->type), &n2, &n1); // -1 iff num is neg
+ gins(optoas(OSUB, nl->type), &n1, &n3); // added
+
+ if(m.sd < 0) {
+ // this could probably be removed
+ // by factoring it into the multiplier
+ gins(optoas(OMINUS, nl->type), N, &n3);
+ }
+
+ gmove(&n3, res);
+ regfree(&n1);
+ regfree(&n3);
+ return;
+ }
+ goto longdiv;
+
+longdiv:
+ // division and mod using (slow) hardware instruction
+ dodiv(op, nl, nr, res);
+ return;
+
+longmod:
+ // mod using formula A%B = A-(A/B*B) but
+ // we know that there is a fast algorithm for A/B
+ // (only reached from the switch above, i.e. currently never)
+ regalloc(&n1, nl->type, res);
+ cgen(nl, &n1);
+ regalloc(&n2, nl->type, N);
+ cgen_div(ODIV, &n1, nr, &n2);
+ a = optoas(OMUL, nl->type);
+ if(w == 8) {
+ // use 2-operand 16-bit multiply
+ // because there is no 2-operand 8-bit multiply
+ //a = AIMULW;
+ }
+ if(!smallintconst(nr)) {
+ regalloc(&n3, nl->type, N);
+ cgen(nr, &n3);
+ gins(a, &n3, &n2);
+ regfree(&n3);
+ } else
+ gins(a, nr, &n2);
+ gins(optoas(OSUB, nl->type), &n2, &n1);
+ gmove(&n1, res);
+ regfree(&n1);
+ regfree(&n2);
+}
+
+/*
+ * generate high multiply:
+ * res = (nl*nr) >> width
+ *
+ * For 8-, 16- and 32-bit types the full product is formed with the
+ * ordinary multiply and shifted right by the operand width (SRAD for
+ * signed, SRD for unsigned). For 64-bit types MULHD/MULHDU is used.
+ * Any other type is a fatal error.
+ */
+void
+cgen_hmul(Node *nl, Node *nr, Node *res)
+{
+	int bits, mode, shiftop;
+	Node lhs, rhs, *swap;
+	Type *t;
+	Prog *p;
+
+	// largest ullman on left.
+	if(nl->ullman < nr->ullman) {
+		swap = nl;
+		nl = nr;
+		nr = swap;
+	}
+	t = nl->type;
+	bits = t->width * 8;
+	cgenr(nl, &lhs, res);
+	cgenr(nr, &rhs, N);
+
+	// mode: 1 = multiply then shift, 2 = dedicated high multiply
+	mode = 0;
+	shiftop = 0;
+	switch(simtype[t->etype]) {
+	case TINT8:
+	case TINT16:
+	case TINT32:
+		mode = 1;
+		shiftop = ASRAD;
+		break;
+	case TUINT8:
+	case TUINT16:
+	case TUINT32:
+		mode = 1;
+		shiftop = ASRD;
+		break;
+	case TINT64:
+	case TUINT64:
+		mode = 2;
+		break;
+	default:
+		fatal("cgen_hmul %T", t);
+		break;
+	}
+
+	if(mode == 1) {
+		gins(optoas(OMUL, t), &rhs, &lhs);
+		p = gins(shiftop, N, &lhs);
+		p->from.type = D_CONST;
+		p->from.offset = bits;
+	} else if(mode == 2) {
+		if(issigned[t->etype])
+			gins(AMULHD, &rhs, &lhs);
+		else
+			gins(AMULHDU, &rhs, &lhs);
+	}
+
+	cgen(&lhs, res);
+	regfree(&lhs);
+	regfree(&rhs);
+}
+
+/*
+ * generate shift according to op, one of:
+ * res = nl << nr
+ * res = nl >> nr
+ *
+ * When bounded is zero and the count is not a literal, code is emitted
+ * to handle counts that are not less than the operand width in bits.
+ */
+void
+cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res)
+{
+ Node n1, n2, n3, n4, n5;
+ int a;
+ Prog *p1;
+ uvlong sc;
+ Type *tcount;
+
+ a = optoas(op, nl->type);
+
+ // Constant shift count: no run-time range check is needed.
+ if(nr->op == OLITERAL) {
+ regalloc(&n1, nl->type, res);
+ cgen(nl, &n1);
+ sc = mpgetfix(nr->val.u.xval);
+ if(sc >= nl->type->width*8) {
+ // large shift gets 2 shifts by width-1
+ nodconst(&n3, types[TUINT32], nl->type->width*8-1);
+ gins(a, &n3, &n1);
+ gins(a, &n3, &n1);
+ } else
+ gins(a, nr, &n1);
+ gmove(&n1, res);
+ regfree(&n1);
+ goto ret;
+ }
+
+ // Operands whose evaluation involves a function call (ullman >= UINF)
+ // are computed into temporaries first.
+ if(nl->ullman >= UINF) {
+ tempname(&n4, nl->type);
+ cgen(nl, &n4);
+ nl = &n4;
+ }
+ if(nr->ullman >= UINF) {
+ tempname(&n5, nr->type);
+ cgen(nr, &n5);
+ nr = &n5;
+ }
+
+ // Allow either uint32 or uint64 as shift type,
+ // to avoid unnecessary conversion from uint32 to uint64
+ // just to do the comparison.
+ tcount = types[simtype[nr->type->etype]];
+ if(tcount->etype < TUINT32)
+ tcount = types[TUINT32];
+
+ // NOTE(review): the two comments below mention CX, an x86 register;
+ // presumably inherited from another back end. n1 receives the shift
+ // count and n3 is a tcount-typed register requested with n1 as hint.
+ regalloc(&n1, nr->type, N); // to hold the shift type in CX
+ regalloc(&n3, tcount, &n1); // to clear high bits of CX
+
+ // n2 receives the value being shifted; evaluate the operand with the
+ // larger ullman number first.
+ regalloc(&n2, nl->type, res);
+ if(nl->ullman >= nr->ullman) {
+ cgen(nl, &n2);
+ cgen(nr, &n1);
+ gmove(&n1, &n3);
+ } else {
+ cgen(nr, &n1);
+ gmove(&n1, &n3);
+ cgen(nl, &n2);
+ }
+ regfree(&n3);
+
+ // test and fix up large shifts
+ if(!bounded) {
+ // When count < width the branch skips the fixup. Otherwise a
+ // signed right shift first shifts the value by width-1 and any
+ // other shift first sets the value to zero; the final shift
+ // below is emitted in all cases.
+ nodconst(&n3, tcount, nl->type->width*8);
+ gins(optoas(OCMP, tcount), &n1, &n3);
+ p1 = gbranch(optoas(OLT, tcount), T, +1);
+ if(op == ORSH && issigned[nl->type->etype]) {
+ nodconst(&n3, types[TUINT32], nl->type->width*8-1);
+ gins(a, &n3, &n2);
+ } else {
+ nodconst(&n3, nl->type, 0);
+ gmove(&n3, &n2);
+ }
+ patch(p1, pc);
+ }
+
+ gins(a, &n1, &n2);
+
+ gmove(&n2, res);
+
+ regfree(&n1);
+ regfree(&n2);
+
+ret:
+ ;
+}
+
+// clearfat emits code that zeroes the object nl, whose size is
+// nl->type->width bytes: q = width/8 doublewords followed by
+// c = width%8 trailing bytes.
+//
+// REGRT1 is used as the destination pointer and must be free on entry
+// (fatal otherwise); it is reserved for the duration of the function.
+//
+// Doublewords are cleared with one of three strategies:
+//   q > 128:  a MOVDU/CMP/BNE loop;
+//   q >= 4:   a call into duffzero at offset 4*(128-q);
+//   q < 4:    q individual MOVD stores.
+//
+// The trailing bytes are then stored relative to wherever the chosen
+// strategy left REGRT1 (boff). Previously they were always stored at
+// offsets 0..c-1 from REGRT1, which re-zeroed already cleared memory
+// and left the real tail of the object untouched.
+void
+clearfat(Node *nl)
+{
+	uint64 w, c, q, t, boff;
+	Node dst, end, r0, *f;
+	Prog *p, *pl;
+
+	/* clear a fat object */
+	if(debug['g']) {
+		print("clearfat %N (%T, size: %lld)\n", nl, nl->type, nl->type->width);
+	}
+
+	w = nl->type->width;
+	// Avoid taking the address for simple enough types.
+	//if(componentgen(N, nl))
+	//	return;
+
+	c = w % 8;	// bytes
+	q = w / 8;	// dwords
+
+	if(reg[REGRT1] > 0)
+		fatal("R%d in use during clearfat", REGRT1);
+
+	nodreg(&r0, types[TUINT64], 0); // r0 is always zero
+	nodreg(&dst, types[tptr], D_R0+REGRT1);
+	reg[REGRT1]++;
+	agen(nl, &dst);
+
+	if(q > 128) {
+		// dst = base - 8, so that the first store-with-update at
+		// 8(dst) hits the first doubleword
+		p = gins(ASUB, N, &dst);
+		p->from.type = D_CONST;
+		p->from.offset = 8;
+
+		// end = dst + q*8, i.e. the address of the last doubleword
+		regalloc(&end, types[tptr], N);
+		p = gins(AMOVD, &dst, &end);
+		p->from.type = D_CONST;
+		p->from.offset = q*8;
+
+		// loop: MOVDU R0, 8(dst); CMP dst, end; BNE loop
+		p = gins(AMOVDU, &r0, &dst);
+		p->to.type = D_OREG;
+		p->to.offset = 8;
+		pl = p;
+
+		p = gins(ACMP, &dst, &end);
+		patch(gbranch(ABNE, T, 0), pl);
+
+		regfree(&end);
+		// The loop exits with dst == end, the last zeroed doubleword,
+		// so the tail starts 8 bytes further on.
+		boff = 8;
+	} else if(q >= 4) {
+		p = gins(ASUB, N, &dst);
+		p->from.type = D_CONST;
+		p->from.offset = 8;
+		f = sysfunc("duffzero");
+		p = gins(ADUFFZERO, N, f);
+		afunclit(&p->to, f);
+		// 4 and 128 = magic constants: see ../../runtime/asm_power64x.s
+		p->to.offset = 4*(128-q);
+		// NOTE(review): relies on duffzero being a run of store-with-update
+		// instructions that leaves REGRT1 on the last zeroed doubleword;
+		// confirm against the runtime assembly.
+		boff = 8;
+	} else {
+		for(t = 0; t < q; t++) {
+			p = gins(AMOVD, &r0, &dst);
+			p->to.type = D_OREG;
+			p->to.offset = 8*t;
+		}
+		// dst still points at the start of the object
+		boff = 8*q;
+	}
+
+	// clear the remaining c bytes that follow the zeroed doublewords
+	for(t = 0; t < c; t++) {
+		p = gins(AMOVB, &r0, &dst);
+		p->to.type = D_OREG;
+		p->to.offset = t+boff;
+	}
+	reg[REGRT1]--;
+}
+
+// Called after regopt and peep have run.
+// Expand CHECKNIL pseudo-op into actual nil pointer check.
+//
+// Each CHECKNIL (whose operand must be a register) is rewritten in place
+// into a three-instruction sequence:
+//	CMP arg, R0
+//	BNE 2(PC) [likely]
+//	MOVD R0, 0(R0)
+// i.e. a store to address 0 that is skipped when arg is non-zero.
+//
+// An alternative single-instruction check, TD $4, R0, arg (equivalent to
+// tdeq r0, arg), is not used because it needs special runtime support to
+// make SIGTRAP recoverable.
+void
+expandchecks(Prog *firstp)
+{
+	Prog *p, *bne, *crash, *next;
+
+	for(p = firstp; p != P; p = p->link) {
+		if(debug_checknil && ctxt->debugvlog)
+			print("expandchecks: %P\n", p);
+		if(p->as != ACHECKNIL)
+			continue;
+		if(debug_checknil && p->lineno > 1) // p->lineno==1 in generated wrappers
+			warnl(p->lineno, "generated nil check");
+		if(p->from.type != D_REG)
+			fatal("invalid nil check %P\n", p);
+
+		// allocate the two extra instructions and splice them in after p
+		bne = mal(sizeof *bne);
+		crash = mal(sizeof *crash);
+		clearp(bne);
+		clearp(crash);
+		next = p->link;
+		bne->link = crash;
+		crash->link = next;
+		p->link = bne;
+
+		bne->lineno = p->lineno;
+		crash->lineno = p->lineno;
+		bne->pc = 9999;
+		crash->pc = 9999;
+
+		// CMP arg, R0 (p keeps its original from operand)
+		p->as = ACMP;
+		p->to.type = D_REG;
+		p->to.reg = REGZERO;
+
+		// BNE over the crashing store
+		bne->as = ABNE;
+		//bne->from.type = D_CONST;
+		//bne->from.offset = 1; // likely
+		bne->to.type = D_BRANCH;
+		bne->to.u.branch = next;
+
+		// crash by write to memory address 0.
+		crash->as = AMOVD;
+		crash->from.type = D_REG;
+		crash->from.reg = 0;
+		crash->to.type = D_OREG;
+		crash->to.reg = 0;
+		crash->to.offset = 0;
+	}
+}