summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ilia Mirkin <imirkin@alum.mit.edu> 2015-03-25 18:00:00 -0400
committer: Emil Velikov <emil.l.velikov@gmail.com> 2015-04-08 18:45:32 +0100
commit: c8d962c205bc5d6f81f3d083d90ecaf66f78b1e8 (patch)
tree: fec54c69a3af6d8f3e3655fff8f2b1c1fc90a47b
parent: 407365e375ca7ba78d95c2e3f189a837201bcc22 (diff)
download: mesa-c8d962c205bc5d6f81f3d083d90ecaf66f78b1e8.tar.gz
nv50/ir: take postFactor into account when doing peephole optimizations
Multiply operations can have a post-factor on them, which other ops don't support. Only perform the peephole optimizations when there is no post-factor involved.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89758
Cc: "10.4 10.5" <mesa-stable@lists.freedesktop.org>
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
(cherry picked from commit 49b86007aa2bb599ada6cdbed7ff56246917f12e)
-rw-r--r--  src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp  12
1 files changed, 8 insertions, 4 deletions
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 21d20caffdf..9e2a933daa4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -422,7 +422,9 @@ ConstantFolding::expr(Instruction *i,
b->data.f32 = 0.0f;
}
switch (i->dType) {
- case TYPE_F32: res.data.f32 = a->data.f32 * b->data.f32; break;
+ case TYPE_F32:
+ res.data.f32 = a->data.f32 * b->data.f32 * exp2f(i->postFactor);
+ break;
case TYPE_F64: res.data.f64 = a->data.f64 * b->data.f64; break;
case TYPE_S32:
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
@@ -550,6 +552,7 @@ ConstantFolding::expr(Instruction *i,
i->src(0).mod = Modifier(0);
i->src(1).mod = Modifier(0);
+ i->postFactor = 0;
i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.u32));
i->setSrc(1, NULL);
@@ -653,7 +656,7 @@ ConstantFolding::tryCollapseChainedMULs(Instruction *mul2,
Instruction *insn;
Instruction *mul1 = NULL; // mul1 before mul2
int e = 0;
- float f = imm2.reg.data.f32;
+ float f = imm2.reg.data.f32 * exp2f(mul2->postFactor);
ImmediateValue imm1;
assert(mul2->op == OP_MUL && mul2->dType == TYPE_F32);
@@ -753,9 +756,10 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
i->op = OP_MOV;
i->setSrc(0, new_ImmediateValue(prog, 0u));
i->src(0).mod = Modifier(0);
+ i->postFactor = 0;
i->setSrc(1, NULL);
} else
- if (imm0.isInteger(1) || imm0.isInteger(-1)) {
+ if (!i->postFactor && (imm0.isInteger(1) || imm0.isInteger(-1))) {
if (imm0.isNegative())
i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG);
i->op = i->src(t).mod.getOp();
@@ -768,7 +772,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
i->src(0).mod = 0;
i->setSrc(1, NULL);
} else
- if (imm0.isInteger(2) || imm0.isInteger(-2)) {
+ if (!i->postFactor && (imm0.isInteger(2) || imm0.isInteger(-2))) {
if (imm0.isNegative())
i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG);
i->op = OP_ADD;