summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoland Scheidegger <sroland@tungstengraphics.com>2008-04-12 02:40:44 +0200
committerRoland Scheidegger <sroland@tungstengraphics.com>2008-04-12 02:40:44 +0200
commit18404076e32f2ee63d4aba1382611c5608e1ab12 (patch)
treeb9e916ec8837fb806ffd28441f1b5a954d86409a
parent39dca05d1d5c66fd481c8e94d1edc5741f45a2fa (diff)
downloadmesa-18404076e32f2ee63d4aba1382611c5608e1ab12.tar.gz
r200: fix XPD vertex program instruction when using temps as inputs
due to the two read ports limit into temp memory may need the MAD_2 instruction for the second instruction of the decomposed XPD. While here, also try to avoid MAD_2 for MAD if all 3 inputs are temps but the temps aren't actually distinct.
-rw-r--r--src/mesa/drivers/dri/r200/r200_vertprog.c16
1 files changed, 13 insertions, 3 deletions
diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c
index 6089d617c6b..70881fd7572 100644
--- a/src/mesa/drivers/dri/r200/r200_vertprog.c
+++ b/src/mesa/drivers/dri/r200/r200_vertprog.c
@@ -744,9 +744,16 @@ static GLboolean r200_translate_vertex_program(GLcontext *ctx, struct r200_verte
goto next;
case OPCODE_MAD:
+ /* only 2 read ports into temp memory thus may need the macro op MAD_2
+ instead (requiring 2 clocks) if all inputs are in temp memory
+ (and, only if they actually reference 3 distinct temps) */
hw_op=(src[0].File == PROGRAM_TEMPORARY &&
src[1].File == PROGRAM_TEMPORARY &&
- src[2].File == PROGRAM_TEMPORARY) ? R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
+ src[2].File == PROGRAM_TEMPORARY &&
+ (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index)) &&
+ (((src[0].RelAddr << 8) | src[0].Index) != ((src[2].RelAddr << 8) | src[2].Index)) &&
+ (((src[1].RelAddr << 8) | src[1].Index) != ((src[2].RelAddr << 8) | src[2].Index))) ?
+ R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
t_dst_mask(dst.WriteMask));
@@ -874,8 +881,11 @@ else {
case OPCODE_XPD:
/* mul r0, r1.yzxw, r2.zxyw
mad r0, -r2.yzxw, r1.zxyw, r0
- NOTE: might need MAD_2
*/
+ hw_op=(src[0].File == PROGRAM_TEMPORARY &&
+ src[1].File == PROGRAM_TEMPORARY &&
+ (((src[0].RelAddr << 8) | src[0].Index) != ((src[1].RelAddr << 8) | src[1].Index))) ?
+ R200_VPI_OUT_OP_MAD_2 : R200_VPI_OUT_OP_MAD;
o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MUL,
(u_temp_i << R200_VPI_OUT_REG_INDEX_SHIFT) | R200_VSF_OUT_CLASS_TMP,
@@ -901,7 +911,7 @@ else {
o_inst++;
u_temp_i--;
- o_inst->op = MAKE_VSF_OP(R200_VPI_OUT_OP_MAD, t_dst(&dst),
+ o_inst->op = MAKE_VSF_OP(hw_op, t_dst(&dst),
t_dst_mask(dst.WriteMask));
o_inst->src0 = MAKE_VSF_SOURCE(t_src_index(vp, &src[1]),