summaryrefslogtreecommitdiff
path: root/src/gallium/auxiliary/tgsi
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/auxiliary/tgsi')
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt32
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_build.c166
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_build.h32
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c706
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.h4
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_parse.c8
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_parse.h2
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.c53
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.h3
9 files changed, 646 insertions, 360 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
index eb492076b7d..080fd4c7310 100644
--- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
+++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
@@ -1129,3 +1129,35 @@ TGSI Instruction Specification
target Label of target instruction.
+
+3 Other tokens
+===============
+
+
+3.1 Declaration Semantic
+-------------------------
+
+
+ Follows Declaration token if Semantic bit is set.
+
+ Since its purpose is to link a shader with other stages of the pipeline,
+ it is valid to follow only those Declaration tokens that declare a register
+ either in INPUT or OUTPUT file.
+
+ SemanticName field contains the semantic name of the register being declared.
+ There is no default value.
+
+ SemanticIndex is an optional subscript that can be used to distinguish
+ different register declarations with the same semantic name. The default value
+ is 0.
+
+ The meanings of the individual semantic names are explained in the following
+ sections.
+
+
+3.1.1 FACE
+
+ Valid only in a fragment shader INPUT declaration.
+
+ FACE.x is negative when the primitive is back facing. FACE.x is positive
+ when the primitive is front facing.
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index 4fa10e2f7e3..9791e58db3b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -419,6 +419,7 @@ tgsi_default_instruction( void )
instruction.NrTokens = 1;
instruction.Opcode = TGSI_OPCODE_MOV;
instruction.Saturate = TGSI_SAT_NONE;
+ instruction.Predicate = 0;
instruction.NumDstRegs = 1;
instruction.NumSrcRegs = 1;
instruction.Padding = 0;
@@ -428,12 +429,12 @@ tgsi_default_instruction( void )
}
struct tgsi_instruction
-tgsi_build_instruction(
- unsigned opcode,
- unsigned saturate,
- unsigned num_dst_regs,
- unsigned num_src_regs,
- struct tgsi_header *header )
+tgsi_build_instruction(unsigned opcode,
+ unsigned saturate,
+ unsigned predicate,
+ unsigned num_dst_regs,
+ unsigned num_src_regs,
+ struct tgsi_header *header)
{
struct tgsi_instruction instruction;
@@ -445,6 +446,7 @@ tgsi_build_instruction(
instruction = tgsi_default_instruction();
instruction.Opcode = opcode;
instruction.Saturate = saturate;
+ instruction.Predicate = predicate;
instruction.NumDstRegs = num_dst_regs;
instruction.NumSrcRegs = num_src_regs;
@@ -472,9 +474,9 @@ tgsi_default_full_instruction( void )
unsigned i;
full_instruction.Instruction = tgsi_default_instruction();
+ full_instruction.InstructionPredicate = tgsi_default_instruction_predicate();
full_instruction.InstructionExtLabel = tgsi_default_instruction_ext_label();
full_instruction.InstructionExtTexture = tgsi_default_instruction_ext_texture();
- full_instruction.InstructionExtPredicate = tgsi_default_instruction_ext_predicate();
for( i = 0; i < TGSI_FULL_MAX_DST_REGISTERS; i++ ) {
full_instruction.FullDstRegisters[i] = tgsi_default_full_dst_register();
}
@@ -504,14 +506,34 @@ tgsi_build_full_instruction(
instruction = (struct tgsi_instruction *) &tokens[size];
size++;
- *instruction = tgsi_build_instruction(
- full_inst->Instruction.Opcode,
- full_inst->Instruction.Saturate,
- full_inst->Instruction.NumDstRegs,
- full_inst->Instruction.NumSrcRegs,
- header );
+ *instruction = tgsi_build_instruction(full_inst->Instruction.Opcode,
+ full_inst->Instruction.Saturate,
+ full_inst->Instruction.Predicate,
+ full_inst->Instruction.NumDstRegs,
+ full_inst->Instruction.NumSrcRegs,
+ header);
prev_token = (struct tgsi_token *) instruction;
+ if (full_inst->Instruction.Predicate) {
+ struct tgsi_instruction_predicate *instruction_predicate;
+
+ if (maxsize <= size) {
+ return 0;
+ }
+ instruction_predicate = (struct tgsi_instruction_predicate *)&tokens[size];
+ size++;
+
+ *instruction_predicate =
+ tgsi_build_instruction_predicate(full_inst->InstructionPredicate.Index,
+ full_inst->InstructionPredicate.Negate,
+ full_inst->InstructionPredicate.SwizzleX,
+ full_inst->InstructionPredicate.SwizzleY,
+ full_inst->InstructionPredicate.SwizzleZ,
+ full_inst->InstructionPredicate.SwizzleW,
+ instruction,
+ header);
+ }
+
if( tgsi_compare_instruction_ext_label(
full_inst->InstructionExtLabel,
tgsi_default_instruction_ext_label() ) ) {
@@ -550,29 +572,6 @@ tgsi_build_full_instruction(
prev_token = (struct tgsi_token *) instruction_ext_texture;
}
- if (tgsi_compare_instruction_ext_predicate(full_inst->InstructionExtPredicate,
- tgsi_default_instruction_ext_predicate())) {
- struct tgsi_instruction_ext_predicate *instruction_ext_predicate;
-
- if (maxsize <= size) {
- return 0;
- }
- instruction_ext_predicate = (struct tgsi_instruction_ext_predicate *)&tokens[size];
- size++;
-
- *instruction_ext_predicate =
- tgsi_build_instruction_ext_predicate(full_inst->InstructionExtPredicate.SrcIndex,
- full_inst->InstructionExtPredicate.Negate,
- full_inst->InstructionExtPredicate.SwizzleX,
- full_inst->InstructionExtPredicate.SwizzleY,
- full_inst->InstructionExtPredicate.SwizzleZ,
- full_inst->InstructionExtPredicate.SwizzleW,
- prev_token,
- instruction,
- header);
- prev_token = (struct tgsi_token *)instruction_ext_predicate;
- }
-
for( i = 0; i < full_inst->Instruction.NumDstRegs; i++ ) {
const struct tgsi_full_dst_register *reg = &full_inst->FullDstRegisters[i];
struct tgsi_dst_register *dst_register;
@@ -746,6 +745,47 @@ tgsi_build_full_instruction(
return size;
}
+struct tgsi_instruction_predicate
+tgsi_default_instruction_predicate(void)
+{
+ struct tgsi_instruction_predicate instruction_predicate;
+
+ instruction_predicate.SwizzleX = TGSI_SWIZZLE_X;
+ instruction_predicate.SwizzleY = TGSI_SWIZZLE_Y;
+ instruction_predicate.SwizzleZ = TGSI_SWIZZLE_Z;
+ instruction_predicate.SwizzleW = TGSI_SWIZZLE_W;
+ instruction_predicate.Negate = 0;
+ instruction_predicate.Index = 0;
+ instruction_predicate.Padding = 0;
+
+ return instruction_predicate;
+}
+
+struct tgsi_instruction_predicate
+tgsi_build_instruction_predicate(int index,
+ unsigned negate,
+ unsigned swizzleX,
+ unsigned swizzleY,
+ unsigned swizzleZ,
+ unsigned swizzleW,
+ struct tgsi_instruction *instruction,
+ struct tgsi_header *header)
+{
+ struct tgsi_instruction_predicate instruction_predicate;
+
+ instruction_predicate = tgsi_default_instruction_predicate();
+ instruction_predicate.SwizzleX = swizzleX;
+ instruction_predicate.SwizzleY = swizzleY;
+ instruction_predicate.SwizzleZ = swizzleZ;
+ instruction_predicate.SwizzleW = swizzleW;
+ instruction_predicate.Negate = negate;
+ instruction_predicate.Index = index;
+
+ instruction_grow(instruction, header);
+
+ return instruction_predicate;
+}
+
/** test for inequality of 32-bit values pointed to by a and b */
static INLINE boolean
compare32(const void *a, const void *b)
@@ -835,60 +875,6 @@ tgsi_build_instruction_ext_texture(
return instruction_ext_texture;
}
-struct tgsi_instruction_ext_predicate
-tgsi_default_instruction_ext_predicate(void)
-{
- struct tgsi_instruction_ext_predicate instruction_ext_predicate;
-
- instruction_ext_predicate.Type = TGSI_INSTRUCTION_EXT_TYPE_PREDICATE;
- instruction_ext_predicate.SwizzleX = TGSI_SWIZZLE_X;
- instruction_ext_predicate.SwizzleY = TGSI_SWIZZLE_Y;
- instruction_ext_predicate.SwizzleZ = TGSI_SWIZZLE_Z;
- instruction_ext_predicate.SwizzleW = TGSI_SWIZZLE_W;
- instruction_ext_predicate.Negate = 0;
- instruction_ext_predicate.SrcIndex = 0;
- instruction_ext_predicate.Padding = 0;
- instruction_ext_predicate.Extended = 0;
-
- return instruction_ext_predicate;
-}
-
-unsigned
-tgsi_compare_instruction_ext_predicate(struct tgsi_instruction_ext_predicate a,
- struct tgsi_instruction_ext_predicate b)
-{
- a.Padding = b.Padding = 0;
- a.Extended = b.Extended = 0;
- return compare32(&a, &b);
-}
-
-struct tgsi_instruction_ext_predicate
-tgsi_build_instruction_ext_predicate(unsigned index,
- unsigned negate,
- unsigned swizzleX,
- unsigned swizzleY,
- unsigned swizzleZ,
- unsigned swizzleW,
- struct tgsi_token *prev_token,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header)
-{
- struct tgsi_instruction_ext_predicate instruction_ext_predicate;
-
- instruction_ext_predicate = tgsi_default_instruction_ext_predicate();
- instruction_ext_predicate.SwizzleX = swizzleX;
- instruction_ext_predicate.SwizzleY = swizzleY;
- instruction_ext_predicate.SwizzleZ = swizzleZ;
- instruction_ext_predicate.SwizzleW = swizzleW;
- instruction_ext_predicate.Negate = negate;
- instruction_ext_predicate.SrcIndex = index;
-
- prev_token->Extended = 1;
- instruction_grow(instruction, header);
-
- return instruction_ext_predicate;
-}
-
struct tgsi_src_register
tgsi_default_src_register( void )
{
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h
index 669712eb8f9..0fe5f229d37 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.h
@@ -143,6 +143,7 @@ struct tgsi_instruction
tgsi_build_instruction(
unsigned opcode,
unsigned saturate,
+ unsigned predicate,
unsigned num_dst_regs,
unsigned num_src_regs,
struct tgsi_header *header );
@@ -157,6 +158,19 @@ tgsi_build_full_instruction(
struct tgsi_header *header,
unsigned maxsize );
+struct tgsi_instruction_predicate
+tgsi_default_instruction_predicate(void);
+
+struct tgsi_instruction_predicate
+tgsi_build_instruction_predicate(int index,
+ unsigned negate,
+ unsigned swizzleX,
+ unsigned swizzleY,
+ unsigned swizzleZ,
+ unsigned swizzleW,
+ struct tgsi_instruction *instruction,
+ struct tgsi_header *header);
+
struct tgsi_instruction_ext_label
tgsi_default_instruction_ext_label( void );
@@ -187,24 +201,6 @@ tgsi_build_instruction_ext_texture(
struct tgsi_instruction *instruction,
struct tgsi_header *header );
-struct tgsi_instruction_ext_predicate
-tgsi_default_instruction_ext_predicate(void);
-
-unsigned
-tgsi_compare_instruction_ext_predicate(struct tgsi_instruction_ext_predicate a,
- struct tgsi_instruction_ext_predicate b);
-
-struct tgsi_instruction_ext_predicate
-tgsi_build_instruction_ext_predicate(unsigned index,
- unsigned negate,
- unsigned swizzleX,
- unsigned swizzleY,
- unsigned swizzleZ,
- unsigned swizzleW,
- struct tgsi_token *prev_token,
- struct tgsi_instruction *instruction,
- struct tgsi_header *header);
-
struct tgsi_src_register
tgsi_default_src_register( void );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index ade5e7cbbd2..2bf367ba505 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -62,9 +62,6 @@
#define FAST_MATH 1
-/** for tgsi_full_instruction::Flags */
-#define SOA_DEPENDENCY_FLAG 0x1
-
#define TILE_TOP_LEFT 0
#define TILE_TOP_RIGHT 1
#define TILE_BOTTOM_LEFT 2
@@ -340,20 +337,6 @@ tgsi_exec_machine_bind_shader(
maxInstructions += 10;
}
- if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) {
- uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
- parse.FullToken.FullInstruction.Flags = SOA_DEPENDENCY_FLAG;
- /* XXX we only handle SOA dependencies properly for MOV/SWZ
- * at this time!
- */
- if (opcode != TGSI_OPCODE_MOV) {
- debug_printf("Warning: SOA dependency in instruction"
- " is not handled:\n");
- tgsi_dump_instruction(&parse.FullToken.FullInstruction,
- numInstructions);
- }
- }
-
memcpy(instructions + numInstructions,
&parse.FullToken.FullInstruction,
sizeof(instructions[0]));
@@ -395,6 +378,7 @@ tgsi_exec_machine_create( void )
mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
+ mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0];
/* Setup constants. */
for( i = 0; i < 4; i++ ) {
@@ -526,7 +510,7 @@ micro_ddy(
dst->f[0] =
dst->f[1] =
dst->f[2] =
- dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
+ dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
}
static void
@@ -604,6 +588,24 @@ micro_exp2(
dst->f[2] = util_fast_exp2( src->f[2] );
dst->f[3] = util_fast_exp2( src->f[3] );
#else
+
+#if DEBUG
+ /* Inf is okay for this instruction, so clamp it to silence assertions. */
+ uint i;
+ union tgsi_exec_channel clamped;
+
+ for (i = 0; i < 4; i++) {
+ if (src->f[i] > 127.99999f) {
+ clamped.f[i] = 127.99999f;
+ } else if (src->f[i] < -126.99999f) {
+ clamped.f[i] = -126.99999f;
+ } else {
+ clamped.f[i] = src->f[i];
+ }
+ }
+ src = &clamped;
+#endif
+
dst->f[0] = powf( 2.0f, src->f[0] );
dst->f[1] = powf( 2.0f, src->f[1] );
dst->f[2] = powf( 2.0f, src->f[2] );
@@ -1202,10 +1204,10 @@ fetch_src_file_channel(
assert(index->i[1] < TGSI_EXEC_NUM_PREDS);
assert(index->i[2] < TGSI_EXEC_NUM_PREDS);
assert(index->i[3] < TGSI_EXEC_NUM_PREDS);
- chan->u[0] = mach->Addrs[0].xyzw[swizzle].u[0];
- chan->u[1] = mach->Addrs[0].xyzw[swizzle].u[1];
- chan->u[2] = mach->Addrs[0].xyzw[swizzle].u[2];
- chan->u[3] = mach->Addrs[0].xyzw[swizzle].u[3];
+ chan->u[0] = mach->Predicates[0].xyzw[swizzle].u[0];
+ chan->u[1] = mach->Predicates[0].xyzw[swizzle].u[1];
+ chan->u[2] = mach->Predicates[0].xyzw[swizzle].u[2];
+ chan->u[3] = mach->Predicates[0].xyzw[swizzle].u[3];
break;
case TGSI_FILE_OUTPUT:
@@ -1496,10 +1498,17 @@ store_dest(
dst = &mach->Addrs[index].xyzw[chan_index];
break;
+ case TGSI_FILE_LOOP:
+ assert(reg->DstRegister.Index == 0);
+ assert(mach->LoopCounterStackTop > 0);
+ assert(chan_index == CHAN_X);
+ dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index];
+ break;
+
case TGSI_FILE_PREDICATE:
index = reg->DstRegister.Index;
assert(index < TGSI_EXEC_NUM_PREDS);
- dst = &mach->Addrs[index].xyzw[chan_index];
+ dst = &mach->Predicates[index].xyzw[chan_index];
break;
default:
@@ -1507,6 +1516,47 @@ store_dest(
return;
}
+ if (inst->Instruction.Predicate) {
+ uint swizzle;
+ union tgsi_exec_channel *pred;
+
+ switch (chan_index) {
+ case CHAN_X:
+ swizzle = inst->InstructionPredicate.SwizzleX;
+ break;
+ case CHAN_Y:
+ swizzle = inst->InstructionPredicate.SwizzleY;
+ break;
+ case CHAN_Z:
+ swizzle = inst->InstructionPredicate.SwizzleZ;
+ break;
+ case CHAN_W:
+ swizzle = inst->InstructionPredicate.SwizzleW;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ assert(inst->InstructionPredicate.Index == 0);
+
+ pred = &mach->Predicates[inst->InstructionPredicate.Index].xyzw[swizzle];
+
+ if (inst->InstructionPredicate.Negate) {
+ for (i = 0; i < QUAD_SIZE; i++) {
+ if (pred->u[i]) {
+ execmask &= ~(1 << i);
+ }
+ }
+ } else {
+ for (i = 0; i < QUAD_SIZE; i++) {
+ if (!pred->u[i]) {
+ execmask &= ~(1 << i);
+ }
+ }
+ }
+ }
+
switch (inst->Instruction.Saturate) {
case TGSI_SAT_NONE:
for (i = 0; i < QUAD_SIZE; i++)
@@ -1762,6 +1812,64 @@ exec_tex(struct tgsi_exec_machine *mach,
}
}
+static void
+exec_txd(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_instruction *inst)
+{
+ const uint unit = inst->FullSrcRegisters[3].SrcRegister.Index;
+ union tgsi_exec_channel r[4];
+ uint chan_index;
+
+ /*
+ * XXX: This is fake TXD -- the derivatives are not taken into account, yet.
+ */
+
+ switch (inst->InstructionExtTexture.Texture) {
+ case TGSI_TEXTURE_1D:
+ case TGSI_TEXTURE_SHADOW1D:
+
+ FETCH(&r[0], 0, CHAN_X);
+
+ fetch_texel(mach->Samplers[unit],
+ &r[0], &ZeroVec, &ZeroVec, 0.0f, /* S, T, P, BIAS */
+ &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
+ break;
+
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+
+ FETCH(&r[0], 0, CHAN_X);
+ FETCH(&r[1], 0, CHAN_Y);
+ FETCH(&r[2], 0, CHAN_Z);
+
+ fetch_texel(mach->Samplers[unit],
+ &r[0], &r[1], &r[2], 0.0f, /* inputs */
+ &r[0], &r[1], &r[2], &r[3]); /* outputs */
+ break;
+
+ case TGSI_TEXTURE_3D:
+ case TGSI_TEXTURE_CUBE:
+
+ FETCH(&r[0], 0, CHAN_X);
+ FETCH(&r[1], 0, CHAN_Y);
+ FETCH(&r[2], 0, CHAN_Z);
+
+ fetch_texel(mach->Samplers[unit],
+ &r[0], &r[1], &r[2], 0.0f,
+ &r[0], &r[1], &r[2], &r[3]);
+ break;
+
+ default:
+ assert(0);
+ }
+
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&r[chan_index], 0, chan_index);
+ }
+}
+
/**
* Evaluate a constant-valued coefficient at the position of the
@@ -1831,53 +1939,58 @@ typedef void (* eval_coef_func)(
unsigned chan );
static void
-exec_declaration(
- struct tgsi_exec_machine *mach,
- const struct tgsi_full_declaration *decl )
+exec_declaration(struct tgsi_exec_machine *mach,
+ const struct tgsi_full_declaration *decl)
{
- if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
- if( decl->Declaration.File == TGSI_FILE_INPUT ) {
- unsigned first, last, mask;
- eval_coef_func eval;
+ if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
+ if (decl->Declaration.File == TGSI_FILE_INPUT) {
+ uint first, last, mask;
first = decl->DeclarationRange.First;
last = decl->DeclarationRange.Last;
mask = decl->Declaration.UsageMask;
- switch( decl->Declaration.Interpolate ) {
- case TGSI_INTERPOLATE_CONSTANT:
- eval = eval_constant_coef;
- break;
+ if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) {
+ assert(decl->Semantic.SemanticIndex == 0);
+ assert(first == last);
+ assert(mask = TGSI_WRITEMASK_XYZW);
- case TGSI_INTERPOLATE_LINEAR:
- eval = eval_linear_coef;
- break;
+ mach->Inputs[first] = mach->QuadPos;
+ } else if (decl->Semantic.SemanticName == TGSI_SEMANTIC_FACE) {
+ uint i;
- case TGSI_INTERPOLATE_PERSPECTIVE:
- eval = eval_perspective_coef;
- break;
+ assert(decl->Semantic.SemanticIndex == 0);
+ assert(first == last);
- default:
- assert( 0 );
- return;
- }
-
- if( mask == TGSI_WRITEMASK_XYZW ) {
- unsigned i, j;
-
- for( i = first; i <= last; i++ ) {
- for( j = 0; j < NUM_CHANNELS; j++ ) {
- eval( mach, i, j );
- }
+ for (i = 0; i < QUAD_SIZE; i++) {
+ mach->Inputs[first].xyzw[0].f[i] = mach->Face;
+ }
+ } else {
+ eval_coef_func eval;
+ uint i, j;
+
+ switch (decl->Declaration.Interpolate) {
+ case TGSI_INTERPOLATE_CONSTANT:
+ eval = eval_constant_coef;
+ break;
+
+ case TGSI_INTERPOLATE_LINEAR:
+ eval = eval_linear_coef;
+ break;
+
+ case TGSI_INTERPOLATE_PERSPECTIVE:
+ eval = eval_perspective_coef;
+ break;
+
+ default:
+ assert(0);
+ return;
}
- }
- else {
- unsigned i, j;
- for( j = 0; j < NUM_CHANNELS; j++ ) {
- if( mask & (1 << j) ) {
- for( i = first; i <= last; i++ ) {
- eval( mach, i, j );
+ for (j = 0; j < NUM_CHANNELS; j++) {
+ if (mask & (1 << j)) {
+ for (i = first; i <= last; i++) {
+ eval(mach, i, j);
}
}
}
@@ -1894,6 +2007,7 @@ exec_instruction(
{
uint chan_index;
union tgsi_exec_channel r[10];
+ union tgsi_exec_channel d[8];
(*pc)++;
@@ -1902,42 +2016,27 @@ exec_instruction(
case TGSI_OPCODE_FLR:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_flr( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_flr(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
case TGSI_OPCODE_MOV:
- if (inst->Flags & SOA_DEPENDENCY_FLAG) {
- /* Do all fetches into temp regs, then do all stores to avoid
- * intermediate/accidental clobbering. This could be done all the
- * time for MOV but for other instructions we'll need more temps...
- */
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[chan_index], 0, chan_index );
- }
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( &r[chan_index], 0, chan_index );
- }
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH(&d[chan_index], 0, chan_index);
}
- else {
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- STORE( &r[0], 0, chan_index );
- }
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
case TGSI_OPCODE_LIT:
- if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
- }
-
if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
FETCH( &r[0], 0, CHAN_X );
if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, CHAN_Y );
+ micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
}
if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
@@ -1948,11 +2047,19 @@ exec_instruction(
micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
micro_pow( &r[1], &r[1], &r[2] );
- micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, CHAN_Z );
+ micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
}
- }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+ STORE(&d[CHAN_Y], 0, CHAN_Y);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ STORE(&d[CHAN_Z], 0, CHAN_Z);
+ }
+ }
+ if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
+ STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
+ }
if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
}
@@ -2020,14 +2127,13 @@ exec_instruction(
break;
case TGSI_OPCODE_MUL:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
- {
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
FETCH(&r[0], 0, chan_index);
FETCH(&r[1], 1, chan_index);
-
- micro_mul( &r[0], &r[0], &r[1] );
-
- STORE(&r[0], 0, chan_index);
+ micro_mul(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2035,8 +2141,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_add( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
+ micro_add(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2092,25 +2200,29 @@ exec_instruction(
break;
case TGSI_OPCODE_DST:
- if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
- STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
- }
-
if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
FETCH( &r[0], 0, CHAN_Y );
FETCH( &r[1], 1, CHAN_Y);
- micro_mul( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, CHAN_Y );
+ micro_mul(&d[CHAN_Y], &r[0], &r[1]);
}
-
if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- FETCH( &r[0], 0, CHAN_Z );
- STORE( &r[0], 0, CHAN_Z );
+ FETCH(&d[CHAN_Z], 0, CHAN_Z);
}
-
if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
- FETCH( &r[0], 1, CHAN_W );
- STORE( &r[0], 0, CHAN_W );
+ FETCH(&d[CHAN_W], 1, CHAN_W);
+ }
+
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
+ STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+ STORE(&d[CHAN_Y], 0, CHAN_Y);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ STORE(&d[CHAN_Z], 0, CHAN_Z);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
+ STORE(&d[CHAN_W], 0, CHAN_W);
}
break;
@@ -2120,9 +2232,10 @@ exec_instruction(
FETCH(&r[1], 1, chan_index);
/* XXX use micro_min()?? */
- micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
-
- STORE(&r[0], 0, chan_index);
+ micro_lt(&d[chan_index], &r[0], &r[1], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2132,9 +2245,10 @@ exec_instruction(
FETCH(&r[1], 1, chan_index);
/* XXX use micro_max()?? */
- micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
-
- STORE(&r[0], 0, chan_index );
+ micro_lt(&d[chan_index], &r[0], &r[1], &r[1], &r[0] );
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2143,8 +2257,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, chan_index );
+ micro_lt(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2153,9 +2269,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
-
- micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, chan_index );
+ micro_le(&d[chan_index], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2166,8 +2283,10 @@ exec_instruction(
FETCH( &r[1], 1, chan_index );
micro_mul( &r[0], &r[0], &r[1] );
FETCH( &r[1], 2, chan_index );
- micro_add( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
+ micro_add(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2175,10 +2294,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH(&r[0], 0, chan_index);
FETCH(&r[1], 1, chan_index);
-
- micro_sub( &r[0], &r[0], &r[1] );
-
- STORE(&r[0], 0, chan_index);
+ micro_sub(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2187,12 +2306,12 @@ exec_instruction(
FETCH(&r[0], 0, chan_index);
FETCH(&r[1], 1, chan_index);
FETCH(&r[2], 2, chan_index);
-
micro_sub( &r[1], &r[1], &r[2] );
micro_mul( &r[0], &r[0], &r[1] );
- micro_add( &r[0], &r[0], &r[2] );
-
- STORE(&r[0], 0, chan_index);
+ micro_add(&d[chan_index], &r[0], &r[2]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2201,8 +2320,10 @@ exec_instruction(
FETCH(&r[0], 0, chan_index);
FETCH(&r[1], 1, chan_index);
FETCH(&r[2], 2, chan_index);
- micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
- STORE(&r[0], 0, chan_index);
+ micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2227,8 +2348,10 @@ exec_instruction(
case TGSI_OPCODE_FRC:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_frc( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_frc(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2238,8 +2361,10 @@ exec_instruction(
FETCH(&r[1], 1, chan_index);
micro_max(&r[0], &r[0], &r[1]);
FETCH(&r[1], 2, chan_index);
- micro_min(&r[0], &r[0], &r[1]);
- STORE(&r[0], 0, chan_index);
+ micro_min(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2247,19 +2372,17 @@ exec_instruction(
case TGSI_OPCODE_ARR:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_rnd( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_rnd(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
case TGSI_OPCODE_EX2:
FETCH(&r[0], 0, CHAN_X);
-#if FAST_MATH
micro_exp2( &r[0], &r[0] );
-#else
- micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
-#endif
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
STORE( &r[0], 0, chan_index );
@@ -2295,11 +2418,7 @@ exec_instruction(
FETCH(&r[4], 1, CHAN_Y);
micro_mul( &r[5], &r[3], &r[4] );
- micro_sub( &r[2], &r[2], &r[5] );
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
- STORE( &r[2], 0, CHAN_X );
- }
+ micro_sub(&d[CHAN_X], &r[2], &r[5]);
FETCH(&r[2], 1, CHAN_X);
@@ -2308,20 +2427,21 @@ exec_instruction(
FETCH(&r[5], 0, CHAN_X);
micro_mul( &r[1], &r[1], &r[5] );
- micro_sub( &r[3], &r[3], &r[1] );
-
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
- STORE( &r[3], 0, CHAN_Y );
- }
+ micro_sub(&d[CHAN_Y], &r[3], &r[1]);
micro_mul( &r[5], &r[5], &r[4] );
micro_mul( &r[0], &r[0], &r[2] );
- micro_sub( &r[5], &r[5], &r[0] );
+ micro_sub(&d[CHAN_Z], &r[5], &r[0]);
- if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
- STORE( &r[5], 0, CHAN_Z );
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
+ STORE(&d[CHAN_X], 0, CHAN_X);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+ STORE(&d[CHAN_Y], 0, CHAN_Y);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ STORE(&d[CHAN_Z], 0, CHAN_Z);
}
-
if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
}
@@ -2330,11 +2450,11 @@ exec_instruction(
case TGSI_OPCODE_ABS:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH(&r[0], 0, chan_index);
-
- micro_abs( &r[0], &r[0] );
-
- STORE(&r[0], 0, chan_index);
+ micro_abs(&d[chan_index], &r[0]);
}
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
+ }
break;
case TGSI_OPCODE_RCC:
@@ -2386,16 +2506,20 @@ exec_instruction(
case TGSI_OPCODE_DDX:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_ddx( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_ddx(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
case TGSI_OPCODE_DDY:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_ddy( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_ddy(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2476,10 +2600,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_eq( &r[0], &r[0], &r[1],
- &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
- &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, chan_index );
+ micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2493,8 +2617,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
- STORE( &r[0], 0, chan_index );
+ micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2510,8 +2636,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
- STORE( &r[0], 0, chan_index );
+ micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2519,8 +2647,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
- STORE( &r[0], 0, chan_index );
+ micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2550,7 +2680,7 @@ exec_instruction(
/* src[1] = d[strq]/dx */
/* src[2] = d[strq]/dy */
/* src[3] = sampler unit */
- assert (0);
+ exec_txd(mach, inst);
break;
case TGSI_OPCODE_TXL:
@@ -2594,13 +2724,8 @@ exec_instruction(
micro_mul(&r[3], &r[3], &r[1]);
micro_add(&r[2], &r[2], &r[3]);
FETCH(&r[3], 0, CHAN_X);
- micro_add(&r[2], &r[2], &r[3]);
- if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
- STORE(&r[2], 0, CHAN_X);
- }
- if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
- STORE(&r[2], 0, CHAN_Z);
- }
+ micro_add(&d[CHAN_X], &r[2], &r[3]);
+
}
if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
@@ -2610,13 +2735,20 @@ exec_instruction(
micro_mul(&r[3], &r[3], &r[1]);
micro_add(&r[2], &r[2], &r[3]);
FETCH(&r[3], 0, CHAN_Y);
- micro_add(&r[2], &r[2], &r[3]);
- if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
- STORE(&r[2], 0, CHAN_Y);
- }
- if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
- STORE(&r[2], 0, CHAN_W);
- }
+ micro_add(&d[CHAN_Y], &r[2], &r[3]);
+
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
+ STORE(&d[CHAN_X], 0, CHAN_X);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+ STORE(&d[CHAN_Y], 0, CHAN_Y);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+ STORE(&d[CHAN_X], 0, CHAN_Z);
+ }
+ if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
+ STORE(&d[CHAN_Y], 0, CHAN_W);
}
break;
@@ -2701,8 +2833,10 @@ exec_instruction(
/* TGSI_OPCODE_SGN */
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_sgn( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_sgn(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2711,10 +2845,10 @@ exec_instruction(
FETCH(&r[0], 0, chan_index);
FETCH(&r[1], 1, chan_index);
FETCH(&r[2], 2, chan_index);
-
- micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
-
- STORE(&r[0], 0, chan_index);
+ micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2889,32 +3023,40 @@ exec_instruction(
case TGSI_OPCODE_CEIL:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_ceil( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_ceil(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
case TGSI_OPCODE_I2F:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_i2f( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_i2f(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
case TGSI_OPCODE_NOT:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_not( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_not(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
case TGSI_OPCODE_TRUNC:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
- micro_trunc( &r[0], &r[0] );
- STORE( &r[0], 0, chan_index );
+ micro_trunc(&d[chan_index], &r[0]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2922,8 +3064,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_shl( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
+ micro_shl(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2931,8 +3075,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_ishr( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
+ micro_ishr(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2940,8 +3086,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_and( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
+ micro_and(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2949,8 +3097,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_or( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
+ micro_or(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2962,8 +3112,10 @@ exec_instruction(
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
FETCH( &r[1], 1, chan_index );
- micro_xor( &r[0], &r[0], &r[1] );
- STORE( &r[0], 0, chan_index );
+ micro_xor(&d[chan_index], &r[0], &r[1]);
+ }
+ FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+ STORE(&d[chan_index], 0, chan_index);
}
break;
@@ -2992,8 +3144,23 @@ exec_instruction(
for (chan_index = 0; chan_index < 3; chan_index++) {
FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index );
}
- STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X );
++mach->LoopCounterStackTop;
+ STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X);
+ /* update LoopMask */
+ if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
+ mach->LoopMask &= ~0x1;
+ }
+ if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
+ mach->LoopMask &= ~0x2;
+ }
+ if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
+ mach->LoopMask &= ~0x4;
+ }
+ if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
+ mach->LoopMask &= ~0x8;
+ }
+ /* TODO: if mach->LoopMask == 0, jump to end of loop */
+ UPDATE_EXEC_MASK(mach);
/* fall-through (for now) */
case TGSI_OPCODE_BGNLOOP:
/* push LoopMask and ContMasks */
@@ -3007,28 +3174,28 @@ exec_instruction(
case TGSI_OPCODE_ENDFOR:
assert(mach->LoopCounterStackTop > 0);
- micro_sub( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
- &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
- &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
+ micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
+ &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
+ &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
/* update LoopMask */
- if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[0] <= 0) {
+ if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
mach->LoopMask &= ~0x1;
}
- if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[1] <= 0 ) {
+ if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
mach->LoopMask &= ~0x2;
}
- if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[2] <= 0 ) {
+ if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
mach->LoopMask &= ~0x4;
}
- if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[3] <= 0 ) {
+ if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
mach->LoopMask &= ~0x8;
}
- micro_add( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
- &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
- &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]);
+ micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
+ &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
+ &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]);
assert(mach->LoopLabelStackTop > 0);
inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1];
- STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X );
+ STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X);
/* Restore ContMask, but don't pop */
assert(mach->ContStackTop > 0);
mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
@@ -3095,7 +3262,28 @@ exec_instruction(
break;
case TGSI_OPCODE_ENDSUB:
- /* no-op */
+ /*
+ * XXX: This really should be a no-op. We should never reach this opcode.
+ */
+
+ assert(mach->CallStackTop > 0);
+ mach->CallStackTop--;
+
+ mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
+ mach->CondMask = mach->CondStack[mach->CondStackTop];
+
+ mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
+ mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
+
+ mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
+ mach->ContMask = mach->ContStack[mach->ContStackTop];
+
+ assert(mach->FuncStackTop > 0);
+ mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
+
+ *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
+
+ UPDATE_EXEC_MASK(mach);
break;
case TGSI_OPCODE_NOP:
@@ -3106,6 +3294,8 @@ exec_instruction(
}
}
+#define DEBUG_EXECUTION 0
+
/**
* Run TGSI interpreter.
@@ -3149,10 +3339,67 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
exec_declaration( mach, mach->Declarations+i );
}
- /* execute instructions, until pc is set to -1 */
- while (pc != -1) {
- assert(pc < (int) mach->NumInstructions);
- exec_instruction( mach, mach->Instructions + pc, &pc );
+ {
+#if DEBUG_EXECUTION
+ struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS];
+ struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
+ uint inst = 1;
+
+ memcpy(temps, mach->Temps, sizeof(temps));
+ memcpy(outputs, mach->Outputs, sizeof(outputs));
+#endif
+
+ /* execute instructions, until pc is set to -1 */
+ while (pc != -1) {
+
+#if DEBUG_EXECUTION
+ uint i;
+
+ tgsi_dump_instruction(&mach->Instructions[pc], inst++);
+#endif
+
+ assert(pc < (int) mach->NumInstructions);
+ exec_instruction(mach, mach->Instructions + pc, &pc);
+
+#if DEBUG_EXECUTION
+ for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) {
+ if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
+ uint j;
+
+ memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
+ debug_printf("TEMP[%2u] = ", i);
+ for (j = 0; j < 4; j++) {
+ if (j > 0) {
+ debug_printf(" ");
+ }
+ debug_printf("(%6f, %6f, %6f, %6f)\n",
+ temps[i].xyzw[0].f[j],
+ temps[i].xyzw[1].f[j],
+ temps[i].xyzw[2].f[j],
+ temps[i].xyzw[3].f[j]);
+ }
+ }
+ }
+ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
+ if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
+ uint j;
+
+ memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
+ debug_printf("OUT[%2u] = ", i);
+ for (j = 0; j < 4; j++) {
+ if (j > 0) {
+ debug_printf(" ");
+ }
+ debug_printf("{%6f, %6f, %6f, %6f}\n",
+ outputs[i].xyzw[0].f[j],
+ outputs[i].xyzw[1].f[j],
+ outputs[i].xyzw[2].f[j],
+ outputs[i].xyzw[3].f[j]);
+ }
+ }
+ }
+#endif
+ }
}
#if 0
@@ -3166,5 +3413,10 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
}
#endif
+ assert(mach->CondStackTop == 0);
+ assert(mach->LoopStackTop == 0);
+ assert(mach->ContStackTop == 0);
+ assert(mach->CallStackTop == 0);
+
return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 71fd300cbb0..afaf5c39c48 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -225,6 +225,7 @@ struct tgsi_exec_machine
struct tgsi_exec_vector Outputs[TGSI_MAX_TOTAL_VERTICES];
struct tgsi_exec_vector *Addrs;
+ struct tgsi_exec_vector *Predicates;
struct tgsi_sampler **Samplers;
@@ -241,6 +242,7 @@ struct tgsi_exec_machine
/* FRAGMENT processor only. */
const struct tgsi_interp_coef *InterpCoefs;
struct tgsi_exec_vector QuadPos;
+ float Face; /**< +1 if front facing, -1 if back facing */
/* Conditional execution masks */
uint CondMask; /**< For IF/ELSE/ENDIF */
@@ -261,7 +263,7 @@ struct tgsi_exec_machine
uint LoopLabelStack[TGSI_EXEC_MAX_LOOP_NESTING];
int LoopLabelStackTop;
- /** Loop counter stack (x = count, y = current, z = step) */
+ /** Loop counter stack (x = index, y = counter, z = step) */
struct tgsi_exec_vector LoopCounterStack[TGSI_EXEC_MAX_LOOP_NESTING];
int LoopCounterStackTop;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index 83f9df1183e..9ca29934528 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -175,6 +175,10 @@ tgsi_parse_token(
copy_token(&inst->Instruction, &token);
extended = inst->Instruction.Extended;
+ if (inst->Instruction.Predicate) {
+ next_token(ctx, &inst->InstructionPredicate);
+ }
+
while( extended ) {
struct tgsi_src_register_ext token;
@@ -189,10 +193,6 @@ tgsi_parse_token(
copy_token(&inst->InstructionExtTexture, &token);
break;
- case TGSI_INSTRUCTION_EXT_TYPE_PREDICATE:
- copy_token(&inst->InstructionExtPredicate, &token);
- break;
-
default:
assert( 0 );
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h
index 76f1676d85d..cb4772ade8d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h
@@ -80,9 +80,9 @@ struct tgsi_full_immediate
struct tgsi_full_instruction
{
struct tgsi_instruction Instruction;
+ struct tgsi_instruction_predicate InstructionPredicate;
struct tgsi_instruction_ext_label InstructionExtLabel;
struct tgsi_instruction_ext_texture InstructionExtTexture;
- struct tgsi_instruction_ext_predicate InstructionExtPredicate;
struct tgsi_full_dst_register FullDstRegisters[TGSI_FULL_MAX_DST_REGISTERS];
struct tgsi_full_src_register FullSrcRegisters[TGSI_FULL_MAX_SRC_REGISTERS];
uint Flags; /**< user-defined usage */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index 80e6ff2b795..557c885a8ad 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -47,9 +47,9 @@ union tgsi_any_token {
struct tgsi_immediate imm;
union tgsi_immediate_data imm_data;
struct tgsi_instruction insn;
+ struct tgsi_instruction_predicate insn_predicate;
struct tgsi_instruction_ext_label insn_ext_label;
struct tgsi_instruction_ext_texture insn_ext_texture;
- struct tgsi_instruction_ext_predicate insn_ext_predicate;
struct tgsi_src_register src;
struct tgsi_src_register_ext_mod src_ext_mod;
struct tgsi_dimension dim;
@@ -72,6 +72,7 @@ struct ureg_tokens {
#define UREG_MAX_IMMEDIATE 32
#define UREG_MAX_TEMP 256
#define UREG_MAX_ADDR 2
+#define UREG_MAX_LOOP 1
#define UREG_MAX_PRED 1
#define DOMAIN_DECL 0
@@ -123,6 +124,7 @@ struct ureg_program
unsigned nr_addrs;
unsigned nr_preds;
+ unsigned nr_loops;
unsigned nr_instructions;
struct ureg_tokens domain[2];
@@ -383,6 +385,7 @@ struct ureg_src ureg_DECL_constant(struct ureg_program *ureg,
i = ureg->nr_constant_ranges++;
ureg->constant_range[i].first = index;
ureg->constant_range[i].last = index;
+ goto out;
}
/* Collapse all ranges down to one:
@@ -450,6 +453,19 @@ struct ureg_dst ureg_DECL_address( struct ureg_program *ureg )
return ureg_dst_register( TGSI_FILE_ADDRESS, 0 );
}
+/* Allocate a new loop register.
+ */
+struct ureg_dst
+ureg_DECL_loop(struct ureg_program *ureg)
+{
+ if (ureg->nr_loops < UREG_MAX_LOOP) {
+ return ureg_dst_register(TGSI_FILE_LOOP, ureg->nr_loops++);
+ }
+
+ assert(0);
+ return ureg_dst_register(TGSI_FILE_LOOP, 0);
+}
+
/* Allocate a new predicate register.
*/
struct ureg_dst
@@ -694,35 +710,27 @@ ureg_emit_insn(struct ureg_program *ureg,
validate( opcode, num_dst, num_src );
out = get_tokens( ureg, DOMAIN_INSN, count );
- out[0].value = 0;
- out[0].insn.Type = TGSI_TOKEN_TYPE_INSTRUCTION;
- out[0].insn.NrTokens = 0;
+ out[0].insn = tgsi_default_instruction();
out[0].insn.Opcode = opcode;
out[0].insn.Saturate = saturate;
out[0].insn.NumDstRegs = num_dst;
out[0].insn.NumSrcRegs = num_src;
- out[0].insn.Padding = 0;
result.insn_token = ureg->domain[DOMAIN_INSN].count - count;
+ result.extended_token = result.insn_token;
if (predicate) {
- out[0].insn.Extended = 1;
- out[1].insn_ext_predicate = tgsi_default_instruction_ext_predicate();
- out[1].insn_ext_predicate.Negate = pred_negate;
- out[1].insn_ext_predicate.SwizzleX = pred_swizzle_x;
- out[1].insn_ext_predicate.SwizzleY = pred_swizzle_y;
- out[1].insn_ext_predicate.SwizzleZ = pred_swizzle_z;
- out[1].insn_ext_predicate.SwizzleW = pred_swizzle_w;
-
- result.extended_token = result.insn_token + 1;
- } else {
- out[0].insn.Extended = 0;
-
- result.extended_token = result.insn_token;
+ out[0].insn.Predicate = 1;
+ out[1].insn_predicate = tgsi_default_instruction_predicate();
+ out[1].insn_predicate.Negate = pred_negate;
+ out[1].insn_predicate.SwizzleX = pred_swizzle_x;
+ out[1].insn_predicate.SwizzleY = pred_swizzle_y;
+ out[1].insn_predicate.SwizzleZ = pred_swizzle_z;
+ out[1].insn_predicate.SwizzleW = pred_swizzle_w;
}
ureg->nr_instructions++;
-
+
return result;
}
@@ -1065,6 +1073,13 @@ static void emit_decls( struct ureg_program *ureg )
0, ureg->nr_addrs );
}
+ if (ureg->nr_loops) {
+ emit_decl_range(ureg,
+ TGSI_FILE_LOOP,
+ 0,
+ ureg->nr_loops);
+ }
+
if (ureg->nr_preds) {
emit_decl_range(ureg,
TGSI_FILE_PREDICATE,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index e6fad1f0f40..b89f55d5a87 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -163,6 +163,9 @@ struct ureg_dst
ureg_DECL_address( struct ureg_program * );
struct ureg_dst
+ureg_DECL_loop( struct ureg_program * );
+
+struct ureg_dst
ureg_DECL_predicate(struct ureg_program *);
/* Supply an index to the sampler declaration as this is the hook to