summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIan Romanick <ian.d.romanick@intel.com>2011-08-03 15:27:43 -0700
committerIan Romanick <ian.d.romanick@intel.com>2011-08-16 14:09:42 -0700
commit92ca560d68e8a6b532998707afcf4f60c0ce2806 (patch)
treec025c0c9f684b584f56dd151ee51570c000c2e9c
parent7f4c65256cc3f4d9f6a214424beabe688a5dd6a2 (diff)
downloadmesa-92ca560d68e8a6b532998707afcf4f60c0ce2806.tar.gz
ir_to_mesa: Implement ir_unop_any using DP4 w/saturate or DP4 w/SLT
This is just like the ir_binop_logic_or case. The operation ir_unop_any is (a.x || a.y || a.z || a.w). Logical-or is implemented using addition (followed by clamping to [0,1]) on values of 0.0 and 1.0. Replacing the logical-or operators with addition gives (a.x + a.y + a.z + a.w). This can be implemented using a dot-product with a vector of all 1.0. Previously a SNE instruction was used to clamp the resulting logic value to [0,1]. In a fragment shader, using a saturate on the dot-product has the same effect. Adding the saturate to the dot-product is free, so (at least) one instruction is saved. In a vertex shader, using an SLT on the negation of the dot-product result has the same effect. Many older shader architectures do not support the SNE instruction. It must be emulated using two SLT instructions and an ADD. On these architectures, the single SLT saves two instructions. Reviewed-by: Eric Anholt <eric@anholt.net>
-rw-r--r--src/mesa/program/ir_to_mesa.cpp27
1 files changed, 23 insertions, 4 deletions
diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 60d498bd9e3..1bd9a2eee1b 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1256,12 +1256,31 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
}
break;
- case ir_unop_any:
+ case ir_unop_any: {
assert(ir->operands[0]->type->is_vector());
- emit_dp(ir, result_dst, op[0], op[0],
- ir->operands[0]->type->vector_elements);
- emit(ir, OPCODE_SNE, result_dst, result_src, src_reg_for_float(0.0));
+
+ /* After the dot-product, the value will be an integer on the
+ * range [0,4]. Zero stays zero, and positive values become 1.0.
+ */
+ ir_to_mesa_instruction *const dp =
+ emit_dp(ir, result_dst, op[0], op[0],
+ ir->operands[0]->type->vector_elements);
+ if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+ /* The clamping to [0,1] can be done for free in the fragment
+ * shader with a saturate.
+ */
+ dp->saturate = true;
+ } else {
+ /* Negating the result of the dot-product gives values on the range
+ * [-4, 0]. Zero stays zero, and negative values become 1.0. This
+ * is achieved using SLT.
+ */
+ src_reg slt_src = result_src;
+ slt_src.negate = ~slt_src.negate;
+ emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
+ }
break;
+ }
case ir_binop_logic_xor:
emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);