summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Schürmann <daniel@schuermann.dev>2020-12-16 10:33:59 +0100
committerDylan Baker <dylan.c.baker@intel.com>2020-12-18 09:35:22 -0800
commit8232610da2dc2289950143718560afb28980c56e (patch)
tree2a10c444501846c0d86879420d80d5da9b4d71a8
parent79c2aeabebfe1320b4ec8ccf926fcb8f97ea9b24 (diff)
downloadmesa-8232610da2dc2289950143718560afb28980c56e.tar.gz
nir/opt_if: split ALU from Phi more aggressively
If the only user is a trivial bcsel which in a second step can be turned into a phi, this conversion is also worth it even if the previous result is not undefined or constant. Allows for some more loop unrolling or saves a few instructions. Totals from 62 (0.04% of 139391) affected shaders (NAVI10): SGPRs: 4976 -> 4992 (+0.32%) VGPRs: 4408 -> 4472 (+1.45%); split: -0.45%, +1.91% CodeSize: 453632 -> 464000 (+2.29%); split: -0.32%, +2.60% MaxWaves: 527 -> 511 (-3.04%); split: +0.38%, -3.42% Instrs: 84940 -> 86681 (+2.05%); split: -0.36%, +2.41% Cycles: 11946844 -> 11783708 (-1.37%); split: -1.40%, +0.04% VMEM: 9403 -> 10357 (+10.15%); split: +11.59%, -1.45% SMEM: 3003 -> 3025 (+0.73%); split: +1.07%, -0.33% VClause: 1756 -> 1997 (+13.72%); split: -0.11%, +13.84% SClause: 2914 -> 2915 (+0.03%); split: -0.10%, +0.14% Copies: 6426 -> 6768 (+5.32%); split: -4.14%, +9.46% Branches: 2105 -> 2102 (-0.14%); split: -1.66%, +1.52% PreSGPRs: 2921 -> 2909 (-0.41%); split: -0.55%, +0.14% PreVGPRs: 4151 -> 4179 (+0.67%); split: -0.24%, +0.92% cc: mesa-stable Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8123> (cherry picked from commit 8513b12590cf65f77c16a59774de2d268e5de5f9)
-rw-r--r--.pick_status.json2
-rw-r--r--src/compiler/nir/nir_opt_if.c160
2 files changed, 93 insertions, 69 deletions
diff --git a/.pick_status.json b/.pick_status.json
index 2f50b7030d7..f26284ababa 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -373,7 +373,7 @@
"description": "nir/opt_if: split ALU from Phi more aggressively",
"nominated": true,
"nomination_type": 0,
- "resolution": 0,
+ "resolution": 1,
"master_sha": null,
"because_sha": null
},
diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index 31195b12bb7..e25c66f21b8 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -295,6 +295,39 @@ alu_instr_is_type_conversion(const nir_alu_instr *alu)
nir_op_infos[alu->op].output_type != nir_op_infos[alu->op].input_types[0];
}
+static bool
+is_trivial_bcsel(const nir_instr *instr, bool allow_non_phi_src)
+{
+ if (instr->type != nir_instr_type_alu)
+ return false;
+
+ nir_alu_instr *const bcsel = nir_instr_as_alu(instr);
+ if (bcsel->op != nir_op_bcsel &&
+ bcsel->op != nir_op_b32csel &&
+ bcsel->op != nir_op_fcsel)
+ return false;
+
+ for (unsigned i = 0; i < 3; i++) {
+ if (!nir_alu_src_is_trivial_ssa(bcsel, i) ||
+ bcsel->src[i].src.ssa->parent_instr->block != instr->block)
+ return false;
+
+ if (bcsel->src[i].src.ssa->parent_instr->type != nir_instr_type_phi) {
+ /* opt_split_alu_of_phi() is able to peel that src from the loop */
+ if (i == 0 || !allow_non_phi_src)
+ return false;
+ allow_non_phi_src = false;
+ }
+ }
+
+ nir_foreach_phi_src(src, nir_instr_as_phi(bcsel->src[0].src.ssa->parent_instr)) {
+ if (!nir_src_is_const(src->src))
+ return false;
+ }
+
+ return true;
+}
+
/**
* Splits ALU instructions that have a source that is a phi node
*
@@ -306,12 +339,13 @@ alu_instr_is_type_conversion(const nir_alu_instr *alu)
*
* - At least one source of the instruction is a phi node from the header block.
*
- * - The phi node selects a constant or undef from the block before the loop.
- *
* - Any non-phi sources of the ALU instruction come from a block that
* dominates the block before the loop. The most common failure mode for
* this check is sources that are generated in the loop header block.
*
+ * - The phi node selects a constant or undef from the block before the loop or
+ * the only ALU user is a trivial bcsel that gets removed by peeling the ALU
+ *
* The split process splits the original ALU instruction into two, one at the
* bottom of the loop and one at the block before the loop. The instruction
* before the loop computes the value on the first iteration, and the
@@ -450,60 +484,72 @@ opt_split_alu_of_phi(nir_builder *b, nir_loop *loop)
}
}
- if (has_phi_src_from_prev_block && all_non_phi_exist_in_prev_block &&
- (is_prev_result_undef || is_prev_result_const)) {
- nir_block *const continue_block = find_continue_block(loop);
+ if (!has_phi_src_from_prev_block || !all_non_phi_exist_in_prev_block)
+ continue;
- b->cursor = nir_after_block(prev_block);
- nir_ssa_def *prev_value = clone_alu_and_replace_src_defs(b, alu, prev_srcs);
+ if (!is_prev_result_undef && !is_prev_result_const) {
+ /* check if the only user is a trivial bcsel */
+ if (!list_is_empty(&alu->dest.dest.ssa.if_uses) ||
+ !list_is_singular(&alu->dest.dest.ssa.uses))
+ continue;
- /* Make a copy of the original ALU instruction. Replace the sources
- * of the new instruction that read a phi with an undef source from
- * prev_block with the non-undef source of that phi.
- *
- * Insert the new instruction at the end of the continue block.
- */
- b->cursor = nir_after_block_before_jump(continue_block);
+ nir_src *use = list_first_entry(&alu->dest.dest.ssa.uses, nir_src, use_link);
+ if (!is_trivial_bcsel(use->parent_instr, true))
+ continue;
+ }
+
+ /* Split ALU of Phi */
+ nir_block *const continue_block = find_continue_block(loop);
- nir_ssa_def *const alu_copy =
- clone_alu_and_replace_src_defs(b, alu, continue_srcs);
+ b->cursor = nir_after_block(prev_block);
+ nir_ssa_def *prev_value = clone_alu_and_replace_src_defs(b, alu, prev_srcs);
- /* Make a new phi node that selects a value from prev_block and the
- * result of the new instruction from continue_block.
- */
- nir_phi_instr *const phi = nir_phi_instr_create(b->shader);
- nir_phi_src *phi_src;
+ /* Make a copy of the original ALU instruction. Replace the sources
+ * of the new instruction that read a phi with an undef source from
+ * prev_block with the non-undef source of that phi.
+ *
+ * Insert the new instruction at the end of the continue block.
+ */
+ b->cursor = nir_after_block_before_jump(continue_block);
+
+ nir_ssa_def *const alu_copy =
+ clone_alu_and_replace_src_defs(b, alu, continue_srcs);
+
+ /* Make a new phi node that selects a value from prev_block and the
+ * result of the new instruction from continue_block.
+ */
+ nir_phi_instr *const phi = nir_phi_instr_create(b->shader);
+ nir_phi_src *phi_src;
- phi_src = ralloc(phi, nir_phi_src);
- phi_src->pred = prev_block;
- phi_src->src = nir_src_for_ssa(prev_value);
- exec_list_push_tail(&phi->srcs, &phi_src->node);
+ phi_src = ralloc(phi, nir_phi_src);
+ phi_src->pred = prev_block;
+ phi_src->src = nir_src_for_ssa(prev_value);
+ exec_list_push_tail(&phi->srcs, &phi_src->node);
- phi_src = ralloc(phi, nir_phi_src);
- phi_src->pred = continue_block;
- phi_src->src = nir_src_for_ssa(alu_copy);
- exec_list_push_tail(&phi->srcs, &phi_src->node);
+ phi_src = ralloc(phi, nir_phi_src);
+ phi_src->pred = continue_block;
+ phi_src->src = nir_src_for_ssa(alu_copy);
+ exec_list_push_tail(&phi->srcs, &phi_src->node);
- nir_ssa_dest_init(&phi->instr, &phi->dest,
- alu_copy->num_components, alu_copy->bit_size, NULL);
+ nir_ssa_dest_init(&phi->instr, &phi->dest,
+ alu_copy->num_components, alu_copy->bit_size, NULL);
- b->cursor = nir_after_phis(header_block);
- nir_builder_instr_insert(b, &phi->instr);
+ b->cursor = nir_after_phis(header_block);
+ nir_builder_instr_insert(b, &phi->instr);
- /* Modify all readers of the original ALU instruction to read the
- * result of the phi.
- */
- nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
- nir_src_for_ssa(&phi->dest.ssa));
+ /* Modify all readers of the original ALU instruction to read the
+ * result of the phi.
+ */
+ nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa,
+ nir_src_for_ssa(&phi->dest.ssa));
- /* Since the original ALU instruction no longer has any readers, just
- * remove it.
- */
- nir_instr_remove_v(&alu->instr);
- ralloc_free(alu);
+ /* Since the original ALU instruction no longer has any readers, just
+ * remove it.
+ */
+ nir_instr_remove_v(&alu->instr);
+ ralloc_free(alu);
- progress = true;
- }
+ progress = true;
}
return progress;
@@ -624,32 +670,10 @@ opt_simplify_bcsel_of_phi(nir_builder *b, nir_loop *loop)
* https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/170#note_110305
*/
nir_foreach_instr_safe(instr, header_block) {
- if (instr->type != nir_instr_type_alu)
+ if (!is_trivial_bcsel(instr, false))
continue;
nir_alu_instr *const bcsel = nir_instr_as_alu(instr);
- if (bcsel->op != nir_op_bcsel &&
- bcsel->op != nir_op_b32csel &&
- bcsel->op != nir_op_fcsel)
- continue;
-
- bool match = true;
- for (unsigned i = 0; i < 3; i++) {
- /* FINISHME: The abs, negate and swizzled cases could be handled by
- * adding move instructions at the bottom of the continue block and
- * more phi nodes in the header_block.
- */
- if (!nir_alu_src_is_trivial_ssa(bcsel, i) ||
- bcsel->src[i].src.ssa->parent_instr->type != nir_instr_type_phi ||
- bcsel->src[i].src.ssa->parent_instr->block != header_block) {
- match = false;
- break;
- }
- }
-
- if (!match)
- continue;
-
nir_phi_instr *const cond_phi =
nir_instr_as_phi(bcsel->src[0].src.ssa->parent_instr);