diff options
author | Iago Toral Quiroga <itoral@igalia.com> | 2023-04-26 09:45:03 +0200 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2023-04-27 08:43:29 +0000 |
commit | 0468ce3791fb8dbf90c680f07f0ca11d9f0969b4 (patch) | |
tree | 9371b27644e90062c10b59a76f8a28ac708b3a39 /src/broadcom | |
parent | 13f258fae3c60f84dbecee685f904360373320e0 (diff) | |
download | mesa-0468ce3791fb8dbf90c680f07f0ca11d9f0969b4.tar.gz |
broadcom/compiler: try harder to merge thread switch earlier
We have been stopping as soon as we find a conflict but that doesn't
mean we can't merge it in an earlier slot, so keep going. Going by
shader-db, this sometimes allows us to merge the final thrsw a bit
earlier and avoid emitting NOP instructions at the program end to
make up for its delay slots. I have not observed cases where this
helps with regular thrsw, but it doesn't hurt to try with
those too.
total instructions in shared programs: 11526876 -> 11526354 (<.01%)
instructions in affected programs: 10760 -> 10238 (-4.85%)
helped: 236
HURT: 0
Instructions are helped.
total max-temps in shared programs: 2231705 -> 2231677 (<.01%)
max-temps in affected programs: 276 -> 248 (-10.14%)
helped: 27
HURT: 0
Max-temps are helped.
total inst-and-stalls in shared programs: 11545177 -> 11544655 (<.01%)
inst-and-stalls in affected programs: 10777 -> 10255 (-4.84%)
helped: 236
HURT: 0
Inst-and-stalls are helped.
total nops in shared programs: 321624 -> 321152 (-0.15%)
nops in affected programs: 751 -> 279 (-62.85%)
helped: 236
HURT: 0
Nops are helped.
Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22679>
Diffstat (limited to 'src/broadcom')
-rw-r--r-- | src/broadcom/compiler/qpu_schedule.c | 30 |
1 files changed, 22 insertions, 8 deletions
diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c index 1b0d21463c4..3b32b48f86f 100644 --- a/src/broadcom/compiler/qpu_schedule.c +++ b/src/broadcom/compiler/qpu_schedule.c @@ -1766,12 +1766,6 @@ valid_thrsw_sequence(struct v3d_compile *c, struct choose_scoreboard *scoreboard struct qinst *qinst, int instructions_in_sequence, bool is_thrend) { - /* No emitting our thrsw while the previous thrsw hasn't happened yet. */ - if (scoreboard->last_thrsw_tick + 3 > - scoreboard->tick - instructions_in_sequence) { - return false; - } - for (int slot = 0; slot < instructions_in_sequence; slot++) { if (!qpu_inst_before_thrsw_valid_in_delay_slot(c, qinst, slot)) return false; @@ -1825,13 +1819,28 @@ emit_thrsw(struct v3d_compile *c, /* Find how far back into previous instructions we can put the THRSW. */ int slots_filled = 0; int invalid_sig_count = 0; + int invalid_seq_count = 0; bool last_thrsw_after_invalid_ok = false; struct qinst *merge_inst = NULL; vir_for_each_inst_rev(prev_inst, block) { + /* No emitting our thrsw while the previous thrsw hasn't + * happened yet. + */ + if (scoreboard->last_thrsw_tick + 3 > + scoreboard->tick - (slots_filled + 1)) { + break; + } + + if (!valid_thrsw_sequence(c, scoreboard, prev_inst, slots_filled + 1, is_thrend)) { - break; + /* Even if the current sequence isn't valid, we may + * be able to get a valid sequence by trying to move the + * thrsw earlier, so keep going. + */ + invalid_seq_count++; + goto cont_block; } struct v3d_qpu_sig sig = prev_inst->qpu.sig; @@ -1858,8 +1867,10 @@ emit_thrsw(struct v3d_compile *c, goto cont_block; } + /* We can merge the thrsw in this instruction */ last_thrsw_after_invalid_ok = false; invalid_sig_count = 0; + invalid_seq_count = 0; merge_inst = prev_inst; cont_block: @@ -1871,9 +1882,12 @@ cont_block: * merge the thrsw in the end, we need to adjust slots filled to match * the last valid merge point. 
*/ - assert(invalid_sig_count == 0 || slots_filled >= invalid_sig_count); + assert((invalid_sig_count == 0 && invalid_seq_count == 0) || + slots_filled >= invalid_sig_count + invalid_seq_count); if (invalid_sig_count > 0) slots_filled -= invalid_sig_count; + if (invalid_seq_count > 0) + slots_filled -= invalid_seq_count; bool needs_free = false; if (merge_inst) { |