author     Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>    2019-08-30 12:56:55 -0700
committer  Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>    2019-08-30 15:50:28 -0700
commit     d699a17475b5d123e6a22778e8ac6e005774ce92 (patch)
tree       4c52e52fe69d0719bebc6c511ac14f9f26cf7c08 /src/panfrost
parent     5e06d90c4510eb3a8c42b0e0d1a3ebfd19830069 (diff)
pan/midgard: Schedule before RA
This is a tradeoff. Scheduling before RA means we don't do RA on what-will-become pipeline registers. Importantly, it means the scheduler is able to reorder instructions, as registers have not been decided yet.

Unfortunately, it also complicates register spilling, since the spills themselves won't get bundled optimally and, while we could in principle spill twice per ALU bundle, only one spill per bundle is allowed here. It also prevents us from eliminating dead moves introduced by register allocation, as they are not dead before RA.

The shader-db regressions are from poor spilling choices introduced by the new bundling requirements. These could be solved by combining a post-scheduler (to merge adjacent spills into bundles) with a VLIW-aware spill cost calculation.

Nevertheless, the change is small enough that I feel it's worth eating a tiny shader-db regression for the sake of flexibility.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
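To make the new pass ordering concrete, here is a small stand-alone C sketch of the flow this patch establishes in schedule_program() (see the last two hunks below): schedule each block while operands are still virtual registers, then create pipeline registers, then run register allocation in a bounded retry loop that spills on failure. Every type and helper in the sketch (fake_ctx and the *_stub functions) is an invented stand-in for illustration, not the real Midgard compiler API.

/* Stand-alone sketch, not Mesa code: fake_ctx and the *_stub helpers are
 * simplified stand-ins for compiler_context, schedule_block(),
 * mir_create_pipeline_registers(), allocate_registers() and
 * mir_spill_register(). Only the pass ordering mirrors the patch. */

#include <stdbool.h>
#include <stdio.h>

struct fake_ctx {
        int block_count;
        int pressure;   /* pretend register pressure */
};

static void schedule_block_stub(struct fake_ctx *ctx, int block)
{
        (void) ctx;
        printf("schedule block %d (pre-RA, free to reorder)\n", block);
}

static void create_pipeline_registers_stub(struct fake_ctx *ctx)
{
        (void) ctx;
        printf("create pipeline registers before RA\n");
}

static void allocate_registers_stub(struct fake_ctx *ctx, bool *spilled)
{
        /* Pretend allocation succeeds once pressure is low enough */
        *spilled = ctx->pressure > 4;
}

static void spill_register_stub(struct fake_ctx *ctx)
{
        /* Post-schedule spilling must respect the existing bundles */
        printf("spill one node (bundle-aware)\n");
        ctx->pressure--;
}

int main(void)
{
        struct fake_ctx ctx = { .block_count = 3, .pressure = 6 };

        /* 1. Schedule first, while registers are still virtual */
        for (int b = 0; b < ctx.block_count; ++b)
                schedule_block_stub(&ctx, b);

        /* 2. Pipeline registers become a pre-RA decision */
        create_pipeline_registers_stub(&ctx);

        /* 3. RA retry loop: spill and try again, with an iteration cap */
        bool spilled = false;
        int iter_count = 10;

        do {
                if (spilled)
                        spill_register_stub(&ctx);

                allocate_registers_stub(&ctx, &spilled);
        } while (spilled && (iter_count--) > 0);

        if (iter_count <= 0)
                fprintf(stderr, "gave up allocating registers\n");

        return 0;
}

The real allocate_registers() hands back an interference graph and mir_spill_register() takes that graph plus a spill count; the stubs above only model the control flow.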
Diffstat (limited to 'src/panfrost')
-rw-r--r--   src/panfrost/midgard/midgard_schedule.c   56
1 file changed, 29 insertions(+), 27 deletions(-)
diff --git a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c
index 6693a1b725b..8f86701e33f 100644
--- a/src/panfrost/midgard/midgard_schedule.c
+++ b/src/panfrost/midgard/midgard_schedule.c
@@ -798,11 +798,26 @@ static void mir_spill_register(
                ra_set_node_spill_cost(g, i, 1.0);
        }

-        mir_foreach_instr_global(ctx, ins) {
-                if (ins->no_spill &&
-                    ins->dest >= 0 &&
-                    ins->dest < ctx->temp_count)
-                        ra_set_node_spill_cost(g, ins->dest, -1.0);
+        /* We can't spill any bundles that contain unspills. This could be
+         * optimized to allow use of r27 to spill twice per bundle, but if
+         * you're at the point of optimizing spilling, it's too late. */
+
+        mir_foreach_block(ctx, block) {
+                mir_foreach_bundle_in_block(block, bun) {
+                        bool no_spill = false;
+
+                        for (unsigned i = 0; i < bun->instruction_count; ++i)
+                                no_spill |= bun->instructions[i]->no_spill;
+
+                        if (!no_spill)
+                                continue;
+
+                        for (unsigned i = 0; i < bun->instruction_count; ++i) {
+                                unsigned dest = bun->instructions[i]->dest;
+                                if (dest < ctx->temp_count)
+                                        ra_set_node_spill_cost(g, dest, -1.0);
+                        }
+                }
        }

        int spill_node = ra_get_best_spill_node(g);
@@ -831,7 +846,8 @@ static void mir_spill_register(
                if (is_special_w)
                        spill_slot = spill_index++;

-                mir_foreach_instr_global_safe(ctx, ins) {
+                mir_foreach_block(ctx, block) {
+                mir_foreach_instr_in_block_safe(block, ins) {
                        if (ins->dest != spill_node) continue;

                        midgard_instruction st;
@@ -841,17 +857,19 @@ static void mir_spill_register(
                                st.no_spill = true;
                        } else {
                                ins->dest = SSA_FIXED_REGISTER(26);
+                                ins->no_spill = true;
                                st = v_load_store_scratch(ins->dest, spill_slot, true, ins->mask);
                        }

                        /* Hint: don't rewrite this node */
                        st.hint = true;

-                        mir_insert_instruction_before(ctx, mir_next_op(ins), st);
+                        mir_insert_instruction_after_scheduled(ctx, block, ins, st);

                        if (!is_special)
                                ctx->spills++;
                }
+                }
        }

        /* For special reads, figure out how many components we need */
@@ -915,7 +933,7 @@ static void mir_spill_register(
                                st.mask = read_mask;

-                                mir_insert_instruction_before(ctx, before, st);
+                                mir_insert_instruction_before_scheduled(ctx, block, before, st);

                                // consecutive_skip = true;
                        } else {
                                /* Special writes already have their move spilled in */
@@ -962,8 +980,11 @@ schedule_program(compiler_context *ctx)
        mir_foreach_block(ctx, block) {
                midgard_opt_dead_move_eliminate(ctx, block);
+                schedule_block(ctx, block);
        }

+        mir_create_pipeline_registers(ctx);
+
        do {
                if (spilled)
                        mir_spill_register(ctx, g, &spill_count);
@@ -974,25 +995,6 @@ schedule_program(compiler_context *ctx)
                g = allocate_registers(ctx, &spilled);
        } while(spilled && ((iter_count--) > 0));

-        /* We can simplify a bit after RA */
-
-        mir_foreach_block(ctx, block) {
-                midgard_opt_post_move_eliminate(ctx, block, g);
-        }
-
-        /* After RA finishes, we schedule all at once */
-
-        mir_foreach_block(ctx, block) {
-                schedule_block(ctx, block);
-        }
-
-        /* Finally, we create pipeline registers as a peephole pass after
-         * scheduling. This isn't totally optimal, since there are cases where
-         * the usage of pipeline registers can eliminate spills, but it does
-         * save some power */
-
-        mir_create_pipeline_registers(ctx);
-
        if (iter_count <= 0) {
                fprintf(stderr, "panfrost: Gave up allocating registers, rendering will be incomplete\n");
                assert(0);