summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPavel Ondračka <pavel.ondracka@gmail.com>2022-08-10 09:17:56 +0200
committerDylan Baker <dylan.c.baker@intel.com>2022-08-16 10:08:19 -0700
commit9ae5b4aff3b7cde5b692f8c7b875959c7d24c334 (patch)
tree6ab4a8c04a9b6c4f0592352998c6a4fc13ca7027
parente6be63857b8de55680ea97efe090759c77460b40 (diff)
downloadmesa-9ae5b4aff3b7cde5b692f8c7b875959c7d24c334.tar.gz
r300: fix variables detection for paired ALU and TEX instructions in different branches
TEX instrutions can't write xyz and w to separate registers so we need to create variables from them first, otherwise we can create two variables from ALU writing the same register xyz and w in other branch (this usually works when TEX is not present as the xyz and w can read/write from different registers). This fixes regalloc because the variables are later used as a graph nodes. The variable order should not matter but it slightly does (leading to approx 0.3% shader-db temps increase as compared to previous state), so just sort the variables list afterwards to be as close to the previous behavior as possible and prevent the regression. CC: mesa-stable Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6936 Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com> Reviewed-by: Filip Gawin <filip@gawin.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17987> (cherry picked from commit 88fd397c741c0e1fe0d851fbc566925078df6013)
-rw-r--r--.pick_status.json2
-rw-r--r--src/gallium/drivers/r300/compiler/radeon_variable.c91
2 files changed, 87 insertions, 6 deletions
diff --git a/.pick_status.json b/.pick_status.json
index 9b54f6036bf..abb71d610ce 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -895,7 +895,7 @@
"description": "r300: fix variables detection for paired ALU and TEX instructions in different branches",
"nominated": true,
"nomination_type": 0,
- "resolution": 0,
+ "resolution": 1,
"main_sha": null,
"because_sha": null
},
diff --git a/src/gallium/drivers/r300/compiler/radeon_variable.c b/src/gallium/drivers/r300/compiler/radeon_variable.c
index 6aec0940d80..5011967e536 100644
--- a/src/gallium/drivers/r300/compiler/radeon_variable.c
+++ b/src/gallium/drivers/r300/compiler/radeon_variable.c
@@ -26,6 +26,7 @@
*/
#include <stdio.h>
+#include <stdlib.h>
#include "radeon_variable.h"
#include "memory_pool.h"
@@ -346,6 +347,31 @@ static void get_variable_pair_helper(
}
/**
+ * Compare function for sorting variable pointers by the lowest instruction
+ * IP from it and its friends.
+ */
+static int cmpfunc_variable_by_ip (const void * a, const void * b) {
+ struct rc_variable * var_a = *(struct rc_variable **)a;
+ struct rc_variable * var_b = *(struct rc_variable **)b;
+ unsigned int min_ip_a = var_a->Inst->IP;
+ unsigned int min_ip_b = var_b->Inst->IP;
+
+ /* Find the minimal IP of a variable and its friends */
+ while (var_a->Friend) {
+ var_a = var_a->Friend;
+ if (var_a->Inst->IP < min_ip_a)
+ min_ip_a = var_a->Inst->IP;
+ }
+ while (var_b->Friend) {
+ var_b = var_b->Friend;
+ if (var_b->Inst->IP < min_ip_b)
+ min_ip_b = var_b->Inst->IP;
+ }
+
+ return (int)min_ip_a - (int)min_ip_b;
+}
+
+/**
* Generate a list of variables used by the shader program. Each instruction
* that writes to a register is considered a variable. The struct rc_variable
* data structure includes a list of readers and is essentially a
@@ -357,14 +383,42 @@ struct rc_list * rc_get_variables(struct radeon_compiler * c)
struct rc_instruction * inst;
struct rc_list * variable_list = NULL;
+ /* We search for the variables in two loops in order to get it right in
+ * the following specific case
+ *
+ * IF aluresult.x___;
+ * ...
+ * MAD temp[0].xyz, src0.000, src0.111, src0.000
+ * MAD temp[0].w, src0.0, src0.1, src0.0
+ * ELSE;
+ * ...
+ * TXB temp[0], temp[1].xy_w, 2D[0] SEM_WAIT SEM_ACQUIRE;
+ * ENDIF;
+ * src0.xyz = input[0], src0.w = input[0], src1.xyz = temp[0], src1.w = temp[0] SEM_WAIT
+ * MAD temp[1].xyz, src0.xyz, src1.xyz, src0.000
+ * MAD temp[1].w, src0.w, src1.w, src0.0
+ *
+ * If we go just in one loop, we will first create two variables for the
+ * temp[0].xyz and temp[0].w. This happens because they don't share a reader
+ * as the src1.xyz and src1.w of the instruction where the value is used are
+ * in theory independent. They are not because the same register is written
+ * also by the texture instruction in the other branch and TEX can't write xyz
+ * and w separatelly.
+ *
+ * Therefore first search for RC_INSTRUCTION_NORMAL to create variables from
+ * the texture instruction and than the pair instructions will be properly
+ * marked as friends. So we will end with only one variable here as we should.
+ *
+ * This doesn't matter before the pair translation, because everything is
+ * RC_INSTRUCTION_NORMAL.
+ */
for (inst = c->Program.Instructions.Next;
inst != &c->Program.Instructions;
inst = inst->Next) {
- struct rc_reader_data reader_data;
- struct rc_variable * new_var;
- memset(&reader_data, 0, sizeof(reader_data));
-
if (inst->Type == RC_INSTRUCTION_NORMAL) {
+ struct rc_reader_data reader_data;
+ struct rc_variable * new_var;
+ memset(&reader_data, 0, sizeof(reader_data));
rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL);
if (reader_data.ReaderCount == 0) {
continue;
@@ -373,7 +427,15 @@ struct rc_list * rc_get_variables(struct radeon_compiler * c)
inst->U.I.DstReg.Index,
inst->U.I.DstReg.WriteMask, &reader_data);
get_variable_helper(&variable_list, new_var);
- } else {
+ }
+ }
+
+ bool needs_sorting = false;
+ for (inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions;
+ inst = inst->Next) {
+ if (inst->Type != RC_INSTRUCTION_NORMAL) {
+ needs_sorting = true;
get_variable_pair_helper(&variable_list, c, inst,
&inst->U.P.RGB);
get_variable_pair_helper(&variable_list, c, inst,
@@ -381,6 +443,25 @@ struct rc_list * rc_get_variables(struct radeon_compiler * c)
}
}
+ if (variable_list && needs_sorting) {
+ unsigned int count = rc_list_count(variable_list);
+ struct rc_variable **variables = memory_pool_malloc(&c->Pool,
+ sizeof(struct rc_variable *) * count);
+
+ struct rc_list * current = variable_list;
+ for(unsigned int i = 0; current; i++, current = current->Next) {
+ struct rc_variable * var = current->Item;
+ variables[i] = var;
+ }
+
+ qsort(variables, count, sizeof(struct rc_variable *), cmpfunc_variable_by_ip);
+
+ current = variable_list;
+ for(unsigned int i = 0; current; i++, current = current->Next) {
+ current->Item = variables[i];
+ }
+ }
+
return variable_list;
}