summary refs log tree commit diff
path: root/src/amd/common/ac_gpu_info.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/amd/common/ac_gpu_info.c')
-rw-r--r-- src/amd/common/ac_gpu_info.c 15
1 files changed, 12 insertions, 3 deletions
diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 3bfa1259ef1..1a6f0e41c13 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -521,6 +521,14 @@ bool ac_query_gpu_info(int fd, void *dev_p,
if (info->family == CHIP_KAVERI)
info->num_render_backends = 2;
+ /* Guess the number of enabled SEs because the kernel doesn't tell us. */
+ if (info->chip_class >= GFX10_3 && info->max_se > 1) {
+ unsigned num_rbs_per_se = info->num_render_backends / info->max_se;
+ info->num_se = util_bitcount(amdinfo->enabled_rb_pipes_mask) / num_rbs_per_se;
+ } else {
+ info->num_se = info->max_se;
+ }
+
info->clock_crystal_freq = amdinfo->gpu_counter_freq;
if (!info->clock_crystal_freq) {
fprintf(stderr, "amdgpu: clock crystal frequency is 0, timestamps will be wrong\n");
@@ -666,9 +674,9 @@ bool ac_query_gpu_info(int fd, void *dev_p,
*/
unsigned cu_group = info->chip_class >= GFX10 ? 2 : 1;
info->max_good_cu_per_sa = DIV_ROUND_UP(info->num_good_compute_units,
- (info->max_se * info->max_sh_per_se * cu_group)) * cu_group;
+ (info->num_se * info->max_sh_per_se * cu_group)) * cu_group;
info->min_good_cu_per_sa = (info->num_good_compute_units /
- (info->max_se * info->max_sh_per_se * cu_group)) * cu_group;
+ (info->num_se * info->max_sh_per_se * cu_group)) * cu_group;
memcpy(info->si_tile_mode_array, amdinfo->gb_tile_mode,
sizeof(amdinfo->gb_tile_mode));
@@ -962,6 +970,7 @@ void ac_print_gpu_info(struct radeon_info *info)
printf(" max_good_cu_per_sa = %i\n", info->max_good_cu_per_sa);
printf(" min_good_cu_per_sa = %i\n", info->min_good_cu_per_sa);
printf(" max_se = %i\n", info->max_se);
+ printf(" num_se = %i\n", info->num_se);
printf(" max_sh_per_se = %i\n", info->max_sh_per_se);
printf(" max_wave64_per_simd = %i\n", info->max_wave64_per_simd);
printf(" num_physical_sgprs_per_simd = %i\n", info->num_physical_sgprs_per_simd);
@@ -1289,7 +1298,7 @@ unsigned ac_get_compute_resource_limits(struct radeon_info *info,
if (info->chip_class >= GFX7) {
unsigned num_cu_per_se = info->num_good_compute_units /
- info->max_se;
+ info->num_se;
/* Force even distribution on all SIMDs in CU if the workgroup
* size is 64. This has shown some good improvements if # of CUs