summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Daniel Schürmann <daniel@schuermann.dev> 2019-09-17 14:35:22 +0200
committer Daniel Schürmann <daniel@schuermann.dev> 2019-09-19 12:10:00 +0200
commit a70a9987181a09258406cc0d8ff5e34acc000371 (patch)
tree e4022db8525d66c706bc10154476514058425cf9
parent 93c8ebfa780ebd1495095e794731881aef29e7d3 (diff)
download mesa-a70a9987181a09258406cc0d8ff5e34acc000371.tar.gz
radv/aco: Setup alternate path in RADV to support the experimental ACO compiler

LLVM remains default and ACO can be enabled with RADV_PERFTEST=aco.

Co-authored-by: Daniel Schürmann <daniel@schuermann.dev>
Co-authored-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
-rw-r--r-- src/amd/common/ac_llvm_util.c | 3
-rw-r--r-- src/amd/meson.build | 1
-rw-r--r-- src/amd/vulkan/meson.build | 2
-rw-r--r-- src/amd/vulkan/radv_cmd_buffer.c | 4
-rw-r--r-- src/amd/vulkan/radv_debug.h | 25
-rw-r--r-- src/amd/vulkan/radv_device.c | 90
-rw-r--r-- src/amd/vulkan/radv_extensions.py | 10
-rw-r--r-- src/amd/vulkan/radv_pipeline.c | 33
-rw-r--r-- src/amd/vulkan/radv_private.h | 4
-rw-r--r-- src/amd/vulkan/radv_shader.c | 131
-rw-r--r-- src/amd/vulkan/radv_shader.h | 5
11 files changed, 205 insertions, 103 deletions
diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index 64942670b6c..ddc8fee839b 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -49,6 +49,9 @@ static void ac_init_llvm_target()
/* For inline assembly. */
LLVMInitializeAMDGPUAsmParser();
+ /* For ACO disassembly. */
+ LLVMInitializeAMDGPUDisassembler();
+
/* Workaround for bug in llvm 4.0 that causes image intrinsics
* to disappear.
* https://reviews.llvm.org/D26348
diff --git a/src/amd/meson.build b/src/amd/meson.build
index f96a9aac095..1e459b26c1a 100644
--- a/src/amd/meson.build
+++ b/src/amd/meson.build
@@ -22,6 +22,7 @@ inc_amd = include_directories('.')
subdir('addrlib')
subdir('common')
+subdir('compiler')
if with_amd_vk
subdir('vulkan')
endif
diff --git a/src/amd/vulkan/meson.build b/src/amd/vulkan/meson.build
index 9278f1144d9..72cb64c5847 100644
--- a/src/amd/vulkan/meson.build
+++ b/src/amd/vulkan/meson.build
@@ -156,7 +156,7 @@ libvulkan_radeon = shared_library(
],
dependencies : [
dep_llvm, dep_libdrm_amdgpu, dep_thread, dep_elf, dep_dl, dep_m,
- dep_valgrind, radv_deps,
+ dep_valgrind, radv_deps, idep_aco,
idep_mesautil, idep_nir, idep_vulkan_util, idep_amdgfxregs_h, idep_xmlconfig,
],
c_args : [c_vis_args, no_override_init_args, radv_flags],
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 6937eeacc49..f35053b8695 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -2844,6 +2844,10 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
break;
case VK_ACCESS_SHADER_READ_BIT:
flush_bits |= RADV_CMD_FLAG_INV_VCACHE;
+ /* Unlike LLVM, ACO uses SMEM for SSBOs and we have to
+ * invalidate the scalar cache. */
+ if (cmd_buffer->device->physical_device->use_aco)
+ flush_bits |= RADV_CMD_FLAG_INV_SCACHE;
if (!image_is_coherent)
flush_bits |= RADV_CMD_FLAG_INV_L2;
diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h
index 6276589d025..ca71d535f2a 100644
--- a/src/amd/vulkan/radv_debug.h
+++ b/src/amd/vulkan/radv_debug.h
@@ -58,18 +58,19 @@ enum {
};
enum {
- RADV_PERFTEST_NO_BATCHCHAIN = 0x1,
- RADV_PERFTEST_SISCHED = 0x2,
- RADV_PERFTEST_LOCAL_BOS = 0x4,
- RADV_PERFTEST_OUT_OF_ORDER = 0x8,
- RADV_PERFTEST_DCC_MSAA = 0x10,
- RADV_PERFTEST_BO_LIST = 0x20,
- RADV_PERFTEST_SHADER_BALLOT = 0x40,
- RADV_PERFTEST_TC_COMPAT_CMASK = 0x80,
- RADV_PERFTEST_CS_WAVE_32 = 0x100,
- RADV_PERFTEST_PS_WAVE_32 = 0x200,
- RADV_PERFTEST_GE_WAVE_32 = 0x400,
- RADV_PERFTEST_DFSM = 0x800,
+ RADV_PERFTEST_NO_BATCHCHAIN = 0x1,
+ RADV_PERFTEST_SISCHED = 0x2,
+ RADV_PERFTEST_LOCAL_BOS = 0x4,
+ RADV_PERFTEST_OUT_OF_ORDER = 0x8,
+ RADV_PERFTEST_DCC_MSAA = 0x10,
+ RADV_PERFTEST_BO_LIST = 0x20,
+ RADV_PERFTEST_SHADER_BALLOT = 0x40,
+ RADV_PERFTEST_TC_COMPAT_CMASK = 0x80,
+ RADV_PERFTEST_CS_WAVE_32 = 0x100,
+ RADV_PERFTEST_PS_WAVE_32 = 0x200,
+ RADV_PERFTEST_GE_WAVE_32 = 0x400,
+ RADV_PERFTEST_DFSM = 0x800,
+ RADV_PERFTEST_ACO = 0x1000,
};
bool
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index af8607c1559..26de979b64f 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -86,41 +86,41 @@ radv_get_device_uuid(struct radeon_info *info, void *uuid)
}
static void
-radv_get_device_name(enum radeon_family family, char *name, size_t name_len)
+radv_get_device_name(enum radeon_family family, char *name, size_t name_len, bool aco)
{
const char *chip_string;
switch (family) {
- case CHIP_TAHITI: chip_string = "AMD RADV TAHITI"; break;
- case CHIP_PITCAIRN: chip_string = "AMD RADV PITCAIRN"; break;
- case CHIP_VERDE: chip_string = "AMD RADV CAPE VERDE"; break;
- case CHIP_OLAND: chip_string = "AMD RADV OLAND"; break;
- case CHIP_HAINAN: chip_string = "AMD RADV HAINAN"; break;
- case CHIP_BONAIRE: chip_string = "AMD RADV BONAIRE"; break;
- case CHIP_KAVERI: chip_string = "AMD RADV KAVERI"; break;
- case CHIP_KABINI: chip_string = "AMD RADV KABINI"; break;
- case CHIP_HAWAII: chip_string = "AMD RADV HAWAII"; break;
- case CHIP_TONGA: chip_string = "AMD RADV TONGA"; break;
- case CHIP_ICELAND: chip_string = "AMD RADV ICELAND"; break;
- case CHIP_CARRIZO: chip_string = "AMD RADV CARRIZO"; break;
- case CHIP_FIJI: chip_string = "AMD RADV FIJI"; break;
- case CHIP_POLARIS10: chip_string = "AMD RADV POLARIS10"; break;
- case CHIP_POLARIS11: chip_string = "AMD RADV POLARIS11"; break;
- case CHIP_POLARIS12: chip_string = "AMD RADV POLARIS12"; break;
- case CHIP_STONEY: chip_string = "AMD RADV STONEY"; break;
- case CHIP_VEGAM: chip_string = "AMD RADV VEGA M"; break;
- case CHIP_VEGA10: chip_string = "AMD RADV VEGA10"; break;
- case CHIP_VEGA12: chip_string = "AMD RADV VEGA12"; break;
- case CHIP_VEGA20: chip_string = "AMD RADV VEGA20"; break;
- case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break;
- case CHIP_RAVEN2: chip_string = "AMD RADV RAVEN2"; break;
- case CHIP_NAVI10: chip_string = "AMD RADV NAVI10"; break;
- case CHIP_NAVI12: chip_string = "AMD RADV NAVI12"; break;
- case CHIP_NAVI14: chip_string = "AMD RADV NAVI14"; break;
- default: chip_string = "AMD RADV unknown"; break;
- }
-
- snprintf(name, name_len, "%s (LLVM " MESA_LLVM_VERSION_STRING ")", chip_string);
+ case CHIP_TAHITI: chip_string = "TAHITI"; break;
+ case CHIP_PITCAIRN: chip_string = "PITCAIRN"; break;
+ case CHIP_VERDE: chip_string = "CAPE VERDE"; break;
+ case CHIP_OLAND: chip_string = "OLAND"; break;
+ case CHIP_HAINAN: chip_string = "HAINAN"; break;
+ case CHIP_BONAIRE: chip_string = "BONAIRE"; break;
+ case CHIP_KAVERI: chip_string = "KAVERI"; break;
+ case CHIP_KABINI: chip_string = "KABINI"; break;
+ case CHIP_HAWAII: chip_string = "HAWAII"; break;
+ case CHIP_TONGA: chip_string = "TONGA"; break;
+ case CHIP_ICELAND: chip_string = "ICELAND"; break;
+ case CHIP_CARRIZO: chip_string = "CARRIZO"; break;
+ case CHIP_FIJI: chip_string = "FIJI"; break;
+ case CHIP_POLARIS10: chip_string = "POLARIS10"; break;
+ case CHIP_POLARIS11: chip_string = "POLARIS11"; break;
+ case CHIP_POLARIS12: chip_string = "POLARIS12"; break;
+ case CHIP_STONEY: chip_string = "STONEY"; break;
+ case CHIP_VEGAM: chip_string = "VEGA M"; break;
+ case CHIP_VEGA10: chip_string = "VEGA10"; break;
+ case CHIP_VEGA12: chip_string = "VEGA12"; break;
+ case CHIP_VEGA20: chip_string = "VEGA20"; break;
+ case CHIP_RAVEN: chip_string = "RAVEN"; break;
+ case CHIP_RAVEN2: chip_string = "RAVEN2"; break;
+ case CHIP_NAVI10: chip_string = "NAVI10"; break;
+ case CHIP_NAVI12: chip_string = "NAVI12"; break;
+ case CHIP_NAVI14: chip_string = "NAVI14"; break;
+ default: chip_string = "unknown"; break;
+ }
+
+ snprintf(name, name_len, "AMD RADV%s %s (LLVM " MESA_LLVM_VERSION_STRING ")", aco ? "/ACO" : "", chip_string);
}
static uint64_t
@@ -327,7 +327,14 @@ radv_physical_device_init(struct radv_physical_device *device,
radv_handle_env_var_force_family(device);
- radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name));
+ device->use_aco = instance->perftest_flags & RADV_PERFTEST_ACO;
+ if ((device->rad_info.chip_class < GFX8 ||
+ device->rad_info.chip_class > GFX9) && device->use_aco) {
+ fprintf(stderr, "WARNING: disabling ACO on unsupported GPUs.\n");
+ device->use_aco = false;
+ }
+
+ radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name), device->use_aco);
if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
device->ws->destroy(device->ws);
@@ -339,7 +346,8 @@ radv_physical_device_init(struct radv_physical_device *device,
/* These flags affect shader compilation. */
uint64_t shader_env_flags =
(device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) |
- (device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0);
+ (device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0) |
+ (device->use_aco ? 0x4 : 0);
/* The gpu id is already embedded in the uuid so we just pass "radv"
* when creating the cache.
@@ -362,9 +370,10 @@ radv_physical_device_init(struct radv_physical_device *device,
(device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);
device->use_shader_ballot = device->rad_info.chip_class >= GFX8 &&
- device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT;
+ (device->use_aco || device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT);
device->use_ngg_streamout = false;
+ device->use_aco = device->instance->perftest_flags & RADV_PERFTEST_ACO;
/* Determine the number of threads per wave for all stages. */
device->cs_wave_size = 64;
@@ -500,6 +509,7 @@ static const struct debug_control radv_perftest_options[] = {
{"pswave32", RADV_PERFTEST_PS_WAVE_32},
{"gewave32", RADV_PERFTEST_GE_WAVE_32},
{"dfsm", RADV_PERFTEST_DFSM},
+ {"aco", RADV_PERFTEST_ACO},
{NULL, 0}
};
@@ -622,6 +632,8 @@ VkResult radv_CreateInstance(
instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
radv_perftest_options);
+ if (instance->perftest_flags & RADV_PERFTEST_ACO)
+ fprintf(stderr, "WARNING: Experimental compiler backend enabled. Here be dragons! Incorrect rendering, GPU hangs and/or resets are likely\n");
if (instance->debug_flags & RADV_DEBUG_STARTUP)
radv_logi("Created an instance");
@@ -832,7 +844,7 @@ void radv_GetPhysicalDeviceFeatures(
.shaderCullDistance = true,
.shaderFloat64 = true,
.shaderInt64 = true,
- .shaderInt16 = pdevice->rad_info.chip_class >= GFX9,
+ .shaderInt16 = pdevice->rad_info.chip_class >= GFX9 && !pdevice->use_aco,
.sparseBinding = true,
.variableMultisampleRate = true,
.inheritedQueries = true,
@@ -874,7 +886,7 @@ void radv_GetPhysicalDeviceFeatures2(
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
VkPhysicalDevice16BitStorageFeatures *features =
(VkPhysicalDevice16BitStorageFeatures*)ext;
- bool enabled = pdevice->rad_info.chip_class >= GFX8;
+ bool enabled = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco;
features->storageBuffer16BitAccess = enabled;
features->uniformAndStorageBuffer16BitAccess = enabled;
features->storagePushConstant16 = enabled;
@@ -968,7 +980,7 @@ void radv_GetPhysicalDeviceFeatures2(
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR: {
VkPhysicalDevice8BitStorageFeaturesKHR *features =
(VkPhysicalDevice8BitStorageFeaturesKHR*)ext;
- bool enabled = pdevice->rad_info.chip_class >= GFX8;
+ bool enabled = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco;
features->storageBuffer8BitAccess = enabled;
features->uniformAndStorageBuffer8BitAccess = enabled;
features->storagePushConstant8 = enabled;
@@ -977,8 +989,8 @@ void radv_GetPhysicalDeviceFeatures2(
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR: {
VkPhysicalDeviceFloat16Int8FeaturesKHR *features =
(VkPhysicalDeviceFloat16Int8FeaturesKHR*)ext;
- features->shaderFloat16 = pdevice->rad_info.chip_class >= GFX8;
- features->shaderInt8 = true;
+ features->shaderFloat16 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco;
+ features->shaderInt8 = !pdevice->use_aco;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR: {
diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py
index 4bb4c4dfc23..eb2505ba765 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -51,7 +51,7 @@ class Extension:
# and dEQP-VK.api.info.device fail due to the duplicated strings.
EXTENSIONS = [
Extension('VK_ANDROID_native_buffer', 5, 'ANDROID && device->rad_info.has_syncobj_wait_for_submit'),
- Extension('VK_KHR_16bit_storage', 1, True),
+ Extension('VK_KHR_16bit_storage', 1, '!device->use_aco'),
Extension('VK_KHR_bind_memory2', 1, True),
Extension('VK_KHR_create_renderpass2', 1, True),
Extension('VK_KHR_dedicated_allocation', 1, True),
@@ -87,7 +87,7 @@ EXTENSIONS = [
Extension('VK_KHR_sampler_ycbcr_conversion', 1, True),
Extension('VK_KHR_shader_atomic_int64', 1, 'LLVM_VERSION_MAJOR >= 9'),
Extension('VK_KHR_shader_draw_parameters', 1, True),
- Extension('VK_KHR_shader_float16_int8', 1, True),
+ Extension('VK_KHR_shader_float16_int8', 1, '!device->use_aco'),
Extension('VK_KHR_storage_buffer_storage_class', 1, True),
Extension('VK_KHR_surface', 25, 'RADV_HAS_SURFACE'),
Extension('VK_KHR_surface_protected_capabilities', 1, 'RADV_HAS_SURFACE'),
@@ -99,7 +99,7 @@ EXTENSIONS = [
Extension('VK_KHR_xlib_surface', 6, 'VK_USE_PLATFORM_XLIB_KHR'),
Extension('VK_KHR_multiview', 1, True),
Extension('VK_KHR_display', 23, 'VK_USE_PLATFORM_DISPLAY_KHR'),
- Extension('VK_KHR_8bit_storage', 1, 'device->rad_info.chip_class >= GFX8'),
+ Extension('VK_KHR_8bit_storage', 1, 'device->rad_info.chip_class >= GFX8 && !device->use_aco'),
Extension('VK_EXT_direct_mode_display', 1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
Extension('VK_EXT_acquire_xlib_display', 1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
Extension('VK_EXT_buffer_device_address', 1, True),
@@ -138,8 +138,8 @@ EXTENSIONS = [
Extension('VK_AMD_buffer_marker', 1, True),
Extension('VK_AMD_draw_indirect_count', 1, True),
Extension('VK_AMD_gcn_shader', 1, True),
- Extension('VK_AMD_gpu_shader_half_float', 1, 'device->rad_info.chip_class >= GFX9'),
- Extension('VK_AMD_gpu_shader_int16', 1, 'device->rad_info.chip_class >= GFX9'),
+ Extension('VK_AMD_gpu_shader_half_float', 1, '!device->use_aco && device->rad_info.chip_class >= GFX9'),
+ Extension('VK_AMD_gpu_shader_int16', 1, '!device->use_aco && device->rad_info.chip_class >= GFX9'),
Extension('VK_AMD_rasterization_order', 1, 'device->rad_info.has_out_of_order_rast'),
Extension('VK_AMD_shader_ballot', 1, 'device->use_shader_ballot'),
Extension('VK_AMD_shader_core_properties', 1, True),
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 5036fa69d20..70ffc2412b3 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -167,6 +167,8 @@ static uint32_t get_hash_flags(struct radv_device *device)
hash_flags |= RADV_HASH_SHADER_PS_WAVE32;
if (device->physical_device->ge_wave_size == 32)
hash_flags |= RADV_HASH_SHADER_GE_WAVE32;
+ if (device->physical_device->use_aco)
+ hash_flags |= RADV_HASH_SHADER_ACO;
return hash_flags;
}
@@ -2552,6 +2554,14 @@ void radv_stop_feedback(VkPipelineCreationFeedbackEXT *feedback, bool cache_hit)
}
static
+bool radv_aco_supported_stage(gl_shader_stage stage, bool has_gs, bool has_ts)
+{
+ return (stage == MESA_SHADER_VERTEX && !has_gs && !has_ts) ||
+ stage == MESA_SHADER_FRAGMENT ||
+ stage == MESA_SHADER_COMPUTE;
+}
+
+static
void radv_create_shaders(struct radv_pipeline *pipeline,
struct radv_device *device,
struct radv_pipeline_cache *cache,
@@ -2613,6 +2623,10 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
modules[MESA_SHADER_FRAGMENT] = &fs_m;
}
+ bool has_gs = modules[MESA_SHADER_GEOMETRY];
+ bool has_ts = modules[MESA_SHADER_TESS_CTRL] || modules[MESA_SHADER_TESS_EVAL];
+ bool use_aco = device->physical_device->use_aco;
+
for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
const VkPipelineShaderStageCreateInfo *stage = pStages[i];
@@ -2621,10 +2635,11 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
radv_start_feedback(stage_feedbacks[i]);
+ bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts);
nir[i] = radv_shader_compile_to_nir(device, modules[i],
stage ? stage->pName : "main", i,
stage ? stage->pSpecializationInfo : NULL,
- flags, pipeline->layout);
+ flags, pipeline->layout, aco);
/* We don't want to alter meta shaders IR directly so clone it
* first.
@@ -2651,7 +2666,10 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
nir_lower_non_uniform_ssbo_access |
nir_lower_non_uniform_texture_access |
nir_lower_non_uniform_image_access);
- NIR_PASS_V(nir[i], nir_lower_bool_to_int32);
+
+ bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts);
+ if (!aco)
+ NIR_PASS_V(nir[i], nir_lower_bool_to_int32);
}
if (radv_can_dump_shader(device, modules[i], false))
@@ -2690,11 +2708,13 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
if (!pipeline->shaders[MESA_SHADER_FRAGMENT]) {
radv_start_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT]);
+ bool aco = use_aco && radv_aco_supported_stage(MESA_SHADER_FRAGMENT, has_gs, has_ts);
pipeline->shaders[MESA_SHADER_FRAGMENT] =
radv_shader_variant_compile(device, modules[MESA_SHADER_FRAGMENT], &nir[MESA_SHADER_FRAGMENT], 1,
pipeline->layout, keys + MESA_SHADER_FRAGMENT,
infos + MESA_SHADER_FRAGMENT,
- keep_executable_info, &binaries[MESA_SHADER_FRAGMENT]);
+ keep_executable_info, aco,
+ &binaries[MESA_SHADER_FRAGMENT]);
radv_stop_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT], false);
}
@@ -2725,7 +2745,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
pipeline->shaders[MESA_SHADER_TESS_CTRL] = radv_shader_variant_compile(device, modules[MESA_SHADER_TESS_CTRL], combined_nir, 2,
pipeline->layout,
&key, &infos[MESA_SHADER_TESS_CTRL], keep_executable_info,
- &binaries[MESA_SHADER_TESS_CTRL]);
+ false, &binaries[MESA_SHADER_TESS_CTRL]);
radv_stop_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL], false);
}
@@ -2744,7 +2764,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
pipeline->shaders[MESA_SHADER_GEOMETRY] = radv_shader_variant_compile(device, modules[MESA_SHADER_GEOMETRY], combined_nir, 2,
pipeline->layout,
&keys[pre_stage], &infos[MESA_SHADER_GEOMETRY], keep_executable_info,
- &binaries[MESA_SHADER_GEOMETRY]);
+ false, &binaries[MESA_SHADER_GEOMETRY]);
radv_stop_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY], false);
}
@@ -2763,10 +2783,11 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
radv_start_feedback(stage_feedbacks[i]);
+ bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts);
pipeline->shaders[i] = radv_shader_variant_compile(device, modules[i], &nir[i], 1,
pipeline->layout,
keys + i, infos + i,keep_executable_info,
- &binaries[i]);
+ aco, &binaries[i]);
radv_stop_feedback(stage_feedbacks[i], false);
}
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 03dc9e02145..0a3e7ca9d88 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -296,6 +296,9 @@ struct radv_physical_device {
uint8_t cs_wave_size;
uint8_t ge_wave_size;
+ /* Whether to use the experimental compiler backend */
+ bool use_aco;
+
/* This is the drivers on-disk cache used as a fallback as opposed to
* the pipeline cache defined by apps.
*/
@@ -1421,6 +1424,7 @@ struct radv_shader_module;
#define RADV_HASH_SHADER_CS_WAVE32 (1 << 4)
#define RADV_HASH_SHADER_PS_WAVE32 (1 << 5)
#define RADV_HASH_SHADER_GE_WAVE32 (1 << 6)
+#define RADV_HASH_SHADER_ACO (1 << 7)
void
radv_hash_shaders(unsigned char *hash,
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 98abe8cd437..2bd4c351745 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -48,9 +48,11 @@
#include "util/debug.h"
#include "ac_exp_param.h"
+#include "aco_interface.h"
+
#include "util/string_buffer.h"
-static const struct nir_shader_compiler_options nir_options = {
+static const struct nir_shader_compiler_options nir_options_llvm = {
.vertex_id_zero_based = true,
.lower_scmp = true,
.lower_flrp16 = true,
@@ -80,6 +82,36 @@ static const struct nir_shader_compiler_options nir_options = {
.use_interpolated_input_intrinsics = true,
};
+static const struct nir_shader_compiler_options nir_options_aco = {
+ .vertex_id_zero_based = true,
+ .lower_scmp = true,
+ .lower_flrp16 = true,
+ .lower_flrp32 = true,
+ .lower_flrp64 = true,
+ .lower_device_index_to_zero = true,
+ .lower_fdiv = true,
+ .lower_bitfield_insert_to_bitfield_select = true,
+ .lower_bitfield_extract = true,
+ .lower_sub = true, /* TODO: set this to false once !1236 is merged */
+ .lower_pack_snorm_2x16 = true,
+ .lower_pack_snorm_4x8 = true,
+ .lower_pack_unorm_2x16 = true,
+ .lower_pack_unorm_4x8 = true,
+ .lower_unpack_snorm_2x16 = true,
+ .lower_unpack_snorm_4x8 = true,
+ .lower_unpack_unorm_2x16 = true,
+ .lower_unpack_unorm_4x8 = true,
+ .lower_unpack_half_2x16 = true,
+ .lower_extract_byte = true,
+ .lower_extract_word = true,
+ .lower_ffma = true,
+ .lower_fpow = true,
+ .lower_mul_2x32_64 = true,
+ .lower_rotate = true,
+ .max_unroll_iterations = 32,
+ .use_interpolated_input_intrinsics = true,
+};
+
bool
radv_can_dump_shader(struct radv_device *device,
struct radv_shader_module *module,
@@ -257,15 +289,18 @@ radv_shader_compile_to_nir(struct radv_device *device,
gl_shader_stage stage,
const VkSpecializationInfo *spec_info,
const VkPipelineCreateFlags flags,
- const struct radv_pipeline_layout *layout)
+ const struct radv_pipeline_layout *layout,
+ bool use_aco)
{
nir_shader *nir;
+ const nir_shader_compiler_options *nir_options = use_aco ? &nir_options_aco :
+ &nir_options_llvm;
if (module->nir) {
/* Some things such as our meta clear/blit code will give us a NIR
* shader directly. In that case, we just ignore the SPIR-V entirely
* and just use the NIR shader */
nir = module->nir;
- nir->options = &nir_options;
+ nir->options = nir_options;
nir_validate_shader(nir, "in internal shader");
assert(exec_list_length(&nir->functions) == 1);
@@ -305,13 +340,13 @@ radv_shader_compile_to_nir(struct radv_device *device,
.descriptor_indexing = true,
.device_group = true,
.draw_parameters = true,
- .float16 = true,
+ .float16 = !device->physical_device->use_aco,
.float64 = true,
.geometry_streams = true,
.image_read_without_format = true,
.image_write_without_format = true,
- .int8 = true,
- .int16 = true,
+ .int8 = !device->physical_device->use_aco,
+ .int16 = !device->physical_device->use_aco,
.int64 = true,
.int64_atomics = true,
.multiview = true,
@@ -320,8 +355,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
.runtime_descriptor_array = true,
.shader_viewport_index_layer = true,
.stencil_export = true,
- .storage_8bit = true,
- .storage_16bit = true,
+ .storage_8bit = !device->physical_device->use_aco,
+ .storage_16bit = !device->physical_device->use_aco,
.storage_image_ms = true,
.subgroup_arithmetic = true,
.subgroup_ballot = true,
@@ -343,7 +378,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
nir = spirv_to_nir(spirv, module->size / 4,
spec_entries, num_spec_entries,
stage, entrypoint_name,
- &spirv_options, &nir_options);
+ &spirv_options, nir_options);
assert(nir->info.stage == stage);
nir_validate_shader(nir, "after spirv_to_nir");
@@ -383,6 +418,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
NIR_PASS_V(nir, nir_split_var_copies);
NIR_PASS_V(nir, nir_split_per_member_structs);
+ if (nir->info.stage == MESA_SHADER_FRAGMENT && use_aco)
+ NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out);
if (nir->info.stage == MESA_SHADER_FRAGMENT)
NIR_PASS_V(nir, nir_lower_input_attachments, true);
@@ -961,7 +998,7 @@ radv_shader_variant_create(struct radv_device *device,
assert(binary->type == RADV_BINARY_TYPE_LEGACY);
config = ((struct radv_shader_binary_legacy *)binary)->config;
variant->code_size = radv_get_shader_binary_size(((struct radv_shader_binary_legacy *)binary)->code_size);
- variant->exec_size = variant->code_size;
+ variant->exec_size = ((struct radv_shader_binary_legacy *)binary)->exec_size;
}
variant->info = binary->info;
@@ -1049,13 +1086,12 @@ shader_variant_compile(struct radv_device *device,
struct radv_nir_compiler_options *options,
bool gs_copy_shader,
bool keep_shader_info,
+ bool use_aco,
struct radv_shader_binary **binary_out)
{
enum radeon_family chip_family = device->physical_device->rad_info.family;
- enum ac_target_machine_options tm_options = 0;
- struct ac_llvm_compiler ac_llvm;
struct radv_shader_binary *binary = NULL;
- bool thread_compiler;
+ bool init_llvm;
options->family = chip_family;
options->chip_class = device->physical_device->rad_info.chip_class;
@@ -1079,32 +1115,48 @@ shader_variant_compile(struct radv_device *device,
else
options->wave_size = device->physical_device->ge_wave_size;
- if (options->supports_spill)
- tm_options |= AC_TM_SUPPORTS_SPILL;
- if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED)
- tm_options |= AC_TM_SISCHED;
- if (options->check_ir)
- tm_options |= AC_TM_CHECK_IR;
- if (device->instance->debug_flags & RADV_DEBUG_NO_LOAD_STORE_OPT)
- tm_options |= AC_TM_NO_LOAD_STORE_OPT;
-
- thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM);
- ac_init_llvm_once();
- radv_init_llvm_compiler(&ac_llvm,
- thread_compiler,
- chip_family, tm_options,
- options->wave_size);
- if (gs_copy_shader) {
- assert(shader_count == 1);
- radv_compile_gs_copy_shader(&ac_llvm, *shaders, &binary,
- info, options);
+ init_llvm = !use_aco || options->dump_shader;
+#ifndef NDEBUG
+ init_llvm |= options->record_llvm_ir;
+#endif
+ if (init_llvm)
+ ac_init_llvm_once();
+
+ if (use_aco) {
+ aco_compile_shader(shader_count, shaders, &binary, info, options);
+ binary->info = *info;
} else {
- radv_compile_nir_shader(&ac_llvm, &binary, info,
- shaders, shader_count, options);
- }
- binary->info = *info;
+ enum ac_target_machine_options tm_options = 0;
+ struct ac_llvm_compiler ac_llvm;
+ bool thread_compiler;
+
+ if (options->supports_spill)
+ tm_options |= AC_TM_SUPPORTS_SPILL;
+ if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED)
+ tm_options |= AC_TM_SISCHED;
+ if (options->check_ir)
+ tm_options |= AC_TM_CHECK_IR;
+ if (device->instance->debug_flags & RADV_DEBUG_NO_LOAD_STORE_OPT)
+ tm_options |= AC_TM_NO_LOAD_STORE_OPT;
+
+ thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM);
+ radv_init_llvm_compiler(&ac_llvm,
+ thread_compiler,
+ chip_family, tm_options,
+ options->wave_size);
+
+ if (gs_copy_shader) {
+ assert(shader_count == 1);
+ radv_compile_gs_copy_shader(&ac_llvm, *shaders, &binary,
+ info, options);
+ } else {
+ radv_compile_nir_shader(&ac_llvm, &binary, info,
+ shaders, shader_count, options);
+ }
- radv_destroy_llvm_compiler(&ac_llvm, thread_compiler);
+ binary->info = *info;
+ radv_destroy_llvm_compiler(&ac_llvm, thread_compiler);
+ }
struct radv_shader_variant *variant = radv_shader_variant_create(device, binary,
keep_shader_info);
@@ -1143,6 +1195,7 @@ radv_shader_variant_compile(struct radv_device *device,
const struct radv_shader_variant_key *key,
struct radv_shader_info *info,
bool keep_shader_info,
+ bool use_aco,
struct radv_shader_binary **binary_out)
{
struct radv_nir_compiler_options options = {0};
@@ -1156,7 +1209,7 @@ radv_shader_variant_compile(struct radv_device *device,
options.robust_buffer_access = device->robust_buffer_access;
return shader_variant_compile(device, module, shaders, shader_count, shaders[shader_count - 1]->info.stage, info,
- &options, false, keep_shader_info, binary_out);
+ &options, false, keep_shader_info, use_aco, binary_out);
}
struct radv_shader_variant *
@@ -1172,7 +1225,7 @@ radv_create_gs_copy_shader(struct radv_device *device,
options.key.has_multiview_view_index = multiview;
return shader_variant_compile(device, NULL, &shader, 1, MESA_SHADER_VERTEX,
- info, &options, true, keep_shader_info, binary_out);
+ info, &options, true, keep_shader_info, false, binary_out);
}
void
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 57f9d160ed0..459ff863a91 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -333,6 +333,7 @@ struct radv_shader_binary_legacy {
struct radv_shader_binary base;
struct ac_shader_config config;
unsigned code_size;
+ unsigned exec_size;
unsigned llvm_ir_size;
unsigned disasm_size;
@@ -390,7 +391,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
gl_shader_stage stage,
const VkSpecializationInfo *spec_info,
const VkPipelineCreateFlags flags,
- const struct radv_pipeline_layout *layout);
+ const struct radv_pipeline_layout *layout,
+ bool use_aco);
void *
radv_alloc_shader_memory(struct radv_device *device,
@@ -412,6 +414,7 @@ radv_shader_variant_compile(struct radv_device *device,
const struct radv_shader_variant_key *key,
struct radv_shader_info *info,
bool keep_shader_info,
+ bool use_aco,
struct radv_shader_binary **binary_out);
struct radv_shader_variant *