/* * Copyright © 2020 Valve Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. * */ #include "aco_builder.h" #include "aco_ir.h" #include namespace aco { namespace { /* there can also be LDS and VALU clauses, but I don't see how those are interesting */ enum clause_type { clause_smem, clause_other, /* GFX10: */ clause_vmem, clause_flat, /* GFX11: */ clause_mimg_load, clause_mimg_store, clause_mimg_atomic, clause_mimg_sample, clause_vmem_load, clause_vmem_store, clause_vmem_atomic, clause_flat_load, clause_flat_store, clause_flat_atomic, clause_bvh, }; void emit_clause(Builder& bld, unsigned num_instrs, aco_ptr* instrs) { unsigned start = 0; /* skip any stores at the start */ for (; (start < num_instrs) && instrs[start]->definitions.empty(); start++) bld.insert(std::move(instrs[start])); unsigned end = start; for (; (end < num_instrs) && !instrs[end]->definitions.empty(); end++) ; unsigned clause_size = end - start; if (clause_size > 1) bld.sopp(aco_opcode::s_clause, -1, clause_size - 1); for (unsigned i = start; i < num_instrs; i++) bld.insert(std::move(instrs[i])); } clause_type get_type(Program* program, aco_ptr& instr) { if (instr->isSMEM() && !instr->operands.empty()) return clause_smem; if (program->gfx_level >= GFX11) { if (instr->isMIMG()) { switch (instr->opcode) { case aco_opcode::image_bvh_intersect_ray: case aco_opcode::image_bvh64_intersect_ray: return clause_bvh; case aco_opcode::image_atomic_swap: case aco_opcode::image_atomic_cmpswap: case aco_opcode::image_atomic_add: case aco_opcode::image_atomic_sub: case aco_opcode::image_atomic_rsub: case aco_opcode::image_atomic_smin: case aco_opcode::image_atomic_umin: case aco_opcode::image_atomic_smax: case aco_opcode::image_atomic_umax: case aco_opcode::image_atomic_and: case aco_opcode::image_atomic_or: case aco_opcode::image_atomic_xor: case aco_opcode::image_atomic_inc: case aco_opcode::image_atomic_dec: case aco_opcode::image_atomic_fcmpswap: case aco_opcode::image_atomic_fmin: case aco_opcode::image_atomic_fmax: return clause_mimg_atomic; default: if (instr->definitions.empty()) return clause_mimg_store; else return !instr->operands[1].isUndefined() && instr->operands[1].regClass() == s4 ? clause_mimg_sample : clause_mimg_load; } } else if (instr->isMTBUF() || instr->isScratch()) { return instr->definitions.empty() ? clause_vmem_store : clause_vmem_load; } else if (instr->isMUBUF()) { switch (instr->opcode) { case aco_opcode::buffer_atomic_add: case aco_opcode::buffer_atomic_and_x2: case aco_opcode::buffer_atomic_rsub: case aco_opcode::buffer_atomic_umax: case aco_opcode::buffer_atomic_dec: case aco_opcode::buffer_atomic_smax: case aco_opcode::buffer_atomic_fmax: case aco_opcode::buffer_atomic_rsub_x2: case aco_opcode::buffer_atomic_smin: case aco_opcode::buffer_atomic_sub: case aco_opcode::buffer_atomic_sub_x2: case aco_opcode::buffer_atomic_xor_x2: case aco_opcode::buffer_atomic_add_f32: case aco_opcode::buffer_atomic_inc: case aco_opcode::buffer_atomic_swap_x2: case aco_opcode::buffer_atomic_cmpswap: case aco_opcode::buffer_atomic_fmin_x2: case aco_opcode::buffer_atomic_umin: case aco_opcode::buffer_atomic_or: case aco_opcode::buffer_atomic_umax_x2: case aco_opcode::buffer_atomic_smin_x2: case aco_opcode::buffer_atomic_umin_x2: case aco_opcode::buffer_atomic_cmpswap_x2: case aco_opcode::buffer_atomic_add_x2: case aco_opcode::buffer_atomic_swap: case aco_opcode::buffer_atomic_and: case aco_opcode::buffer_atomic_fmin: case aco_opcode::buffer_atomic_fcmpswap_x2: case aco_opcode::buffer_atomic_or_x2: case aco_opcode::buffer_atomic_fcmpswap: case aco_opcode::buffer_atomic_xor: case aco_opcode::buffer_atomic_dec_x2: case aco_opcode::buffer_atomic_fmax_x2: case aco_opcode::buffer_atomic_csub: case aco_opcode::buffer_atomic_inc_x2: case aco_opcode::buffer_atomic_smax_x2: return clause_vmem_atomic; default: return instr->definitions.empty() ? clause_vmem_store : clause_vmem_load; } } else if (instr->isGlobal()) { switch (instr->opcode) { case aco_opcode::global_atomic_swap: case aco_opcode::global_atomic_umax: case aco_opcode::global_atomic_cmpswap: case aco_opcode::global_atomic_and_x2: case aco_opcode::global_atomic_fmax: case aco_opcode::global_atomic_smax_x2: case aco_opcode::global_atomic_fmax_x2: case aco_opcode::global_atomic_dec: case aco_opcode::global_atomic_dec_x2: case aco_opcode::global_atomic_umin: case aco_opcode::global_atomic_fcmpswap_x2: case aco_opcode::global_atomic_inc: case aco_opcode::global_atomic_and: case aco_opcode::global_atomic_fmin: case aco_opcode::global_atomic_fcmpswap: case aco_opcode::global_atomic_or_x2: case aco_opcode::global_atomic_smax: case aco_opcode::global_atomic_sub: case aco_opcode::global_atomic_xor: case aco_opcode::global_atomic_swap_x2: case aco_opcode::global_atomic_umax_x2: case aco_opcode::global_atomic_umin_x2: case aco_opcode::global_atomic_xor_x2: case aco_opcode::global_atomic_inc_x2: case aco_opcode::global_atomic_fmin_x2: case aco_opcode::global_atomic_add_f32: case aco_opcode::global_atomic_add: case aco_opcode::global_atomic_or: case aco_opcode::global_atomic_add_x2: case aco_opcode::global_atomic_smin_x2: case aco_opcode::global_atomic_smin: case aco_opcode::global_atomic_csub: case aco_opcode::global_atomic_sub_x2: case aco_opcode::global_atomic_cmpswap_x2: return clause_vmem_atomic; default: return instr->definitions.empty() ? clause_vmem_store : clause_vmem_load; } } else if (instr->isFlat()) { switch (instr->opcode) { case aco_opcode::flat_atomic_smax: case aco_opcode::flat_atomic_fcmpswap_x2: case aco_opcode::flat_atomic_inc_x2: case aco_opcode::flat_atomic_dec: case aco_opcode::flat_atomic_fmin: case aco_opcode::flat_atomic_umax_x2: case aco_opcode::flat_atomic_add_f32: case aco_opcode::flat_atomic_or: case aco_opcode::flat_atomic_smax_x2: case aco_opcode::flat_atomic_umin: case aco_opcode::flat_atomic_sub: case aco_opcode::flat_atomic_swap: case aco_opcode::flat_atomic_swap_x2: case aco_opcode::flat_atomic_cmpswap_x2: case aco_opcode::flat_atomic_fcmpswap: case aco_opcode::flat_atomic_add: case aco_opcode::flat_atomic_umin_x2: case aco_opcode::flat_atomic_xor_x2: case aco_opcode::flat_atomic_smin: case aco_opcode::flat_atomic_fmax_x2: case aco_opcode::flat_atomic_cmpswap: case aco_opcode::flat_atomic_dec_x2: case aco_opcode::flat_atomic_sub_x2: case aco_opcode::flat_atomic_add_x2: case aco_opcode::flat_atomic_umax: case aco_opcode::flat_atomic_xor: case aco_opcode::flat_atomic_and_x2: case aco_opcode::flat_atomic_inc: case aco_opcode::flat_atomic_and: case aco_opcode::flat_atomic_fmin_x2: case aco_opcode::flat_atomic_smin_x2: case aco_opcode::flat_atomic_or_x2: case aco_opcode::flat_atomic_fmax: return clause_flat_atomic; default: return instr->definitions.empty() ? clause_flat_store : clause_flat_load; } } } else { if (instr->isVMEM() && !instr->operands.empty()) { if (program->gfx_level == GFX10 && instr->isMIMG() && get_mimg_nsa_dwords(instr.get()) > 0) return clause_other; else return clause_vmem; } else if (instr->isScratch() || instr->isGlobal()) { return clause_vmem; } else if (instr->isFlat()) { return clause_flat; } } return clause_other; } } /* end namespace */ void form_hard_clauses(Program* program) { for (Block& block : program->blocks) { unsigned num_instrs = 0; aco_ptr current_instrs[63]; clause_type current_type = clause_other; std::vector> new_instructions; new_instructions.reserve(block.instructions.size()); Builder bld(program, &new_instructions); for (unsigned i = 0; i < block.instructions.size(); i++) { aco_ptr& instr = block.instructions[i]; clause_type type = get_type(program, instr); if (type != current_type || num_instrs == 63 || (num_instrs && !should_form_clause(current_instrs[0].get(), instr.get()))) { emit_clause(bld, num_instrs, current_instrs); num_instrs = 0; current_type = type; } if (type == clause_other) { bld.insert(std::move(instr)); continue; } current_instrs[num_instrs++] = std::move(instr); } emit_clause(bld, num_instrs, current_instrs); block.instructions = std::move(new_instructions); } } } // namespace aco