diff options
author | Ronak Chauhan <RonakNilesh.Chauhan@amd.com> | 2020-09-09 18:01:28 +0530 |
---|---|---|
committer | Ronak Chauhan <RonakNilesh.Chauhan@amd.com> | 2020-09-09 18:01:28 +0530 |
commit | f078577f31cc96b6e8a064f628f81a376f21e2e2 (patch) | |
tree | 47bd5a77b99d0132b6521b6f5a9fe7e04005bee2 | |
parent | d816499f95d673bbad297d0231cbeaf5efbbc5de (diff) | |
download | llvm-277873ce0f48.tar.gz |
Revert "[AMDGPU] Support disassembly for AMDGPU kernel descriptors" (277873ce0f48)
This reverts commit 487a80531006add8102d50dbcce4b6fd729ab1f6.
Tests fail on big-endian machines.
-rw-r--r-- | llvm/include/llvm/Support/AMDHSAKernelDescriptor.h | 70 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 345 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h | 30 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/nop-data.ll | 4 | ||||
-rw-r--r-- | llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s | 37 | ||||
-rw-r--r-- | llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s | 49 | ||||
-rw-r--r-- | llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s | 36 | ||||
-rw-r--r-- | llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s | 58 | ||||
-rw-r--r-- | llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s | 53 | ||||
-rw-r--r-- | llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s | 41 | ||||
-rw-r--r-- | llvm/tools/llvm-objdump/llvm-objdump.cpp | 17 |
11 files changed, 50 insertions, 690 deletions
diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h index 48a09ac48005..d1c2147536a7 100644 --- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h +++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h @@ -162,49 +162,39 @@ struct kernel_descriptor_t { uint8_t reserved2[6]; }; -enum : uint32_t { - GROUP_SEGMENT_FIXED_SIZE_OFFSET = 0, - PRIVATE_SEGMENT_FIXED_SIZE_OFFSET = 4, - RESERVED0_OFFSET = 8, - KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET = 16, - RESERVED1_OFFSET = 24, - COMPUTE_PGM_RSRC3_OFFSET = 44, - COMPUTE_PGM_RSRC1_OFFSET = 48, - COMPUTE_PGM_RSRC2_OFFSET = 52, - KERNEL_CODE_PROPERTIES_OFFSET = 56, - RESERVED2_OFFSET = 58, -}; - static_assert( sizeof(kernel_descriptor_t) == 64, "invalid size for kernel_descriptor_t"); -static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) == - GROUP_SEGMENT_FIXED_SIZE_OFFSET, - "invalid offset for group_segment_fixed_size"); -static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) == - PRIVATE_SEGMENT_FIXED_SIZE_OFFSET, - "invalid offset for private_segment_fixed_size"); -static_assert(offsetof(kernel_descriptor_t, reserved0) == RESERVED0_OFFSET, - "invalid offset for reserved0"); -static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) == - KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET, - "invalid offset for kernel_code_entry_byte_offset"); -static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET, - "invalid offset for reserved1"); -static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) == - COMPUTE_PGM_RSRC3_OFFSET, - "invalid offset for compute_pgm_rsrc3"); -static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == - COMPUTE_PGM_RSRC1_OFFSET, - "invalid offset for compute_pgm_rsrc1"); -static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) == - COMPUTE_PGM_RSRC2_OFFSET, - "invalid offset for compute_pgm_rsrc2"); -static_assert(offsetof(kernel_descriptor_t, 
kernel_code_properties) == - KERNEL_CODE_PROPERTIES_OFFSET, - "invalid offset for kernel_code_properties"); -static_assert(offsetof(kernel_descriptor_t, reserved2) == RESERVED2_OFFSET, - "invalid offset for reserved2"); +static_assert( + offsetof(kernel_descriptor_t, group_segment_fixed_size) == 0, + "invalid offset for group_segment_fixed_size"); +static_assert( + offsetof(kernel_descriptor_t, private_segment_fixed_size) == 4, + "invalid offset for private_segment_fixed_size"); +static_assert( + offsetof(kernel_descriptor_t, reserved0) == 8, + "invalid offset for reserved0"); +static_assert( + offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) == 16, + "invalid offset for kernel_code_entry_byte_offset"); +static_assert( + offsetof(kernel_descriptor_t, reserved1) == 24, + "invalid offset for reserved1"); +static_assert( + offsetof(kernel_descriptor_t, compute_pgm_rsrc3) == 44, + "invalid offset for compute_pgm_rsrc3"); +static_assert( + offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == 48, + "invalid offset for compute_pgm_rsrc1"); +static_assert( + offsetof(kernel_descriptor_t, compute_pgm_rsrc2) == 52, + "invalid offset for compute_pgm_rsrc2"); +static_assert( + offsetof(kernel_descriptor_t, kernel_code_properties) == 56, + "invalid offset for kernel_code_properties"); +static_assert( + offsetof(kernel_descriptor_t, reserved2) == 58, + "invalid offset for reserved2"); } // end namespace amdhsa } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 840208169168..9c2f2e7eecd1 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -34,7 +34,6 @@ #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/AMDHSAKernelDescriptor.h" #include "llvm/Support/Endian.h" #include 
"llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -1217,350 +1216,6 @@ bool AMDGPUDisassembler::isGFX10() const { } //===----------------------------------------------------------------------===// -// AMDGPU specific symbol handling -//===----------------------------------------------------------------------===// -#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \ - do { \ - KdStream << Indent << DIRECTIVE " " \ - << ((FourByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \ - } while (0) - -// NOLINTNEXTLINE(readability-identifier-naming) -MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( - uint32_t FourByteBuffer, raw_string_ostream &KdStream) const { - using namespace amdhsa; - StringRef Indent = "\t"; - - // We cannot accurately backward compute #VGPRs used from - // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same - // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we - // simply calculate the inverse of what the assembler does. - - uint32_t GranulatedWorkitemVGPRCount = - (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT) >> - COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT; - - uint32_t NextFreeVGPR = (GranulatedWorkitemVGPRCount + 1) * - AMDGPU::IsaInfo::getVGPREncodingGranule(&STI); - - KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n'; - - // We cannot backward compute values used to calculate - // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following - // directives can't be computed: - // .amdhsa_reserve_vcc - // .amdhsa_reserve_flat_scratch - // .amdhsa_reserve_xnack_mask - // They take their respective default values if not specified in the assembly. - // - // GRANULATED_WAVEFRONT_SGPR_COUNT - // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK) - // - // We compute the inverse as though all directives apart from NEXT_FREE_SGPR - // are set to 0. 
So while disassembling we consider that: - // - // GRANULATED_WAVEFRONT_SGPR_COUNT - // = f(NEXT_FREE_SGPR + 0 + 0 + 0) - // - // The disassembler cannot recover the original values of those 3 directives. - - uint32_t GranulatedWavefrontSGPRCount = - (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT) >> - COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT; - - if (isGFX10() && GranulatedWavefrontSGPRCount) - return MCDisassembler::Fail; - - uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) * - AMDGPU::IsaInfo::getSGPREncodingGranule(&STI); - - KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n'; - KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n'; - KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n'; - KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n"; - - if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIORITY) - return MCDisassembler::Fail; - - PRINT_DIRECTIVE(".amdhsa_float_round_mode_32", - COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32); - PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64", - COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64); - PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32", - COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32); - PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64", - COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64); - - if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV) - return MCDisassembler::Fail; - - PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP); - - if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE) - return MCDisassembler::Fail; - - PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE); - - if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY) - return MCDisassembler::Fail; - - if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER) - return MCDisassembler::Fail; - - PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL); - - if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0) - return MCDisassembler::Fail; - - if (isGFX10()) { - 
PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode", - COMPUTE_PGM_RSRC1_WGP_MODE); - PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED); - PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS); - } - return MCDisassembler::Success; -} - -// NOLINTNEXTLINE(readability-identifier-naming) -MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2( - uint32_t FourByteBuffer, raw_string_ostream &KdStream) const { - using namespace amdhsa; - StringRef Indent = "\t"; - PRINT_DIRECTIVE( - ".amdhsa_system_sgpr_private_segment_wavefront_offset", - COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET); - PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x", - COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X); - PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y", - COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y); - PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z", - COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z); - PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info", - COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO); - PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id", - COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID); - - if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH) - return MCDisassembler::Fail; - - if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY) - return MCDisassembler::Fail; - - if (FourByteBuffer & COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE) - return MCDisassembler::Fail; - - PRINT_DIRECTIVE( - ".amdhsa_exception_fp_ieee_invalid_op", - COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION); - PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src", - COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE); - PRINT_DIRECTIVE( - ".amdhsa_exception_fp_ieee_div_zero", - COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO); - PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow", - COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW); - 
PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow", - COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW); - PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact", - COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT); - PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero", - COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO); - - if (FourByteBuffer & COMPUTE_PGM_RSRC2_RESERVED0) - return MCDisassembler::Fail; - - return MCDisassembler::Success; -} - -#undef PRINT_DIRECTIVE - -MCDisassembler::DecodeStatus -AMDGPUDisassembler::decodeKernelDescriptorDirective( - DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes, - raw_string_ostream &KdStream) const { -#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \ - do { \ - KdStream << Indent << DIRECTIVE " " \ - << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \ - } while (0) - - uint16_t TwoByteBuffer = 0; - uint32_t FourByteBuffer = 0; - uint64_t EightByteBuffer = 0; - - StringRef ReservedBytes; - StringRef Indent = "\t"; - - assert(Bytes.size() == 64); - DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8); - - switch (Cursor.tell()) { - case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET: - FourByteBuffer = DE.getU32(Cursor); - KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer - << '\n'; - return MCDisassembler::Success; - - case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET: - FourByteBuffer = DE.getU32(Cursor); - KdStream << Indent << ".amdhsa_private_segment_fixed_size " - << FourByteBuffer << '\n'; - return MCDisassembler::Success; - - case amdhsa::RESERVED0_OFFSET: - // 8 reserved bytes, must be 0. - EightByteBuffer = DE.getU64(Cursor); - if (EightByteBuffer) { - return MCDisassembler::Fail; - } - return MCDisassembler::Success; - - case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET: - // KERNEL_CODE_ENTRY_BYTE_OFFSET - // So far no directive controls this for Code Object V3, so simply skip for - // disassembly. 
- DE.skip(Cursor, 8); - return MCDisassembler::Success; - - case amdhsa::RESERVED1_OFFSET: - // 20 reserved bytes, must be 0. - ReservedBytes = DE.getBytes(Cursor, 20); - for (int I = 0; I < 20; ++I) { - if (ReservedBytes[I] != 0) { - return MCDisassembler::Fail; - } - } - return MCDisassembler::Success; - - case amdhsa::COMPUTE_PGM_RSRC3_OFFSET: - // COMPUTE_PGM_RSRC3 - // - Only set for GFX10, GFX6-9 have this to be 0. - // - Currently no directives directly control this. - FourByteBuffer = DE.getU32(Cursor); - if (!isGFX10() && FourByteBuffer) { - return MCDisassembler::Fail; - } - return MCDisassembler::Success; - - case amdhsa::COMPUTE_PGM_RSRC1_OFFSET: - FourByteBuffer = DE.getU32(Cursor); - if (decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream) == - MCDisassembler::Fail) { - return MCDisassembler::Fail; - } - return MCDisassembler::Success; - - case amdhsa::COMPUTE_PGM_RSRC2_OFFSET: - FourByteBuffer = DE.getU32(Cursor); - if (decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream) == - MCDisassembler::Fail) { - return MCDisassembler::Fail; - } - return MCDisassembler::Success; - - case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET: - using namespace amdhsa; - TwoByteBuffer = DE.getU16(Cursor); - - PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer", - KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER); - PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr", - KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR); - PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr", - KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR); - PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr", - KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR); - PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id", - KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID); - PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init", - KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT); - PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size", - KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE); - - if (TwoByteBuffer & 
KERNEL_CODE_PROPERTY_RESERVED0) - return MCDisassembler::Fail; - - // Reserved for GFX9 - if (isGFX9() && - (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) { - return MCDisassembler::Fail; - } else if (isGFX10()) { - PRINT_DIRECTIVE(".amdhsa_wavefront_size32", - KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); - } - - if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) - return MCDisassembler::Fail; - - return MCDisassembler::Success; - - case amdhsa::RESERVED2_OFFSET: - // 6 bytes from here are reserved, must be 0. - ReservedBytes = DE.getBytes(Cursor, 6); - for (int I = 0; I < 6; ++I) { - if (ReservedBytes[I] != 0) - return MCDisassembler::Fail; - } - return MCDisassembler::Success; - - default: - llvm_unreachable("Unhandled index. Case statements cover everything."); - return MCDisassembler::Fail; - } -#undef PRINT_DIRECTIVE -} - -MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor( - StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const { - // CP microcode requires the kernel descriptor to be 64 aligned. - if (Bytes.size() != 64 || KdAddress % 64 != 0) - return MCDisassembler::Fail; - - std::string Kd; - raw_string_ostream KdStream(Kd); - KdStream << ".amdhsa_kernel " << KdName << '\n'; - - DataExtractor::Cursor C(0); - while (C && C.tell() < Bytes.size()) { - MCDisassembler::DecodeStatus Status = - decodeKernelDescriptorDirective(C, Bytes, KdStream); - - cantFail(C.takeError()); - - if (Status == MCDisassembler::Fail) - return MCDisassembler::Fail; - } - KdStream << ".end_amdhsa_kernel\n"; - outs() << KdStream.str(); - return MCDisassembler::Success; -} - -Optional<MCDisassembler::DecodeStatus> -AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, - ArrayRef<uint8_t> Bytes, uint64_t Address, - raw_ostream &CStream) const { - // Right now only kernel descriptor needs to be handled. - // We ignore all other symbols for target specific handling. 
- // TODO: - // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code - // Object V2 and V3 when symbols are marked protected. - - // amd_kernel_code_t for Code Object V2. - if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) { - Size = 256; - return MCDisassembler::Fail; - } - - // Code Object V3 kernel descriptors. - StringRef Name = Symbol.Name; - if (Symbol.Type == ELF::STT_OBJECT && Name.endswith(StringRef(".kd"))) { - Size = 64; // Size = 64 regardless of success or failure. - return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address); - } - return None; -} - -//===----------------------------------------------------------------------===// // AMDGPUSymbolizer //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index 315602c35288..f975af409a09 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -17,11 +17,10 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" #include "llvm/MC/MCDisassembler/MCSymbolizer.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/Support/DataExtractor.h" #include <algorithm> #include <cstdint> @@ -67,33 +66,6 @@ public: DecodeStatus tryDecodeInst(const uint8_t* Table, MCInst &MI, uint64_t Inst, uint64_t Address) const; - Optional<DecodeStatus> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, - ArrayRef<uint8_t> Bytes, - uint64_t Address, - raw_ostream &CStream) const override; - - DecodeStatus decodeKernelDescriptor(StringRef KdName, ArrayRef<uint8_t> Bytes, - uint64_t KdAddress) const; - - DecodeStatus - decodeKernelDescriptorDirective(DataExtractor::Cursor &Cursor, - ArrayRef<uint8_t> Bytes, - raw_string_ostream &KdStream) 
const; - - /// Decode as directives that handle COMPUTE_PGM_RSRC1. - /// \param FourByteBuffer - Bytes holding contents of COMPUTE_PGM_RSRC1. - /// \param KdStream - Stream to write the disassembled directives to. - // NOLINTNEXTLINE(readability-identifier-naming) - DecodeStatus decodeCOMPUTE_PGM_RSRC1(uint32_t FourByteBuffer, - raw_string_ostream &KdStream) const; - - /// Decode as directives that handle COMPUTE_PGM_RSRC2. - /// \param FourByteBuffer - Bytes holding contents of COMPUTE_PGM_RSRC2. - /// \param KdStream - Stream to write the disassembled directives to. - // NOLINTNEXTLINE(readability-identifier-naming) - DecodeStatus decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer, - raw_string_ostream &KdStream) const; - DecodeStatus convertSDWAInst(MCInst &MI) const; DecodeStatus convertDPP8Inst(MCInst &MI) const; DecodeStatus convertMIMGInst(MCInst &MI) const; diff --git a/llvm/test/CodeGen/AMDGPU/nop-data.ll b/llvm/test/CodeGen/AMDGPU/nop-data.ll index e21ca97e8ffc..7b6853acce28 100644 --- a/llvm/test/CodeGen/AMDGPU/nop-data.ll +++ b/llvm/test/CodeGen/AMDGPU/nop-data.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=amdgcn--amdhsa -mattr=-code-object-v3 -mcpu=fiji -filetype=obj < %s | llvm-objdump -d - --mcpu=fiji | FileCheck %s ; CHECK: <kernel0>: -; CHECK: s_endpgm +; CHECK-NEXT: s_endpgm define amdgpu_kernel void @kernel0() align 256 { entry: ret void @@ -80,7 +80,7 @@ entry: ; CHECK-EMPTY: ; CHECK-NEXT: <kernel1>: -; CHECK: s_endpgm +; CHECK-NEXT: s_endpgm define amdgpu_kernel void @kernel1(i32 addrspace(1)* addrspace(4)* %ptr.out) align 256 { entry: ret void diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s deleted file mode 100644 index eee3fd4b7103..000000000000 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s +++ /dev/null @@ -1,37 +0,0 @@ -;; Failure test. 
We create a malformed kernel descriptor (KD) by manually -;; setting the bytes, because one can't create a malformed KD using the -;; assembler directives. - -; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t.o - -; RUN: printf ".type my_kernel.kd, @object \nmy_kernel.kd:\n.size my_kernel.kd, 64\n" > %t1.sym_info -; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t.o \ -; RUN: | tail -n +9 > %t1.sym_content -; RUN: cat %t1.sym_info %t1.sym_content > %t1.s - -; RUN: llvm-mc %t1.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t-re-assemble.o -; RUN: diff %t.o %t-re-assemble.o - -;; Test failure by setting one of the reserved bytes to non-zero value. - -.type my_kernel.kd, @object -.size my_kernel.kd, 64 -my_kernel.kd: - .long 0x00000000 ;; group_segment_fixed_size - .long 0x00000000 ;; private_segment_fixed_size - .quad 0x00FF000000000000 ;; reserved bytes. - .quad 0x0000000000000000 ;; kernel_code_entry_byte_offset, any value works. - - ;; 20 reserved bytes. - .quad 0x0000000000000000 - .quad 0x0000000000000000 - .long 0x00000000 - - .long 0x00000000 ;; compute_PGM_RSRC3 - .long 0x00000000 ;; compute_PGM_RSRC1 - .long 0x00000000 ;; compute_PGM_RSRC2 - .short 0x0000 ;; additional fields. - - ;; 6 reserved bytes. - .long 0x0000000 - .short 0x0000 diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s deleted file mode 100644 index 0b798a298d39..000000000000 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s +++ /dev/null @@ -1,49 +0,0 @@ -;; Test disassembly for GRANULATED_WAVEFRONT_SGPR_COUNT in the kernel descriptor. 
- -; RUN: split-file %s %t.dir - -; RUN: llvm-mc %t.dir/1.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 -; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +8 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1-re-assemble -; RUN: diff %t1 %t1-re-assemble - -; RUN: llvm-mc %t.dir/2.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 -; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +8 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2-re-assemble -; RUN: diff %t2 %t2-re-assemble - -; RUN: llvm-mc %t.dir/3.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3 -; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +8 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3-re-assemble -; RUN: diff %t3 %t3-re-assemble - - -;--- 1.s -;; Only set next_free_sgpr. -.amdhsa_kernel my_kernel_1 - .amdhsa_next_free_vgpr 0 - .amdhsa_next_free_sgpr 42 - .amdhsa_reserve_flat_scratch 0 - .amdhsa_reserve_xnack_mask 0 - .amdhsa_reserve_vcc 0 -.end_amdhsa_kernel - -;--- 2.s -;; Only set other directives. -.amdhsa_kernel my_kernel_2 - .amdhsa_next_free_vgpr 0 - .amdhsa_next_free_sgpr 0 - .amdhsa_reserve_flat_scratch 1 - .amdhsa_reserve_xnack_mask 1 - .amdhsa_reserve_vcc 1 -.end_amdhsa_kernel - -;--- 3.s -;; Set all affecting directives. 
-.amdhsa_kernel my_kernel_3 - .amdhsa_next_free_vgpr 0 - .amdhsa_next_free_sgpr 35 - .amdhsa_reserve_flat_scratch 1 - .amdhsa_reserve_xnack_mask 1 - .amdhsa_reserve_vcc 1 -.end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s deleted file mode 100644 index a8883d2f74be..000000000000 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s +++ /dev/null @@ -1,36 +0,0 @@ -;; Test disassembly for GRANULATED_WORKITEM_VGPR_COUNT in the kernel descriptor. - -; RUN: split-file %s %t.dir - -; RUN: llvm-mc %t.dir/1.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 -; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +8 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1-re-assemble -; RUN: diff %t1 %t1-re-assemble - -; RUN: llvm-mc %t.dir/2.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 -; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +8 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2-re-assemble -; RUN: diff %t2 %t2-re-assemble - -; RUN: llvm-mc %t.dir/3.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3 -; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +8 \ -; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3-re-assemble -; RUN: diff %t3 %t3-re-assemble - -;--- 1.s -.amdhsa_kernel my_kernel_1 - .amdhsa_next_free_vgpr 23 - .amdhsa_next_free_sgpr 0 -.end_amdhsa_kernel - -;--- 2.s -.amdhsa_kernel my_kernel_2 - .amdhsa_next_free_vgpr 14 - .amdhsa_next_free_sgpr 0 -.end_amdhsa_kernel - -;--- 3.s -.amdhsa_kernel my_kernel_3 - .amdhsa_next_free_vgpr 32 - .amdhsa_next_free_sgpr 0 -.end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s deleted file mode 
100644 index 803507a130c0..000000000000 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s +++ /dev/null @@ -1,58 +0,0 @@ -;; Entirely zeroed kernel descriptor (for GFX10). - -; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -filetype=obj -o %t -; RUN: llvm-objdump -s -j .text %t | FileCheck --check-prefix=OBJDUMP %s - -;; TODO: -;; This file and kd-zeroed-raw.s should produce the same output for the kernel -;; descriptor - a block of 64 zeroed bytes. But looks like the assembler sets -;; the FWD_PROGRESS bit in COMPUTE_PGM_RSRC1 to 1 even when the directive -;; mentions 0 (see line 36). - -;; Check the raw bytes right now. - -; OBJDUMP: 0000 00000000 00000000 00000000 00000000 -; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 -; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 -; OBJDUMP-NEXT: 0030 01000000 00000000 00000000 00000000 - -.amdhsa_kernel my_kernel - .amdhsa_group_segment_fixed_size 0 - .amdhsa_private_segment_fixed_size 0 - .amdhsa_next_free_vgpr 8 - .amdhsa_reserve_vcc 0 - .amdhsa_reserve_flat_scratch 0 - .amdhsa_reserve_xnack_mask 0 - .amdhsa_next_free_sgpr 8 - .amdhsa_float_round_mode_32 0 - .amdhsa_float_round_mode_16_64 0 - .amdhsa_float_denorm_mode_32 0 - .amdhsa_float_denorm_mode_16_64 0 - .amdhsa_dx10_clamp 0 - .amdhsa_ieee_mode 0 - .amdhsa_fp16_overflow 0 - .amdhsa_workgroup_processor_mode 0 - .amdhsa_memory_ordered 0 - .amdhsa_forward_progress 0 - .amdhsa_system_sgpr_private_segment_wavefront_offset 0 - .amdhsa_system_sgpr_workgroup_id_x 0 - .amdhsa_system_sgpr_workgroup_id_y 0 - .amdhsa_system_sgpr_workgroup_id_z 0 - .amdhsa_system_sgpr_workgroup_info 0 - .amdhsa_system_vgpr_workitem_id 0 - .amdhsa_exception_fp_ieee_invalid_op 0 - .amdhsa_exception_fp_denorm_src 0 - .amdhsa_exception_fp_ieee_div_zero 0 - .amdhsa_exception_fp_ieee_overflow 0 - .amdhsa_exception_fp_ieee_underflow 0 - .amdhsa_exception_fp_ieee_inexact 0 - .amdhsa_exception_int_div_zero 0 - 
.amdhsa_user_sgpr_private_segment_buffer 0 - .amdhsa_user_sgpr_dispatch_ptr 0 - .amdhsa_user_sgpr_queue_ptr 0 - .amdhsa_user_sgpr_kernarg_segment_ptr 0 - .amdhsa_user_sgpr_dispatch_id 0 - .amdhsa_user_sgpr_flat_scratch_init 0 - .amdhsa_user_sgpr_private_segment_size 0 - .amdhsa_wavefront_size32 0 -.end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s deleted file mode 100644 index de4fdf74d88e..000000000000 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s +++ /dev/null @@ -1,53 +0,0 @@ -;; Entirely zeroed kernel descriptor (for GFX9). - -; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 -; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t1 \ -; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 -; RUN: diff %t1 %t2 - -; RUN: llvm-objdump -s -j .text %t1 | FileCheck --check-prefix=OBJDUMP %s - -; OBJDUMP: 0000 00000000 00000000 00000000 00000000 -; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 -; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 -; OBJDUMP-NEXT: 0030 00000000 00000000 00000000 00000000 - -;; This file and kd-zeroed-raw.s produce the same output for the kernel -;; descriptor - a block of 64 zeroed bytes. 
- -.amdhsa_kernel my_kernel - .amdhsa_group_segment_fixed_size 0 - .amdhsa_private_segment_fixed_size 0 - .amdhsa_next_free_vgpr 0 - .amdhsa_reserve_vcc 0 - .amdhsa_reserve_flat_scratch 0 - .amdhsa_reserve_xnack_mask 0 - .amdhsa_next_free_sgpr 0 - .amdhsa_float_round_mode_32 0 - .amdhsa_float_round_mode_16_64 0 - .amdhsa_float_denorm_mode_32 0 - .amdhsa_float_denorm_mode_16_64 0 - .amdhsa_dx10_clamp 0 - .amdhsa_ieee_mode 0 - .amdhsa_fp16_overflow 0 - .amdhsa_system_sgpr_private_segment_wavefront_offset 0 - .amdhsa_system_sgpr_workgroup_id_x 0 - .amdhsa_system_sgpr_workgroup_id_y 0 - .amdhsa_system_sgpr_workgroup_id_z 0 - .amdhsa_system_sgpr_workgroup_info 0 - .amdhsa_system_vgpr_workitem_id 0 - .amdhsa_exception_fp_ieee_invalid_op 0 - .amdhsa_exception_fp_denorm_src 0 - .amdhsa_exception_fp_ieee_div_zero 0 - .amdhsa_exception_fp_ieee_overflow 0 - .amdhsa_exception_fp_ieee_underflow 0 - .amdhsa_exception_fp_ieee_inexact 0 - .amdhsa_exception_int_div_zero 0 - .amdhsa_user_sgpr_private_segment_buffer 0 - .amdhsa_user_sgpr_dispatch_ptr 0 - .amdhsa_user_sgpr_queue_ptr 0 - .amdhsa_user_sgpr_kernarg_segment_ptr 0 - .amdhsa_user_sgpr_dispatch_id 0 - .amdhsa_user_sgpr_flat_scratch_init 0 - .amdhsa_user_sgpr_private_segment_size 0 -.end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s deleted file mode 100644 index 85554209d5d8..000000000000 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s +++ /dev/null @@ -1,41 +0,0 @@ -; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 -; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t1 \ -; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 -; RUN: llvm-objdump -s -j .text %t2 | FileCheck --check-prefix=OBJDUMP %s - -;; Not running lit-test over gfx10 (see kd-zeroed-gfx10.s for details). 
-;; kd-zeroed-raw.s and kd-zeroed-*.s should produce the same output for the -;; kernel descriptor - a block of 64 zeroed bytes. - -;; The disassembly will produce the contents of kd-zeroed-*.s which on being -;; assembled contains additional relocation info. A diff over the entire object -;; will fail in this case. So we check by looking the bytes in .text. - -; OBJDUMP: 0000 00000000 00000000 00000000 00000000 -; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 -; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 -; OBJDUMP-NEXT: 0030 00000000 00000000 00000000 00000000 - -;; The entire object is zeroed out. - -.type my_kernel.kd, @object -.size my_kernel.kd, 64 -my_kernel.kd: - .long 0x00000000 ;; group_segment_fixed_size - .long 0x00000000 ;; private_segment_fixed_size - .quad 0x0000000000000000 ;; reserved bytes. - .quad 0x0000000000000000 ;; kernel_code_entry_byte_offset, any value works. - - ;; 20 reserved bytes. - .quad 0x0000000000000000 - .quad 0x0000000000000000 - .long 0x00000000 - - .long 0x00000000 ;; compute_PGM_RSRC3 - .long 0x00000000 ;; compute_PGM_RSRC1 - .long 0x00000000 ;; compute_PGM_RSRC2 - .short 0x0000 ;; additional fields. - - ;; 6 reserved bytes. 
- .long 0x0000000 - .short 0x0000 diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 46ed7414dbb3..b63d08b90ff5 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1854,6 +1854,23 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, outs() << SectionName << ":\n"; } + if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) { + if (Symbols[SI].Type == ELF::STT_AMDGPU_HSA_KERNEL) { + // skip amd_kernel_code_t at the begining of kernel symbol (256 bytes) + Start += 256; + } + if (SI == SE - 1 || + Symbols[SI + 1].Type == ELF::STT_AMDGPU_HSA_KERNEL) { + // cut trailing zeroes at the end of kernel + // cut up to 256 bytes + const uint64_t EndAlign = 256; + const auto Limit = End - (std::min)(EndAlign, End - Start); + while (End > Limit && + *reinterpret_cast<const support::ulittle32_t*>(&Bytes[End - 4]) == 0) + End -= 4; + } + } + outs() << '\n'; if (!NoLeadingAddr) outs() << format(Is64Bits ? "%016" PRIx64 " " : "%08" PRIx64 " ", |