summary | refs | log | tree | commit | diff
path: root/src/amd/compiler/aco_opcodes.py
diff options
context:
space:
mode:
author: Rhys Perry <pendingchaos02@gmail.com> 2021-02-03 14:47:24 +0000
committer: Rhys Perry <pendingchaos02@gmail.com> 2021-03-11 16:31:19 +0000
commit: 83ce9407f20e17b63252211b43db59a78de8c1f1 (patch)
tree: 3454d27862a5b789a7fa335bf5e90944062326e1 /src/amd/compiler/aco_opcodes.py
parent: 0af7ff49fdecd05531b4c4f5fb841e11f9f451c1 (diff)
download: mesa-83ce9407f20e17b63252211b43db59a78de8c1f1.tar.gz
aco: add instruction classes
These should mostly match LLVM.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8994>
Diffstat (limited to 'src/amd/compiler/aco_opcodes.py')
-rw-r--r-- src/amd/compiler/aco_opcodes.py 340
1 files changed, 185 insertions, 155 deletions
diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py
index 9138fe2bac9..abe852caa36 100644
--- a/src/amd/compiler/aco_opcodes.py
+++ b/src/amd/compiler/aco_opcodes.py
@@ -30,6 +30,28 @@
import sys
from enum import Enum
+class InstrClass(Enum):
+ Valu32 = 0
+ ValuConvert32 = 1
+ Valu64 = 2
+ ValuQuarterRate32 = 3
+ ValuFma = 4
+ ValuTranscendental32 = 5
+ ValuDouble = 6
+ ValuDoubleAdd = 7
+ ValuDoubleConvert = 8
+ ValuDoubleTranscendental = 9
+ Salu = 10
+ SMem = 11
+ Barrier = 12
+ Branch = 13
+ Sendmsg = 14
+ DS = 15
+ Export = 16
+ VMem = 17
+ Waitcnt = 18
+ Other = 19
+
class Format(Enum):
PSEUDO = 0
SOP1 = 1
@@ -169,7 +191,7 @@ class Opcode(object):
"""Class that represents all the information we have about the opcode
NOTE: this must be kept in sync with aco_op_info
"""
- def __init__(self, name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod, is_atomic):
+ def __init__(self, name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod, is_atomic, cls):
"""Parameters:
- name is the name of the opcode (prepend nir_op_ for the enum name)
@@ -196,6 +218,7 @@ class Opcode(object):
self.output_mod = "1" if output_mod else "0"
self.is_atomic = "1" if is_atomic else "0"
self.format = format
+ self.cls = cls
parts = name.replace('_e64', '').rsplit('_', 2)
op_dtype = parts[-1]
@@ -237,11 +260,18 @@ class Opcode(object):
# global dictionary of opcodes
opcodes = {}
-def opcode(name, opcode_gfx7 = -1, opcode_gfx9 = -1, opcode_gfx10 = -1, format = Format.PSEUDO, input_mod = False, output_mod = False, is_atomic = False):
+def opcode(name, opcode_gfx7 = -1, opcode_gfx9 = -1, opcode_gfx10 = -1, format = Format.PSEUDO, cls = InstrClass.Other, input_mod = False, output_mod = False, is_atomic = False):
assert name not in opcodes
- opcodes[name] = Opcode(name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod, is_atomic)
+ opcodes[name] = Opcode(name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod, is_atomic, cls)
+
+def default_class(opcodes, cls):
+ for op in opcodes:
+ if isinstance(op[-1], InstrClass):
+ yield op
+ else:
+ yield op + (cls,)
-opcode("exp", 0, 0, 0, format = Format.EXP)
+opcode("exp", 0, 0, 0, format = Format.EXP, cls = InstrClass.Export)
opcode("p_parallelcopy")
opcode("p_startpgm")
opcode("p_phi")
@@ -334,9 +364,9 @@ SOP2 = {
(0x28, 0x28, 0x26, 0x26, 0x28, "s_bfe_i32"),
(0x29, 0x29, 0x27, 0x27, 0x29, "s_bfe_u64"),
(0x2a, 0x2a, 0x28, 0x28, 0x2a, "s_bfe_i64"),
- (0x2b, 0x2b, 0x29, 0x29, -1, "s_cbranch_g_fork"),
+ (0x2b, 0x2b, 0x29, 0x29, -1, "s_cbranch_g_fork", InstrClass.Branch),
(0x2c, 0x2c, 0x2a, 0x2a, 0x2c, "s_absdiff_i32"),
- ( -1, -1, 0x2b, 0x2b, -1, "s_rfe_restore_b64"),
+ ( -1, -1, 0x2b, 0x2b, -1, "s_rfe_restore_b64", InstrClass.Branch),
( -1, -1, -1, 0x2e, 0x2e, "s_lshl1_add_u32"),
( -1, -1, -1, 0x2f, 0x2f, "s_lshl2_add_u32"),
( -1, -1, -1, 0x30, 0x30, "s_lshl3_add_u32"),
@@ -349,8 +379,8 @@ SOP2 = {
# actually a pseudo-instruction. it's lowered to SALU during assembly though, so it's useful to identify it as a SOP2.
( -1, -1, -1, -1, -1, "p_constaddr_addlo"),
}
-for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SOP2:
- opcode(name, gfx7, gfx9, gfx10, Format.SOP2)
+for (gfx6, gfx7, gfx8, gfx9, gfx10, name, cls) in default_class(SOP2, InstrClass.Salu):
+ opcode(name, gfx7, gfx9, gfx10, Format.SOP2, cls)
# SOPK instructions: 0 input (+ imm), 1 output + optional scc
@@ -373,20 +403,20 @@ SOPK = {
(0x0e, 0x0e, 0x0d, 0x0d, 0x0e, "s_cmpk_le_u32"),
(0x0f, 0x0f, 0x0e, 0x0e, 0x0f, "s_addk_i32"),
(0x10, 0x10, 0x0f, 0x0f, 0x10, "s_mulk_i32"),
- (0x11, 0x11, 0x10, 0x10, -1, "s_cbranch_i_fork"),
+ (0x11, 0x11, 0x10, 0x10, -1, "s_cbranch_i_fork", InstrClass.Branch),
(0x12, 0x12, 0x11, 0x11, 0x12, "s_getreg_b32"),
(0x13, 0x13, 0x12, 0x12, 0x13, "s_setreg_b32"),
(0x15, 0x15, 0x14, 0x14, 0x15, "s_setreg_imm32_b32"), # requires 32bit literal
- ( -1, -1, 0x15, 0x15, 0x16, "s_call_b64"),
- ( -1, -1, -1, -1, 0x17, "s_waitcnt_vscnt"),
- ( -1, -1, -1, -1, 0x18, "s_waitcnt_vmcnt"),
- ( -1, -1, -1, -1, 0x19, "s_waitcnt_expcnt"),
- ( -1, -1, -1, -1, 0x1a, "s_waitcnt_lgkmcnt"),
- ( -1, -1, -1, -1, 0x1b, "s_subvector_loop_begin"),
- ( -1, -1, -1, -1, 0x1c, "s_subvector_loop_end"),
+ ( -1, -1, 0x15, 0x15, 0x16, "s_call_b64", InstrClass.Branch),
+ ( -1, -1, -1, -1, 0x17, "s_waitcnt_vscnt", InstrClass.Waitcnt),
+ ( -1, -1, -1, -1, 0x18, "s_waitcnt_vmcnt", InstrClass.Waitcnt),
+ ( -1, -1, -1, -1, 0x19, "s_waitcnt_expcnt", InstrClass.Waitcnt),
+ ( -1, -1, -1, -1, 0x1a, "s_waitcnt_lgkmcnt", InstrClass.Waitcnt),
+ ( -1, -1, -1, -1, 0x1b, "s_subvector_loop_begin", InstrClass.Branch),
+ ( -1, -1, -1, -1, 0x1c, "s_subvector_loop_end", InstrClass.Branch),
}
-for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SOPK:
- opcode(name, gfx7, gfx9, gfx10, Format.SOPK)
+for (gfx6, gfx7, gfx8, gfx9, gfx10, name, cls) in default_class(SOPK, InstrClass.Salu):
+ opcode(name, gfx7, gfx9, gfx10, Format.SOPK, cls)
# SOP1 instructions: 1 input, 1 output (+optional SCC)
@@ -421,9 +451,9 @@ SOP1 = {
(0x1d, 0x1d, 0x1a, 0x1a, 0x1d, "s_bitset1_b32"),
(0x1e, 0x1e, 0x1b, 0x1b, 0x1e, "s_bitset1_b64"),
(0x1f, 0x1f, 0x1c, 0x1c, 0x1f, "s_getpc_b64"),
- (0x20, 0x20, 0x1d, 0x1d, 0x20, "s_setpc_b64"),
- (0x21, 0x21, 0x1e, 0x1e, 0x21, "s_swappc_b64"),
- (0x22, 0x22, 0x1f, 0x1f, 0x22, "s_rfe_b64"),
+ (0x20, 0x20, 0x1d, 0x1d, 0x20, "s_setpc_b64", InstrClass.Branch),
+ (0x21, 0x21, 0x1e, 0x1e, 0x21, "s_swappc_b64", InstrClass.Branch),
+ (0x22, 0x22, 0x1f, 0x1f, 0x22, "s_rfe_b64", InstrClass.Branch),
(0x24, 0x24, 0x20, 0x20, 0x24, "s_and_saveexec_b64"),
(0x25, 0x25, 0x21, 0x21, 0x25, "s_or_saveexec_b64"),
(0x26, 0x26, 0x22, 0x22, 0x26, "s_xor_saveexec_b64"),
@@ -438,7 +468,7 @@ SOP1 = {
(0x2f, 0x2f, 0x2b, 0x2b, 0x2f, "s_movrels_b64"),
(0x30, 0x30, 0x2c, 0x2c, 0x30, "s_movreld_b32"),
(0x31, 0x31, 0x2d, 0x2d, 0x31, "s_movreld_b64"),
- (0x32, 0x32, 0x2e, 0x2e, -1, "s_cbranch_join"),
+ (0x32, 0x32, 0x2e, 0x2e, -1, "s_cbranch_join", InstrClass.Branch),
(0x34, 0x34, 0x30, 0x30, 0x34, "s_abs_i32"),
(0x35, 0x35, -1, -1, 0x35, "s_mov_fed_b32"),
( -1, -1, 0x32, 0x32, -1, "s_set_gpr_idx_idx"),
@@ -463,8 +493,8 @@ SOP1 = {
# actually a pseudo-instruction. it's lowered to SALU during assembly though, so it's useful to identify it as a SOP1.
( -1, -1, -1, -1, -1, "p_constaddr_getpc"),
}
-for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SOP1:
- opcode(name, gfx7, gfx9, gfx10, Format.SOP1)
+for (gfx6, gfx7, gfx8, gfx9, gfx10, name, cls) in default_class(SOP1, InstrClass.Salu):
+ opcode(name, gfx7, gfx9, gfx10, Format.SOP1, cls)
# SOPC instructions: 2 inputs and 0 outputs (+SCC)
@@ -492,7 +522,7 @@ SOPC = {
( -1, -1, 0x13, 0x13, 0x13, "s_cmp_lg_u64"),
}
for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SOPC:
- opcode(name, gfx7, gfx9, gfx10, Format.SOPC)
+ opcode(name, gfx7, gfx9, gfx10, Format.SOPC, InstrClass.Salu)
# SOPP instructions: 0 inputs (+optional scc/vcc), 0 outputs
@@ -500,31 +530,31 @@ SOPP = {
# GFX6, GFX7, GFX8, GFX9, GFX10, name
(0x00, 0x00, 0x00, 0x00, 0x00, "s_nop"),
(0x01, 0x01, 0x01, 0x01, 0x01, "s_endpgm"),
- (0x02, 0x02, 0x02, 0x02, 0x02, "s_branch"),
+ (0x02, 0x02, 0x02, 0x02, 0x02, "s_branch", InstrClass.Branch),
( -1, -1, 0x03, 0x03, 0x03, "s_wakeup"),
- (0x04, 0x04, 0x04, 0x04, 0x04, "s_cbranch_scc0"),
- (0x05, 0x05, 0x05, 0x05, 0x05, "s_cbranch_scc1"),
- (0x06, 0x06, 0x06, 0x06, 0x06, "s_cbranch_vccz"),
- (0x07, 0x07, 0x07, 0x07, 0x07, "s_cbranch_vccnz"),
- (0x08, 0x08, 0x08, 0x08, 0x08, "s_cbranch_execz"),
- (0x09, 0x09, 0x09, 0x09, 0x09, "s_cbranch_execnz"),
- (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_barrier"),
+ (0x04, 0x04, 0x04, 0x04, 0x04, "s_cbranch_scc0", InstrClass.Branch),
+ (0x05, 0x05, 0x05, 0x05, 0x05, "s_cbranch_scc1", InstrClass.Branch),
+ (0x06, 0x06, 0x06, 0x06, 0x06, "s_cbranch_vccz", InstrClass.Branch),
+ (0x07, 0x07, 0x07, 0x07, 0x07, "s_cbranch_vccnz", InstrClass.Branch),
+ (0x08, 0x08, 0x08, 0x08, 0x08, "s_cbranch_execz", InstrClass.Branch),
+ (0x09, 0x09, 0x09, 0x09, 0x09, "s_cbranch_execnz", InstrClass.Branch),
+ (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_barrier", InstrClass.Barrier),
( -1, 0x0b, 0x0b, 0x0b, 0x0b, "s_setkill"),
- (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_waitcnt"),
+ (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_waitcnt", InstrClass.Waitcnt),
(0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "s_sethalt"),
(0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "s_sleep"),
(0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "s_setprio"),
- (0x10, 0x10, 0x10, 0x10, 0x10, "s_sendmsg"),
- (0x11, 0x11, 0x11, 0x11, 0x11, "s_sendmsghalt"),
- (0x12, 0x12, 0x12, 0x12, 0x12, "s_trap"),
+ (0x10, 0x10, 0x10, 0x10, 0x10, "s_sendmsg", InstrClass.Sendmsg),
+ (0x11, 0x11, 0x11, 0x11, 0x11, "s_sendmsghalt", InstrClass.Sendmsg),
+ (0x12, 0x12, 0x12, 0x12, 0x12, "s_trap", InstrClass.Branch),
(0x13, 0x13, 0x13, 0x13, 0x13, "s_icache_inv"),
(0x14, 0x14, 0x14, 0x14, 0x14, "s_incperflevel"),
(0x15, 0x15, 0x15, 0x15, 0x15, "s_decperflevel"),
(0x16, 0x16, 0x16, 0x16, 0x16, "s_ttracedata"),
- ( -1, 0x17, 0x17, 0x17, 0x17, "s_cbranch_cdbgsys"),
- ( -1, 0x18, 0x18, 0x18, 0x18, "s_cbranch_cdbguser"),
- ( -1, 0x19, 0x19, 0x19, 0x19, "s_cbranch_cdbgsys_or_user"),
- ( -1, 0x1a, 0x1a, 0x1a, 0x1a, "s_cbranch_cdbgsys_and_user"),
+ ( -1, 0x17, 0x17, 0x17, 0x17, "s_cbranch_cdbgsys", InstrClass.Branch),
+ ( -1, 0x18, 0x18, 0x18, 0x18, "s_cbranch_cdbguser", InstrClass.Branch),
+ ( -1, 0x19, 0x19, 0x19, 0x19, "s_cbranch_cdbgsys_or_user", InstrClass.Branch),
+ ( -1, 0x1a, 0x1a, 0x1a, 0x1a, "s_cbranch_cdbgsys_and_user", InstrClass.Branch),
( -1, -1, 0x1b, 0x1b, 0x1b, "s_endpgm_saved"),
( -1, -1, 0x1c, 0x1c, -1, "s_set_gpr_idx_off"),
( -1, -1, 0x1d, 0x1d, -1, "s_set_gpr_idx_mode"),
@@ -538,8 +568,8 @@ SOPP = {
( -1, -1, -1, -1, 0x25, "s_denorm_mode"),
( -1, -1, -1, -1, 0x26, "s_ttracedata_imm"),
}
-for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SOPP:
- opcode(name, gfx7, gfx9, gfx10, Format.SOPP)
+for (gfx6, gfx7, gfx8, gfx9, gfx10, name, cls) in default_class(SOPP, InstrClass.Salu):
+ opcode(name, gfx7, gfx9, gfx10, Format.SOPP, cls)
# SMEM instructions: sbase input (2 sgpr), potentially 2 offset inputs, 1 sdata input/output
@@ -634,7 +664,7 @@ SMEM = {
( -1, -1, -1, 0xac, 0xac, "s_atomic_dec_x2"),
}
for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SMEM:
- opcode(name, gfx7, gfx9, gfx10, Format.SMEM, is_atomic = "atomic" in name)
+ opcode(name, gfx7, gfx9, gfx10, Format.SMEM, InstrClass.SMem, is_atomic = "atomic" in name)
# VOP2 instructions: 2 inputs, 1 output (+ optional vcc)
@@ -715,12 +745,12 @@ VOP2 = {
( -1, -1, -1, -1, 0x3c, "v_pk_fmac_f16", False),
}
for (gfx6, gfx7, gfx8, gfx9, gfx10, name, modifiers) in VOP2:
- opcode(name, gfx7, gfx9, gfx10, Format.VOP2, modifiers, modifiers)
+ opcode(name, gfx7, gfx9, gfx10, Format.VOP2, InstrClass.Valu32, modifiers, modifiers)
if True:
# v_cndmask_b32 can use input modifiers but not output modifiers
(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x00, 0x00, 0x00, 0x00, 0x01, "v_cndmask_b32")
- opcode(name, gfx7, gfx9, gfx10, Format.VOP2, True, False)
+ opcode(name, gfx7, gfx9, gfx10, Format.VOP2, InstrClass.Valu32, True, False)
# VOP1 instructions: instructions with 1 input and 1 output
@@ -729,8 +759,8 @@ VOP1 = {
(0x00, 0x00, 0x00, 0x00, 0x00, "v_nop", False, False),
(0x01, 0x01, 0x01, 0x01, 0x01, "v_mov_b32", False, False),
(0x02, 0x02, 0x02, 0x02, 0x02, "v_readfirstlane_b32", False, False),
- (0x03, 0x03, 0x03, 0x03, 0x03, "v_cvt_i32_f64", True, False),
- (0x04, 0x04, 0x04, 0x04, 0x04, "v_cvt_f64_i32", False, True),
+ (0x03, 0x03, 0x03, 0x03, 0x03, "v_cvt_i32_f64", True, False, InstrClass.ValuDoubleConvert),
+ (0x04, 0x04, 0x04, 0x04, 0x04, "v_cvt_f64_i32", False, True, InstrClass.ValuDoubleConvert),
(0x05, 0x05, 0x05, 0x05, 0x05, "v_cvt_f32_i32", False, True),
(0x06, 0x06, 0x06, 0x06, 0x06, "v_cvt_f32_u32", False, True),
(0x07, 0x07, 0x07, 0x07, 0x07, "v_cvt_u32_f32", True, False),
@@ -742,50 +772,50 @@ VOP1 = {
(0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "v_cvt_rpi_i32_f32", True, False),
(0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "v_cvt_flr_i32_f32", True, False),
(0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "v_cvt_off_f32_i4", False, True),
- (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "v_cvt_f32_f64", True, True),
- (0x10, 0x10, 0x10, 0x10, 0x10, "v_cvt_f64_f32", True, True),
+ (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "v_cvt_f32_f64", True, True, InstrClass.ValuDoubleConvert),
+ (0x10, 0x10, 0x10, 0x10, 0x10, "v_cvt_f64_f32", True, True, InstrClass.ValuDoubleConvert),
(0x11, 0x11, 0x11, 0x11, 0x11, "v_cvt_f32_ubyte0", False, True),
(0x12, 0x12, 0x12, 0x12, 0x12, "v_cvt_f32_ubyte1", False, True),
(0x13, 0x13, 0x13, 0x13, 0x13, "v_cvt_f32_ubyte2", False, True),
(0x14, 0x14, 0x14, 0x14, 0x14, "v_cvt_f32_ubyte3", False, True),
- (0x15, 0x15, 0x15, 0x15, 0x15, "v_cvt_u32_f64", True, False),
- (0x16, 0x16, 0x16, 0x16, 0x16, "v_cvt_f64_u32", False, True),
- ( -1, 0x17, 0x17, 0x17, 0x17, "v_trunc_f64", True, True),
- ( -1, 0x18, 0x18, 0x18, 0x18, "v_ceil_f64", True, True),
- ( -1, 0x19, 0x19, 0x19, 0x19, "v_rndne_f64", True, True),
- ( -1, 0x1a, 0x1a, 0x1a, 0x1a, "v_floor_f64", True, True),
+ (0x15, 0x15, 0x15, 0x15, 0x15, "v_cvt_u32_f64", True, False, InstrClass.ValuDoubleConvert),
+ (0x16, 0x16, 0x16, 0x16, 0x16, "v_cvt_f64_u32", False, True, InstrClass.ValuDoubleConvert),
+ ( -1, 0x17, 0x17, 0x17, 0x17, "v_trunc_f64", True, True, InstrClass.ValuDouble),
+ ( -1, 0x18, 0x18, 0x18, 0x18, "v_ceil_f64", True, True, InstrClass.ValuDouble),
+ ( -1, 0x19, 0x19, 0x19, 0x19, "v_rndne_f64", True, True, InstrClass.ValuDouble),
+ ( -1, 0x1a, 0x1a, 0x1a, 0x1a, "v_floor_f64", True, True, InstrClass.ValuDouble),
( -1, -1, -1, -1, 0x1b, "v_pipeflush", False, False),
(0x20, 0x20, 0x1b, 0x1b, 0x20, "v_fract_f32", True, True),
(0x21, 0x21, 0x1c, 0x1c, 0x21, "v_trunc_f32", True, True),
(0x22, 0x22, 0x1d, 0x1d, 0x22, "v_ceil_f32", True, True),
(0x23, 0x23, 0x1e, 0x1e, 0x23, "v_rndne_f32", True, True),
(0x24, 0x24, 0x1f, 0x1f, 0x24, "v_floor_f32", True, True),
- (0x25, 0x25, 0x20, 0x20, 0x25, "v_exp_f32", True, True),
- (0x26, 0x26, -1, -1, -1, "v_log_clamp_f32", True, True),
- (0x27, 0x27, 0x21, 0x21, 0x27, "v_log_f32", True, True),
- (0x28, 0x28, -1, -1, -1, "v_rcp_clamp_f32", True, True),
- (0x29, 0x29, -1, -1, -1, "v_rcp_legacy_f32", True, True),
- (0x2a, 0x2a, 0x22, 0x22, 0x2a, "v_rcp_f32", True, True),
- (0x2b, 0x2b, 0x23, 0x23, 0x2b, "v_rcp_iflag_f32", True, True),
- (0x2c, 0x2c, -1, -1, -1, "v_rsq_clamp_f32", True, True),
- (0x2d, 0x2d, -1, -1, -1, "v_rsq_legacy_f32", True, True),
- (0x2e, 0x2e, 0x24, 0x24, 0x2e, "v_rsq_f32", True, True),
- (0x2f, 0x2f, 0x25, 0x25, 0x2f, "v_rcp_f64", True, True),
- (0x30, 0x30, -1, -1, -1, "v_rcp_clamp_f64", True, True),
- (0x31, 0x31, 0x26, 0x26, 0x31, "v_rsq_f64", True, True),
- (0x32, 0x32, -1, -1, -1, "v_rsq_clamp_f64", True, True),
- (0x33, 0x33, 0x27, 0x27, 0x33, "v_sqrt_f32", True, True),
- (0x34, 0x34, 0x28, 0x28, 0x34, "v_sqrt_f64", True, True),
- (0x35, 0x35, 0x29, 0x29, 0x35, "v_sin_f32", True, True),
- (0x36, 0x36, 0x2a, 0x2a, 0x36, "v_cos_f32", True, True),
+ (0x25, 0x25, 0x20, 0x20, 0x25, "v_exp_f32", True, True, InstrClass.ValuTranscendental32),
+ (0x26, 0x26, -1, -1, -1, "v_log_clamp_f32", True, True, InstrClass.ValuTranscendental32),
+ (0x27, 0x27, 0x21, 0x21, 0x27, "v_log_f32", True, True, InstrClass.ValuTranscendental32),
+ (0x28, 0x28, -1, -1, -1, "v_rcp_clamp_f32", True, True, InstrClass.ValuTranscendental32),
+ (0x29, 0x29, -1, -1, -1, "v_rcp_legacy_f32", True, True, InstrClass.ValuTranscendental32),
+ (0x2a, 0x2a, 0x22, 0x22, 0x2a, "v_rcp_f32", True, True, InstrClass.ValuTranscendental32),
+ (0x2b, 0x2b, 0x23, 0x23, 0x2b, "v_rcp_iflag_f32", True, True, InstrClass.ValuTranscendental32),
+ (0x2c, 0x2c, -1, -1, -1, "v_rsq_clamp_f32", True, True, InstrClass.ValuTranscendental32),
+ (0x2d, 0x2d, -1, -1, -1, "v_rsq_legacy_f32", True, True, InstrClass.ValuTranscendental32),
+ (0x2e, 0x2e, 0x24, 0x24, 0x2e, "v_rsq_f32", True, True, InstrClass.ValuTranscendental32),
+ (0x2f, 0x2f, 0x25, 0x25, 0x2f, "v_rcp_f64", True, True, InstrClass.ValuDoubleTranscendental),
+ (0x30, 0x30, -1, -1, -1, "v_rcp_clamp_f64", True, True, InstrClass.ValuDoubleTranscendental),
+ (0x31, 0x31, 0x26, 0x26, 0x31, "v_rsq_f64", True, True, InstrClass.ValuDoubleTranscendental),
+ (0x32, 0x32, -1, -1, -1, "v_rsq_clamp_f64", True, True, InstrClass.ValuDoubleTranscendental),
+ (0x33, 0x33, 0x27, 0x27, 0x33, "v_sqrt_f32", True, True, InstrClass.ValuTranscendental32),
+ (0x34, 0x34, 0x28, 0x28, 0x34, "v_sqrt_f64", True, True, InstrClass.ValuDoubleTranscendental),
+ (0x35, 0x35, 0x29, 0x29, 0x35, "v_sin_f32", True, True, InstrClass.ValuTranscendental32),
+ (0x36, 0x36, 0x2a, 0x2a, 0x36, "v_cos_f32", True, True, InstrClass.ValuTranscendental32),
(0x37, 0x37, 0x2b, 0x2b, 0x37, "v_not_b32", False, False),
(0x38, 0x38, 0x2c, 0x2c, 0x38, "v_bfrev_b32", False, False),
(0x39, 0x39, 0x2d, 0x2d, 0x39, "v_ffbh_u32", False, False),
(0x3a, 0x3a, 0x2e, 0x2e, 0x3a, "v_ffbl_b32", False, False),
(0x3b, 0x3b, 0x2f, 0x2f, 0x3b, "v_ffbh_i32", False, False),
- (0x3c, 0x3c, 0x30, 0x30, 0x3c, "v_frexp_exp_i32_f64", True, False),
- (0x3d, 0x3d, 0x31, 0x31, 0x3d, "v_frexp_mant_f64", True, False),
- (0x3e, 0x3e, 0x32, 0x32, 0x3e, "v_fract_f64", True, True),
+ (0x3c, 0x3c, 0x30, 0x30, 0x3c, "v_frexp_exp_i32_f64", True, False, InstrClass.ValuDouble),
+ (0x3d, 0x3d, 0x31, 0x31, 0x3d, "v_frexp_mant_f64", True, False, InstrClass.ValuDouble),
+ (0x3e, 0x3e, 0x32, 0x32, 0x3e, "v_fract_f64", True, True, InstrClass.ValuDouble),
(0x3f, 0x3f, 0x33, 0x33, 0x3f, "v_frexp_exp_i32_f32", True, False),
(0x40, 0x40, 0x34, 0x34, 0x40, "v_frexp_mant_f32", True, False),
(0x41, 0x41, 0x35, 0x35, 0x41, "v_clrexcp", False, False),
@@ -798,11 +828,11 @@ VOP1 = {
( -1, -1, 0x3a, 0x3a, 0x51, "v_cvt_f16_i16", False, True),
( -1, -1, 0x3b, 0x3b, 0x52, "v_cvt_u16_f16", True, False),
( -1, -1, 0x3c, 0x3c, 0x53, "v_cvt_i16_f16", True, False),
- ( -1, -1, 0x3d, 0x3d, 0x54, "v_rcp_f16", True, True),
- ( -1, -1, 0x3e, 0x3e, 0x55, "v_sqrt_f16", True, True),
- ( -1, -1, 0x3f, 0x3f, 0x56, "v_rsq_f16", True, True),
- ( -1, -1, 0x40, 0x40, 0x57, "v_log_f16", True, True),
- ( -1, -1, 0x41, 0x41, 0x58, "v_exp_f16", True, True),
+ ( -1, -1, 0x3d, 0x3d, 0x54, "v_rcp_f16", True, True, InstrClass.ValuTranscendental32),
+ ( -1, -1, 0x3e, 0x3e, 0x55, "v_sqrt_f16", True, True, InstrClass.ValuTranscendental32),
+ ( -1, -1, 0x3f, 0x3f, 0x56, "v_rsq_f16", True, True, InstrClass.ValuTranscendental32),
+ ( -1, -1, 0x40, 0x40, 0x57, "v_log_f16", True, True, InstrClass.ValuTranscendental32),
+ ( -1, -1, 0x41, 0x41, 0x58, "v_exp_f16", True, True, InstrClass.ValuTranscendental32),
( -1, -1, 0x42, 0x42, 0x59, "v_frexp_mant_f16", True, False),
( -1, -1, 0x43, 0x43, 0x5a, "v_frexp_exp_i16_f16", True, False),
( -1, -1, 0x44, 0x44, 0x5b, "v_floor_f16", True, True),
@@ -810,18 +840,18 @@ VOP1 = {
( -1, -1, 0x46, 0x46, 0x5d, "v_trunc_f16", True, True),
( -1, -1, 0x47, 0x47, 0x5e, "v_rndne_f16", True, True),
( -1, -1, 0x48, 0x48, 0x5f, "v_fract_f16", True, True),
- ( -1, -1, 0x49, 0x49, 0x60, "v_sin_f16", True, True),
- ( -1, -1, 0x4a, 0x4a, 0x61, "v_cos_f16", True, True),
- ( -1, 0x46, 0x4b, 0x4b, -1, "v_exp_legacy_f32", True, True),
- ( -1, 0x45, 0x4c, 0x4c, -1, "v_log_legacy_f32", True, True),
+ ( -1, -1, 0x49, 0x49, 0x60, "v_sin_f16", True, True, InstrClass.ValuTranscendental32),
+ ( -1, -1, 0x4a, 0x4a, 0x61, "v_cos_f16", True, True, InstrClass.ValuTranscendental32),
+ ( -1, 0x46, 0x4b, 0x4b, -1, "v_exp_legacy_f32", True, True, InstrClass.ValuTranscendental32),
+ ( -1, 0x45, 0x4c, 0x4c, -1, "v_log_legacy_f32", True, True, InstrClass.ValuTranscendental32),
( -1, -1, -1, 0x4f, 0x62, "v_sat_pk_u8_i16", False, False),
( -1, -1, -1, 0x4d, 0x63, "v_cvt_norm_i16_f16", True, False),
( -1, -1, -1, 0x4e, 0x64, "v_cvt_norm_u16_f16", True, False),
( -1, -1, -1, 0x51, 0x65, "v_swap_b32", False, False),
( -1, -1, -1, -1, 0x68, "v_swaprel_b32", False, False),
}
-for (gfx6, gfx7, gfx8, gfx9, gfx10, name, in_mod, out_mod) in VOP1:
- opcode(name, gfx7, gfx9, gfx10, Format.VOP1, in_mod, out_mod)
+for (gfx6, gfx7, gfx8, gfx9, gfx10, name, in_mod, out_mod, cls) in default_class(VOP1, InstrClass.Valu32):
+ opcode(name, gfx7, gfx9, gfx10, Format.VOP1, cls, in_mod, out_mod)
# VOPC instructions:
@@ -831,33 +861,33 @@ VOPC_CLASS = {
( -1, -1, 0x14, 0x14, 0x8f, "v_cmp_class_f16"),
(0x98, 0x98, 0x11, 0x11, 0x98, "v_cmpx_class_f32"),
( -1, -1, 0x15, 0x15, 0x9f, "v_cmpx_class_f16"),
- (0xa8, 0xa8, 0x12, 0x12, 0xa8, "v_cmp_class_f64"),
- (0xb8, 0xb8, 0x13, 0x13, 0xb8, "v_cmpx_class_f64"),
+ (0xa8, 0xa8, 0x12, 0x12, 0xa8, "v_cmp_class_f64", InstrClass.ValuDouble),
+ (0xb8, 0xb8, 0x13, 0x13, 0xb8, "v_cmpx_class_f64", InstrClass.ValuDouble),
}
-for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in VOPC_CLASS:
- opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False)
+for (gfx6, gfx7, gfx8, gfx9, gfx10, name, cls) in default_class(VOPC_CLASS, InstrClass.Valu32):
+ opcode(name, gfx7, gfx9, gfx10, Format.VOPC, cls, True, False)
COMPF = ["f", "lt", "eq", "le", "gt", "lg", "ge", "o", "u", "nge", "nlg", "ngt", "nle", "neq", "nlt", "tru"]
for i in range(8):
(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0x20+i, 0x20+i, 0xc8+i, "v_cmp_"+COMPF[i]+"_f16")
- opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False)
+ opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32, True, False)
(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0x30+i, 0x30+i, 0xd8+i, "v_cmpx_"+COMPF[i]+"_f16")
- opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False)
+ opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32, True, False)
(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0x28+i, 0x28+i, 0xe8+i, "v_cmp_"+COMPF[i+8]+"_f16")
- opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False)
+ opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32, True, False)
(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0x38+i, 0x38+i, 0xf8+i, "v_cmpx_"+COMPF[i+8]+"_f16")
- opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False)
+ opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32, True, False)
for i in range(16):
(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x00+i, 0x00+i, 0x40+i, 0x40+i, 0x00+i, "v_cmp_"+COMPF[i]+"_f32")
- opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False)
+ opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32, True, False)
(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x10+i, 0x10+i, 0x50+i, 0x50+i, 0x10+i, "v_cmpx_"+COMPF[i]+"_f32")
- opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False)
+ opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32, True, False)
(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x20+i, 0x20+i, 0x60+i, 0x60+i, 0x20+i, "v_cmp_"+COMPF[i]+"_f64")
- opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False)
+ opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.ValuDouble, True, False)
(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x30+i, 0x30+i, 0x70+i, 0x70+i, 0x30+i, "v_cmpx_"+COMPF[i]+"_f64")
- opcode(name, gfx7, gfx9, gfx10, Format.VOPC, True, False)
+ opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.ValuDouble, True, False)
# GFX_6_7
(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x40+i, 0x40+i, -1, -1, -1, "v_cmps_"+COMPF[i]+"_f32")
(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x50+i, 0x50+i, -1, -1, -1, "v_cmpsx_"+COMPF[i]+"_f32")
@@ -869,41 +899,41 @@ COMPI = ["f", "lt", "eq", "le", "gt", "lg", "ge", "tru"]
# GFX_8_9
for i in [0,7]: # only 0 and 7
(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xa0+i, 0xa0+i, -1, "v_cmp_"+COMPI[i]+"_i16")
- opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
+ opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32)
(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xb0+i, 0xb0+i, -1, "v_cmpx_"+COMPI[i]+"_i16")
- opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
+ opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32)
(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xa8+i, 0xa8+i, -1, "v_cmp_"+COMPI[i]+"_u16")
- opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
+ opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32)
(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xb8+i, 0xb8+i, -1, "v_cmpx_"+COMPI[i]+"_u16")
- opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
+ opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32)
for i in range(1, 7): # [1..6]
(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xa0+i, 0xa0+i, 0x88+i, "v_cmp_"+COMPI[i]+"_i16")
- opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
+ opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32)
(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xb0+i, 0xb0+i, 0x98+i, "v_cmpx_"+COMPI[i]+"_i16")
- opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
+ opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32)
(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xa8+i, 0xa8+i, 0xa8+i, "v_cmp_"+COMPI[i]+"_u16")
- opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
+ opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32)
(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xb8+i, 0xb8+i, 0xb8+i, "v_cmpx_"+COMPI[i]+"_u16")
- opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
+ opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32)
for i in range(8):
(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x80+i, 0x80+i, 0xc0+i, 0xc0+i, 0x80+i, "v_cmp_"+COMPI[i]+"_i32")
- opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
+ opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32)
(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x90+i, 0x90+i, 0xd0+i, 0xd0+i, 0x90+i, "v_cmpx_"+COMPI[i]+"_i32")
- opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
+ opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32)
(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xa0+i, 0xa0+i, 0xe0+i, 0xe0+i, 0xa0+i, "v_cmp_"+COMPI[i]+"_i64")
- opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
+ opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu64)
(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xb0+i, 0xb0+i, 0xf0+i, 0xf0+i, 0xb0+i, "v_cmpx_"+COMPI[i]+"_i64")
- opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
+ opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu64)
(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xc0+i, 0xc0+i, 0xc8+i, 0xc8+i, 0xc0+i, "v_cmp_"+COMPI[i]+"_u32")
- opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
+ opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32)
(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xd0+i, 0xd0+i, 0xd8+i, 0xd8+i, 0xd0+i, "v_cmpx_"+COMPI[i]+"_u32")
- opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
+ opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32)
(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xe0+i, 0xe0+i, 0xe8+i, 0xe8+i, 0xe0+i, "v_cmp_"+COMPI[i]+"_u64")
- opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
+ opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu64)
(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xf0+i, 0xf0+i, 0xf8+i, 0xf8+i, 0xf0+i, "v_cmpx_"+COMPI[i]+"_u64")
- opcode(name, gfx7, gfx9, gfx10, Format.VOPC)
+ opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu64)
# VOPP instructions: packed 16bit instructions - 1 or 2 inputs and 1 output
@@ -935,7 +965,7 @@ VOPP = {
# note that these are only supported on gfx9+ so we'll need to distinguish between gfx8 and gfx9 here
# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, -1, code, code, name)
for (code, name, modifiers) in VOPP:
- opcode(name, -1, code, code, Format.VOP3P, modifiers, modifiers)
+ opcode(name, -1, code, code, Format.VOP3P, InstrClass.Valu32, modifiers, modifiers)
# VINTERP instructions:
@@ -946,7 +976,7 @@ VINTRP = {
}
# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name)
for (code, name) in VINTRP:
- opcode(name, code, code, code, Format.VINTRP)
+ opcode(name, code, code, code, Format.VINTRP, InstrClass.Valu32)
# VOP3 instructions: 3 inputs, 1 output
# VOP3b instructions: have a unique scalar output, e.g. VOP2 with vcc out
@@ -962,8 +992,8 @@ VOP3 = {
(0x148, 0x148, 0x1c8, 0x1c8, 0x148, "v_bfe_u32", False, False),
(0x149, 0x149, 0x1c9, 0x1c9, 0x149, "v_bfe_i32", False, False),
(0x14a, 0x14a, 0x1ca, 0x1ca, 0x14a, "v_bfi_b32", False, False),
- (0x14b, 0x14b, 0x1cb, 0x1cb, 0x14b, "v_fma_f32", True, True),
- (0x14c, 0x14c, 0x1cc, 0x1cc, 0x14c, "v_fma_f64", True, True),
+ (0x14b, 0x14b, 0x1cb, 0x1cb, 0x14b, "v_fma_f32", True, True, InstrClass.ValuFma),
+ (0x14c, 0x14c, 0x1cc, 0x1cc, 0x14c, "v_fma_f64", True, True, InstrClass.ValuDouble),
(0x14d, 0x14d, 0x1cd, 0x1cd, 0x14d, "v_lerp_u8", False, False),
(0x14e, 0x14e, 0x1ce, 0x1ce, 0x14e, "v_alignbit_b32", False, False),
(0x14f, 0x14f, 0x1cf, 0x1cf, 0x14f, "v_alignbyte_b32", False, False),
@@ -984,36 +1014,36 @@ VOP3 = {
(0x15e, 0x15e, 0x1dd, 0x1dd, 0x15e, "v_cvt_pk_u8_f32", True, False),
(0x15f, 0x15f, 0x1de, 0x1de, 0x15f, "v_div_fixup_f32", True, True),
(0x160, 0x160, 0x1df, 0x1df, 0x160, "v_div_fixup_f64", True, True),
- (0x161, 0x161, -1, -1, -1, "v_lshl_b64", False, False),
- (0x162, 0x162, -1, -1, -1, "v_lshr_b64", False, False),
- (0x163, 0x163, -1, -1, -1, "v_ashr_i64", False, False),
- (0x164, 0x164, 0x280, 0x280, 0x164, "v_add_f64", True, True),
- (0x165, 0x165, 0x281, 0x281, 0x165, "v_mul_f64", True, True),
- (0x166, 0x166, 0x282, 0x282, 0x166, "v_min_f64", True, True),
- (0x167, 0x167, 0x283, 0x283, 0x167, "v_max_f64", True, True),
- (0x168, 0x168, 0x284, 0x284, 0x168, "v_ldexp_f64", False, True), # src1 can take input modifiers
- (0x169, 0x169, 0x285, 0x285, 0x169, "v_mul_lo_u32", False, False),
- (0x16a, 0x16a, 0x286, 0x286, 0x16a, "v_mul_hi_u32", False, False),
- (0x16b, 0x16b, 0x285, 0x285, 0x16b, "v_mul_lo_i32", False, False), # identical to v_mul_lo_u32
- (0x16c, 0x16c, 0x287, 0x287, 0x16c, "v_mul_hi_i32", False, False),
+ (0x161, 0x161, -1, -1, -1, "v_lshl_b64", False, False, InstrClass.Valu64),
+ (0x162, 0x162, -1, -1, -1, "v_lshr_b64", False, False, InstrClass.Valu64),
+ (0x163, 0x163, -1, -1, -1, "v_ashr_i64", False, False, InstrClass.Valu64),
+ (0x164, 0x164, 0x280, 0x280, 0x164, "v_add_f64", True, True, InstrClass.ValuDoubleAdd),
+ (0x165, 0x165, 0x281, 0x281, 0x165, "v_mul_f64", True, True, InstrClass.ValuDouble),
+ (0x166, 0x166, 0x282, 0x282, 0x166, "v_min_f64", True, True, InstrClass.ValuDouble),
+ (0x167, 0x167, 0x283, 0x283, 0x167, "v_max_f64", True, True, InstrClass.ValuDouble),
+ (0x168, 0x168, 0x284, 0x284, 0x168, "v_ldexp_f64", False, True, InstrClass.ValuDouble), # src1 can take input modifiers
+ (0x169, 0x169, 0x285, 0x285, 0x169, "v_mul_lo_u32", False, False, InstrClass.ValuQuarterRate32),
+ (0x16a, 0x16a, 0x286, 0x286, 0x16a, "v_mul_hi_u32", False, False, InstrClass.ValuQuarterRate32),
+ (0x16b, 0x16b, 0x285, 0x285, 0x16b, "v_mul_lo_i32", False, False, InstrClass.ValuQuarterRate32), # identical to v_mul_lo_u32
+ (0x16c, 0x16c, 0x287, 0x287, 0x16c, "v_mul_hi_i32", False, False, InstrClass.ValuQuarterRate32),
(0x16d, 0x16d, 0x1e0, 0x1e0, 0x16d, "v_div_scale_f32", True, True), # writes to VCC
- (0x16e, 0x16e, 0x1e1, 0x1e1, 0x16e, "v_div_scale_f64", True, True), # writes to VCC
+ (0x16e, 0x16e, 0x1e1, 0x1e1, 0x16e, "v_div_scale_f64", True, True, InstrClass.ValuDouble), # writes to VCC
(0x16f, 0x16f, 0x1e2, 0x1e2, 0x16f, "v_div_fmas_f32", True, True), # takes VCC input
- (0x170, 0x170, 0x1e3, 0x1e3, 0x170, "v_div_fmas_f64", True, True), # takes VCC input
+ (0x170, 0x170, 0x1e3, 0x1e3, 0x170, "v_div_fmas_f64", True, True, InstrClass.ValuDouble), # takes VCC input
(0x171, 0x171, 0x1e4, 0x1e4, 0x171, "v_msad_u8", False, False),
(0x172, 0x172, 0x1e5, 0x1e5, 0x172, "v_qsad_pk_u16_u8", False, False),
(0x172, -1, -1, -1, -1, "v_qsad_u8", False, False), # what's the difference?
(0x173, 0x173, 0x1e6, 0x1e6, 0x173, "v_mqsad_pk_u16_u8", False, False),
(0x173, -1, -1, -1, -1, "v_mqsad_u8", False, False), # what's the difference?
- (0x174, 0x174, 0x292, 0x292, 0x174, "v_trig_preop_f64", False, False),
+ (0x174, 0x174, 0x292, 0x292, 0x174, "v_trig_preop_f64", False, False, InstrClass.ValuDouble),
( -1, 0x175, 0x1e7, 0x1e7, 0x175, "v_mqsad_u32_u8", False, False),
- ( -1, 0x176, 0x1e8, 0x1e8, 0x176, "v_mad_u64_u32", False, False),
- ( -1, 0x177, 0x1e9, 0x1e9, 0x177, "v_mad_i64_i32", False, False),
+ ( -1, 0x176, 0x1e8, 0x1e8, 0x176, "v_mad_u64_u32", False, False, InstrClass.Valu64),
+ ( -1, 0x177, 0x1e9, 0x1e9, 0x177, "v_mad_i64_i32", False, False, InstrClass.Valu64),
( -1, -1, 0x1ea, 0x1ea, -1, "v_mad_legacy_f16", True, True),
( -1, -1, 0x1eb, 0x1eb, -1, "v_mad_legacy_u16", False, False),
( -1, -1, 0x1ec, 0x1ec, -1, "v_mad_legacy_i16", False, False),
( -1, -1, 0x1ed, 0x1ed, 0x344, "v_perm_b32", False, False),
- ( -1, -1, 0x1ee, 0x1ee, -1, "v_fma_legacy_f16", True, True),
+ ( -1, -1, 0x1ee, 0x1ee, -1, "v_fma_legacy_f16", True, True, InstrClass.ValuFma),
( -1, -1, 0x1ef, 0x1ef, -1, "v_div_fixup_legacy_f16", True, True),
(0x12c, 0x12c, 0x1f0, 0x1f0, -1, "v_cvt_pkaccum_u8_f32", True, False),
( -1, -1, -1, 0x1f1, 0x373, "v_mad_u32_u16", False, False),
@@ -1049,9 +1079,9 @@ VOP3 = {
(0x122, 0x122, 0x28b, 0x28b, 0x364, "v_bcnt_u32_b32", False, False),
(0x123, 0x123, 0x28c, 0x28c, 0x365, "v_mbcnt_lo_u32_b32", False, False),
( -1, -1, 0x28d, 0x28d, 0x366, "v_mbcnt_hi_u32_b32_e64", False, False),
- ( -1, -1, 0x28f, 0x28f, 0x2ff, "v_lshlrev_b64", False, False),
- ( -1, -1, 0x290, 0x290, 0x300, "v_lshrrev_b64", False, False),
- ( -1, -1, 0x291, 0x291, 0x301, "v_ashrrev_i64", False, False),
+ ( -1, -1, 0x28f, 0x28f, 0x2ff, "v_lshlrev_b64", False, False, InstrClass.Valu64),
+ ( -1, -1, 0x290, 0x290, 0x300, "v_lshrrev_b64", False, False, InstrClass.Valu64),
+ ( -1, -1, 0x291, 0x291, 0x301, "v_ashrrev_i64", False, False, InstrClass.Valu64),
(0x11e, 0x11e, 0x293, 0x293, 0x363, "v_bfm_b32", False, False),
(0x12d, 0x12d, 0x294, 0x294, 0x368, "v_cvt_pknorm_i16_f32", True, False),
(0x12e, 0x12e, 0x295, 0x295, 0x369, "v_cvt_pknorm_u16_f32", True, False),
@@ -1081,10 +1111,10 @@ VOP3 = {
( -1, -1, -1, -1, 0x307, "v_lshrrev_b16_e64", False, False),
( -1, -1, -1, -1, 0x308, "v_ashrrev_i16_e64", False, False),
( -1, -1, -1, -1, 0x314, "v_lshlrev_b16_e64", False, False),
- ( -1, -1, -1, -1, 0x140, "v_fma_legacy_f32", True, True), #GFX10.3+
+ ( -1, -1, -1, -1, 0x140, "v_fma_legacy_f32", True, True, InstrClass.ValuFma), #GFX10.3+
}
-for (gfx6, gfx7, gfx8, gfx9, gfx10, name, in_mod, out_mod) in VOP3:
- opcode(name, gfx7, gfx9, gfx10, Format.VOP3, in_mod, out_mod)
+for (gfx6, gfx7, gfx8, gfx9, gfx10, name, in_mod, out_mod, cls) in default_class(VOP3, InstrClass.Valu32):
+ opcode(name, gfx7, gfx9, gfx10, Format.VOP3, cls, in_mod, out_mod)
# DS instructions: 3 inputs (1 addr, 2 data), 1 output
@@ -1246,7 +1276,7 @@ DS = {
( -1, 0xff, 0xff, 0xff, 0xff, "ds_read_b128"),
}
for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in DS:
- opcode(name, gfx7, gfx9, gfx10, Format.DS)
+ opcode(name, gfx7, gfx9, gfx10, Format.DS, InstrClass.DS)
# MUBUF instructions:
MUBUF = {
@@ -1332,7 +1362,7 @@ MUBUF = {
( -1, -1, -1, -1, 0x34, "buffer_atomic_csub"), #GFX10.3+. seems glc must be set
}
for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in MUBUF:
- opcode(name, gfx7, gfx9, gfx10, Format.MUBUF, is_atomic = "atomic" in name)
+ opcode(name, gfx7, gfx9, gfx10, Format.MUBUF, InstrClass.VMem, is_atomic = "atomic" in name)
MTBUF = {
(0x00, 0x00, 0x00, 0x00, 0x00, "tbuffer_load_format_x"),
@@ -1353,7 +1383,7 @@ MTBUF = {
( -1, -1, 0x0f, 0x0f, 0x0f, "tbuffer_store_format_d16_xyzw"),
}
for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in MTBUF:
- opcode(name, gfx7, gfx9, gfx10, Format.MTBUF)
+ opcode(name, gfx7, gfx9, gfx10, Format.MTBUF, InstrClass.VMem)
IMAGE = {
@@ -1372,9 +1402,9 @@ IMAGE = {
}
# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name)
for (code, name) in IMAGE:
- opcode(name, code, code, code, Format.MIMG)
+ opcode(name, code, code, code, Format.MIMG, InstrClass.VMem)
-opcode("image_msaa_load", -1, -1, 0x80, Format.MIMG) #GFX10.3+
+opcode("image_msaa_load", -1, -1, 0x80, Format.MIMG, InstrClass.VMem) #GFX10.3+
IMAGE_ATOMIC = {
(0x0f, 0x0f, 0x10, "image_atomic_swap"),
@@ -1398,7 +1428,7 @@ IMAGE_ATOMIC = {
# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (gfx6, gfx7, gfx89, gfx89, ???, name)
# gfx7 and gfx10 opcodes are the same here
for (gfx6, gfx7, gfx89, name) in IMAGE_ATOMIC:
- opcode(name, gfx7, gfx89, gfx7, Format.MIMG, is_atomic = True)
+ opcode(name, gfx7, gfx89, gfx7, Format.MIMG, InstrClass.VMem, is_atomic = True)
IMAGE_SAMPLE = {
(0x20, "image_sample"),
@@ -1444,7 +1474,7 @@ IMAGE_SAMPLE = {
}
# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name)
for (code, name) in IMAGE_SAMPLE:
- opcode(name, code, code, code, Format.MIMG)
+ opcode(name, code, code, code, Format.MIMG, InstrClass.VMem)
IMAGE_GATHER4 = {
(0x40, "image_gather4"),
@@ -1477,7 +1507,7 @@ IMAGE_GATHER4 = {
}
# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name)
for (code, name) in IMAGE_GATHER4:
- opcode(name, code, code, code, Format.MIMG)
+ opcode(name, code, code, code, Format.MIMG, InstrClass.VMem)
FLAT = {
@@ -1538,7 +1568,7 @@ FLAT = {
(0x60, -1, 0x60, "flat_atomic_fmax_x2"),
}
for (gfx7, gfx8, gfx10, name) in FLAT:
- opcode(name, gfx7, gfx8, gfx10, Format.FLAT, is_atomic = "atomic" in name)
+ opcode(name, gfx7, gfx8, gfx10, Format.FLAT, InstrClass.VMem, is_atomic = "atomic" in name) #TODO: also LDS?
GLOBAL = {
#GFX8_9, GFX10
@@ -1601,7 +1631,7 @@ GLOBAL = {
( -1, 0x34, "global_atomic_csub"), #GFX10.3+. seems glc must be set
}
for (gfx8, gfx10, name) in GLOBAL:
- opcode(name, -1, gfx8, gfx10, Format.GLOBAL, is_atomic = "atomic" in name)
+ opcode(name, -1, gfx8, gfx10, Format.GLOBAL, InstrClass.VMem, is_atomic = "atomic" in name)
SCRATCH = {
#GFX8_9, GFX10
@@ -1629,7 +1659,7 @@ SCRATCH = {
(0x25, 0x25, "scratch_load_short_d16_hi"),
}
for (gfx8, gfx10, name) in SCRATCH:
- opcode(name, -1, gfx8, gfx10, Format.SCRATCH)
+ opcode(name, -1, gfx8, gfx10, Format.SCRATCH, InstrClass.VMem)
# check for duplicate opcode numbers
for ver in ['gfx9', 'gfx10']: