From 87b5058d8f54ea3fb047aee320ec2c8a8022b73f Mon Sep 17 00:00:00 2001 From: Pan Xiuli Date: Wed, 13 Jul 2016 11:00:57 +0800 Subject: Backend: Add gen8+ instruction compact support Add three src instruction compact and one/two src instruction compact for gen8+ as well as decompact function for gen8+. Signed-off-by: Pan Xiuli Reviewed-by: Yang Rong --- backend/src/backend/gen8_encoder.cpp | 3 + backend/src/backend/gen_context.cpp | 8 +- backend/src/backend/gen_defs.hpp | 26 ++ backend/src/backend/gen_insn_compact.cpp | 425 ++++++++++++++++++++++++++----- backend/src/backend/gen_program.cpp | 8 +- backend/src/backend/gen_program.hpp | 2 +- 6 files changed, 403 insertions(+), 69 deletions(-) diff --git a/backend/src/backend/gen8_encoder.cpp b/backend/src/backend/gen8_encoder.cpp index d5059a8f..2a79e30e 100644 --- a/backend/src/backend/gen8_encoder.cpp +++ b/backend/src/backend/gen8_encoder.cpp @@ -37,6 +37,7 @@ static const uint32_t untypedRWMask[] = { namespace gbe { + extern bool compactAlu3(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src0, GenRegister src1, GenRegister src2); void Gen8Encoder::setHeader(GenNativeInstruction *insn) { Gen8NativeInstruction *gen8_insn = &insn->gen8_insn; if (this->curr.execWidth == 8) @@ -490,6 +491,8 @@ namespace gbe GenRegister src1, GenRegister src2) { + if(compactAlu3(this, opcode, dest, src0, src1, src2)) + return; GenNativeInstruction *insn = this->next(opcode); Gen8NativeInstruction *gen8_insn = &insn->gen8_insn; diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 8802efcb..8e9659ee 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -3888,6 +3888,12 @@ namespace gbe } void GenContext::outputAssembly(FILE *file, GenKernel* genKernel) { + /* get gen version for the instruction compact */ + uint32_t insn_version = 0; + if (IS_GEN7(deviceID) || IS_GEN75(deviceID)) + insn_version = 7; + else if (IS_GEN8(deviceID) || IS_GEN9(deviceID)) + insn_version = 8; fprintf(file, "%s's disassemble begin:\n", genKernel->getName()); ir::LabelIndex curLabel = (ir::LabelIndex)0; GenCompactInstruction * pCom = NULL; @@ -3910,7 +3916,7 @@ namespace gbe fprintf(file, " (%8i) ", insnID); pCom = (GenCompactInstruction*)&p->store[insnID]; if(pCom->bits1.cmpt_control == 1) { - decompactInstruction(pCom, &insn); + decompactInstruction(pCom, &insn, insn_version); gen_disasm(file, &insn, deviceID, 1); insnID++; } else { diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp index 66ae5b59..bcbb23f4 100644 --- a/backend/src/backend/gen_defs.hpp +++ b/backend/src/backend/gen_defs.hpp @@ -492,6 +492,32 @@ struct GenInstruction { union GenCompactInstruction { struct GenInstruction low; + /* Gen8+ src3 compact inst */ + struct { + struct { + uint32_t opcode:7; + uint32_t pad:1; + uint32_t control_index:2; + uint32_t src_index:2; + uint32_t dst_reg_nr:7; + uint32_t pad1:9; + uint32_t src0_rep_ctrl:1; + uint32_t compact_control:1; + uint32_t debug_control:1; + uint32_t saturate:1; + } bits1; + struct { + uint32_t src1_rep_ctrl:1; + uint32_t src2_rep_ctrl:1; + uint32_t src0_subnr:3; + uint32_t src1_subnr:3; + uint32_t src2_subnr:3; + uint32_t src0_reg_nr:7; + uint32_t src1_reg_nr:7; + uint32_t src2_reg_nr:7; + } bits2; + } src3Insn; + /* Normal src2 compact inst */ struct { struct { uint32_t opcode:7; diff --git a/backend/src/backend/gen_insn_compact.cpp b/backend/src/backend/gen_insn_compact.cpp index 7be33ba9..036d0572 100644 --- a/backend/src/backend/gen_insn_compact.cpp +++ b/backend/src/backend/gen_insn_compact.cpp @@ -62,6 +62,13 @@ namespace gbe { {0b0101000000100000000, 31}, }; + static compact_table_entry src3_control_table[] = { + {0b100000000110000000000001, 0}, + {0b000000000110000000000001, 1}, + {0b000000001000000000000001, 2}, + {0b000000001000000000100001, 3}, + }; + static compact_table_entry data_type_table[] = { {0b000000001000001100, 20}, {0b001000000000000001, 0}, @@ -97,6 +104,41 @@ namespace gbe { {0b001111111110111101, 28}, }; + static compact_table_entry gen8_data_type_table[] = { + {0b001000000000000000001, 0}, + {0b001000000000001000000, 1}, + {0b001000000000001000001, 2}, + {0b001000000000011000001, 3}, + {0b001000000000101011101, 4}, + {0b001000000010111011101, 5}, + {0b001000000011101000001, 6}, + {0b001000000011101000101, 7}, + {0b001000000011101011101, 8}, + {0b001000001000001000001, 9}, + {0b001000011000001000000, 10}, + {0b001000011000001000001, 11}, + {0b001000101000101000101, 12}, + {0b001000111000101000100, 13}, + {0b001000111000101000101, 14}, + {0b001011100011101011101, 15}, + {0b001011101011100011101, 16}, + {0b001011101011101011100, 17}, + {0b001011101011101011101, 18}, + {0b001011111011101011100, 19}, + {0b000000000010000001100, 20}, + {0b001000000000001011101, 21}, + {0b001000000000101000101, 22}, + {0b001000001000001000000, 23}, + {0b001000101000101000100, 24}, + {0b001000111000100000100, 25}, + {0b001001001001000001001, 26}, + {0b001010111011101011101, 27}, + {0b001011111011101011101, 28}, + {0b001001111001101001100, 29}, + {0b001001001001001001000, 30}, + {0b001001011001001001000, 31}, + }; + static compact_table_entry data_type_decompact[] = { {0b001000000000000001, 0}, {0b001000000000100000, 1}, @@ -224,6 +266,25 @@ namespace gbe { }; uint32_t data; }; + union Src3ControlBits{ + struct { + uint32_t access_mode:1; + uint32_t dependency_control:2; + uint32_t nibble_control:1; + uint32_t quarter_control:2; + uint32_t thread_control:2; + uint32_t predicate_control:4; + uint32_t predicate_inverse:1; + uint32_t execution_size:3; + uint32_t conditional_modifier:4; + uint32_t acc_wr_control:1; + uint32_t flag_sub_reg_nr:1; + uint32_t flag_reg_nr:1; + uint32_t mask_control:1; + }; + uint32_t data; + }; + union DataTypeBits{ struct { uint32_t dest_reg_file:2; @@ -238,6 +299,21 @@ namespace gbe { }; uint32_t data; }; + union Gen8DataTypeBits{ + struct { + uint32_t dest_reg_file:2; + uint32_t dest_reg_type:4; + uint32_t src0_reg_file:2; + uint32_t src0_reg_type:4; + uint32_t src1_reg_file:2; + uint32_t src1_reg_type:4; + uint32_t dest_horiz_stride:2; + uint32_t dest_address_mode:1; + uint32_t pad:11; + }; + uint32_t data; + }; + union SubRegBits { struct { uint32_t dest_subreg_nr:5; @@ -260,48 +336,157 @@ namespace gbe { uint32_t data; }; - void decompactInstruction(GenCompactInstruction * p, void *insn) { - Gen7NativeInstruction *pOut = (union Gen7NativeInstruction *) insn; + void decompactInstruction(GenCompactInstruction * p, void *insn, uint32_t insn_version) { GenNativeInstruction *pNative = (union GenNativeInstruction *) insn; - - memset(pOut, 0, sizeof(Gen7NativeInstruction)); - union ControlBits control_bits; - control_bits.data = control_table[(uint32_t)p->bits1.control_index].bit_pattern; - pNative->low.low = (uint32_t)p->bits1.opcode | ((control_bits.data & 0xffff) << 8); - pOut->header.destreg_or_condmod = p->bits1.destreg_or_condmod; - pOut->header.saturate = control_bits.saturate; - pOut->header.acc_wr_control = p->bits1.acc_wr_control; - pOut->header.cmpt_control = p->bits1.cmpt_control; - pOut->header.debug_control = p->bits1.debug_control; - - union DataTypeBits data_type_bits; - union SubRegBits subreg_bits; - union SrcRegBits src0_bits; - data_type_bits.data = data_type_decompact[(uint32_t)p->bits1.data_type_index].bit_pattern; - subreg_bits.data = subreg_table[(uint32_t)p->bits1.sub_reg_index].bit_pattern; - src0_bits.data = srcreg_table[p->bits1.src0_index_lo | p->bits2.src0_index_hi << 2].bit_pattern; - - pNative->low.high |= data_type_bits.data & 0x7fff; - pOut->bits1.da1.dest_horiz_stride = data_type_bits.dest_horiz_stride; - pOut->bits1.da1.dest_address_mode = data_type_bits.dest_address_mode; - pOut->bits1.da1.dest_reg_nr = p->bits2.dest_reg_nr; - pOut->bits1.da1.dest_subreg_nr = subreg_bits.dest_subreg_nr; - - pOut->bits2.da1.src0_subreg_nr = subreg_bits.src0_subreg_nr; - pOut->bits2.da1.src0_reg_nr = p->bits2.src0_reg_nr; - pNative->high.low |= (src0_bits.data << 13); - pOut->bits2.da1.flag_sub_reg_nr = control_bits.flag_sub_reg_nr; - pOut->bits2.da1.flag_reg_nr = control_bits.flag_reg_nr; - - if(data_type_bits.src1_reg_file == GEN_IMMEDIATE_VALUE) { - uint32_t imm = (uint32_t)p->bits2.src1_reg_nr | (p->bits2.src1_index<<8); - pOut->bits3.ud = imm & 0x1000 ? (imm | 0xfffff000) : imm; + Gen7NativeInstruction *pOut = (union Gen7NativeInstruction *) insn; + /* src3 compact insn */ + if(p->bits1.opcode == GEN_OPCODE_MAD || p->bits1.opcode == GEN_OPCODE_LRP) { +#define NO_SWIZZLE ((0<<0) | (1<<2) | (2<<4) | (3<<6)) + assert(insn_version == 8); + Gen8NativeInstruction *pOut = (union Gen8NativeInstruction *) insn; + memset(pOut, 0, sizeof(Gen8NativeInstruction)); + union Src3ControlBits control_bits; + control_bits.data = src3_control_table[(uint32_t)p->src3Insn.bits1.control_index].bit_pattern; + pOut->header.opcode = p->bits1.opcode; + + pOut->bits1.da1.flag_sub_reg_nr = control_bits.flag_sub_reg_nr; + pOut->bits1.da1.flag_reg_nr = control_bits.flag_reg_nr; + pOut->header.nib_ctrl = control_bits.nibble_control; + pOut->header.execution_size = control_bits.execution_size; + pOut->header.predicate_control = control_bits.predicate_control; + pOut->header.predicate_inverse = control_bits.predicate_inverse; + pOut->header.thread_control = control_bits.thread_control; + pOut->header.quarter_control = control_bits.quarter_control; + pOut->header.dependency_control = control_bits.dependency_control; + pOut->header.access_mode = control_bits.access_mode; + pOut->header.acc_wr_control = control_bits.acc_wr_control; + pOut->header.destreg_or_condmod = control_bits.conditional_modifier; + pOut->bits1.da1.mask_control= control_bits.mask_control; + pOut->header.cmpt_control = p->bits1.cmpt_control; + pOut->header.debug_control = p->bits1.debug_control; + pOut->header.saturate = p->src3Insn.bits1.saturate; + + /* dst */ + pOut->bits1.da3src.dest_reg_nr = p->src3Insn.bits1.dst_reg_nr; + pOut->bits1.da3src.dest_writemask = 0xf; + + pOut->bits2.da3src.src0_swizzle = NO_SWIZZLE; + pOut->bits2.da3src.src0_subreg_nr = p->src3Insn.bits2.src0_subnr; + pOut->bits2.da3src.src0_reg_nr = p->src3Insn.bits2.src0_reg_nr; + pOut->bits1.da3src.src0_negate = p->src3Insn.bits1.src_index == 1; + pOut->bits2.da3src.src0_rep_ctrl = p->src3Insn.bits1.src0_rep_ctrl; + + pOut->bits2.da3src.src1_swizzle = NO_SWIZZLE; + pOut->bits2.da3src.src1_subreg_nr_low = (p->src3Insn.bits2.src1_subnr) & 0x3; + pOut->bits3.da3src.src1_subreg_nr_high = (p->src3Insn.bits2.src1_subnr) >> 2; + pOut->bits2.da3src.src1_rep_ctrl = p->src3Insn.bits2.src1_rep_ctrl; + pOut->bits3.da3src.src1_reg_nr = p->src3Insn.bits2.src1_reg_nr; + pOut->bits1.da3src.src1_negate = p->src3Insn.bits1.src_index == 2; + + pOut->bits3.da3src.src2_swizzle = NO_SWIZZLE; + pOut->bits3.da3src.src2_subreg_nr = p->src3Insn.bits2.src2_subnr; + pOut->bits3.da3src.src2_rep_ctrl = p->src3Insn.bits2.src2_rep_ctrl; + pOut->bits3.da3src.src2_reg_nr = p->src3Insn.bits2.src2_reg_nr; + pOut->bits1.da3src.src2_negate = p->src3Insn.bits1.src_index == 3; +#undef NO_SWIZZLE } else { - union SrcRegBits src1_bits; - src1_bits.data = srcreg_table[p->bits2.src1_index].bit_pattern; - pOut->bits3.da1.src1_subreg_nr = subreg_bits.src1_subreg_nr; - pOut->bits3.da1.src1_reg_nr = p->bits2.src1_reg_nr; - pNative->high.high |= (src1_bits.data << 13); + if (insn_version == 7) { + memset(pOut, 0, sizeof(Gen7NativeInstruction)); + union ControlBits control_bits; + control_bits.data = control_table[(uint32_t)p->bits1.control_index].bit_pattern; + pNative->low.low = (uint32_t)p->bits1.opcode | ((control_bits.data & 0xffff) << 8); + pOut->header.destreg_or_condmod = p->bits1.destreg_or_condmod; + pOut->header.saturate = control_bits.saturate; + pOut->header.acc_wr_control = p->bits1.acc_wr_control; + pOut->header.cmpt_control = p->bits1.cmpt_control; + pOut->header.debug_control = p->bits1.debug_control; + + union DataTypeBits data_type_bits; + union SubRegBits subreg_bits; + union SrcRegBits src0_bits; + data_type_bits.data = data_type_decompact[(uint32_t)p->bits1.data_type_index].bit_pattern; + subreg_bits.data = subreg_table[(uint32_t)p->bits1.sub_reg_index].bit_pattern; + src0_bits.data = srcreg_table[p->bits1.src0_index_lo | p->bits2.src0_index_hi << 2].bit_pattern; + + pNative->low.high |= data_type_bits.data & 0x7fff; + pOut->bits1.da1.dest_horiz_stride = data_type_bits.dest_horiz_stride; + pOut->bits1.da1.dest_address_mode = data_type_bits.dest_address_mode; + pOut->bits1.da1.dest_reg_nr = p->bits2.dest_reg_nr; + pOut->bits1.da1.dest_subreg_nr = subreg_bits.dest_subreg_nr; + + pOut->bits2.da1.src0_subreg_nr = subreg_bits.src0_subreg_nr; + pOut->bits2.da1.src0_reg_nr = p->bits2.src0_reg_nr; + pNative->high.low |= (src0_bits.data << 13); + pOut->bits2.da1.flag_sub_reg_nr = control_bits.flag_sub_reg_nr; + pOut->bits2.da1.flag_reg_nr = control_bits.flag_reg_nr; + + if(data_type_bits.src1_reg_file == GEN_IMMEDIATE_VALUE) { + uint32_t imm = (uint32_t)p->bits2.src1_reg_nr | (p->bits2.src1_index<<8); + pOut->bits3.ud = imm & 0x1000 ? (imm | 0xfffff000) : imm; + } else { + union SrcRegBits src1_bits; + src1_bits.data = srcreg_table[p->bits2.src1_index].bit_pattern; + pOut->bits3.da1.src1_subreg_nr = subreg_bits.src1_subreg_nr; + pOut->bits3.da1.src1_reg_nr = p->bits2.src1_reg_nr; + pNative->high.high |= (src1_bits.data << 13); + } + } else if (insn_version == 8) { + Gen8NativeInstruction *pOut = (union Gen8NativeInstruction *) insn; + memset(pOut, 0, sizeof(Gen8NativeInstruction)); + union ControlBits control_bits; + control_bits.data = control_table[(uint32_t)p->bits1.control_index].bit_pattern; + pOut->header.opcode = p->bits1.opcode; + + pOut->bits1.da1.flag_sub_reg_nr = control_bits.flag_sub_reg_nr; + pOut->bits1.da1.flag_reg_nr = control_bits.flag_reg_nr; + pOut->header.saturate = control_bits.saturate; + pOut->header.execution_size= control_bits.execution_size; + pOut->header.predicate_control= control_bits.predicate_control; + pOut->header.predicate_inverse= control_bits.predicate_inverse; + pOut->header.thread_control= control_bits.thread_control; + pOut->header.quarter_control= control_bits.quarter_control; + pOut->header.dependency_control = control_bits.dependency_control; + pOut->header.access_mode= control_bits.access_mode; + pOut->bits1.da1.mask_control= control_bits.mask_control; + + pOut->header.destreg_or_condmod = p->bits1.destreg_or_condmod; + pOut->header.acc_wr_control = p->bits1.acc_wr_control; + pOut->header.cmpt_control = p->bits1.cmpt_control; + pOut->header.debug_control = p->bits1.debug_control; + + union Gen8DataTypeBits data_type_bits; + union SubRegBits subreg_bits; + union SrcRegBits src0_bits; + data_type_bits.data = gen8_data_type_table[(uint32_t)p->bits1.data_type_index].bit_pattern; + subreg_bits.data = subreg_table[(uint32_t)p->bits1.sub_reg_index].bit_pattern; + src0_bits.data = srcreg_table[p->bits1.src0_index_lo | p->bits2.src0_index_hi << 2].bit_pattern; + + pOut->bits1.da1.dest_reg_file = data_type_bits.dest_reg_file; + pOut->bits1.da1.dest_reg_type = data_type_bits.dest_reg_type; + pOut->bits1.da1.dest_horiz_stride = data_type_bits.dest_horiz_stride; + pOut->bits1.da1.dest_address_mode = data_type_bits.dest_address_mode; + pOut->bits1.da1.dest_reg_nr = p->bits2.dest_reg_nr; + pOut->bits1.da1.dest_subreg_nr = subreg_bits.dest_subreg_nr; + + pOut->bits1.da1.src0_reg_file = data_type_bits.src0_reg_file; + pOut->bits1.da1.src0_reg_type = data_type_bits.src0_reg_type; + pOut->bits2.da1.src0_subreg_nr = subreg_bits.src0_subreg_nr; + pOut->bits2.da1.src0_reg_nr = p->bits2.src0_reg_nr; + pNative->high.low |= (src0_bits.data << 13); + + pOut->bits2.da1.src1_reg_file = data_type_bits.src1_reg_file; + pOut->bits2.da1.src1_reg_type = data_type_bits.src1_reg_type; + if(data_type_bits.src1_reg_file == GEN_IMMEDIATE_VALUE) { + uint32_t imm = (uint32_t)p->bits2.src1_reg_nr | (p->bits2.src1_index<<8); + pOut->bits3.ud = imm & 0x1000 ? (imm | 0xfffff000) : imm; + } else { + union SrcRegBits src1_bits; + src1_bits.data = srcreg_table[p->bits2.src1_index].bit_pattern; + pOut->bits3.da1.src1_subreg_nr = subreg_bits.src1_subreg_nr; + pOut->bits3.da1.src1_reg_nr = p->bits2.src1_reg_nr; + pNative->high.high |= (src1_bits.data << 13); + } + } } } @@ -349,6 +534,50 @@ namespace gbe { return r->index; } + int compactControlBitsSrc3(GenEncoder *p, uint32_t quarter, uint32_t execWidth) { + + const GenInstructionState *s = &p->curr; + // some quick check + if(s->nibControl != 0) + return -1; + if(s->predicate != GEN_PREDICATE_NONE) + return -1; + if(s->inversePredicate != 0) + return -1; + if(s->flag == 1) + return -1; + if(s->subFlag != 0) + return -1; + + Src3ControlBits b; + b.data = 0; + + if (execWidth == 8) + b.execution_size = GEN_WIDTH_8; + else if (execWidth == 16) + b.execution_size = GEN_WIDTH_16; + else if (execWidth == 4) + return -1; + else if (execWidth == 1) + return -1; + else + NOT_IMPLEMENTED; + + b.mask_control = s->noMask; + b.quarter_control = quarter; + b.access_mode = 1; + + compact_table_entry key; + key.bit_pattern = b.data; + + compact_table_entry *r = (compact_table_entry *)bsearch(&key, src3_control_table, + sizeof(src3_control_table)/sizeof(compact_table_entry), sizeof(compact_table_entry), cmp_key); + if (r == NULL) + return -1; + return r->index; + } + + int compactDataTypeBits(GenEncoder *p, GenRegister *dst, GenRegister *src0, GenRegister *src1) { // compact does not support any indirect acess @@ -358,35 +587,65 @@ namespace gbe { if(src0->file == GEN_IMMEDIATE_VALUE) return -1; - DataTypeBits b; - b.data = 0; + compact_table_entry *r; + if(p->getCompactVersion() == 7) { + DataTypeBits b; + b.data = 0; - b.dest_horiz_stride = dst->hstride == GEN_HORIZONTAL_STRIDE_0 ? GEN_HORIZONTAL_STRIDE_1 : dst->hstride; - b.dest_address_mode = dst->address_mode; - b.dest_reg_file = dst->file; - b.dest_reg_type = dst->type; + b.dest_horiz_stride = dst->hstride == GEN_HORIZONTAL_STRIDE_0 ? GEN_HORIZONTAL_STRIDE_1 : dst->hstride; + b.dest_address_mode = dst->address_mode; + b.dest_reg_file = dst->file; + b.dest_reg_type = dst->type; - b.src0_reg_file = src0->file; - b.src0_reg_type = src0->type; + b.src0_reg_file = src0->file; + b.src0_reg_type = src0->type; - if(src1) { - b.src1_reg_type = src1->type; - b.src1_reg_file = src1->file; - } else { - // default to zero - b.src1_reg_type = 0; - b.src1_reg_file = 0; - } + if(src1) { + b.src1_reg_type = src1->type; + b.src1_reg_file = src1->file; + } else { + // default to zero + b.src1_reg_type = 0; + b.src1_reg_file = 0; + } - compact_table_entry key; - key.bit_pattern = b.data; + compact_table_entry key; + key.bit_pattern = b.data; + + r = (compact_table_entry *)bsearch(&key, data_type_table, sizeof(data_type_table)/sizeof(compact_table_entry), + sizeof(compact_table_entry), cmp_key); + } else if(p->getCompactVersion() == 8) { + Gen8DataTypeBits b; + b.data = 0; + + b.dest_horiz_stride = dst->hstride == GEN_HORIZONTAL_STRIDE_0 ? GEN_HORIZONTAL_STRIDE_1 : dst->hstride; + b.dest_address_mode = dst->address_mode; + b.dest_reg_file = dst->file; + b.dest_reg_type = dst->type; - compact_table_entry *r = (compact_table_entry *)bsearch(&key, data_type_table, - sizeof(data_type_table)/sizeof(compact_table_entry), sizeof(compact_table_entry), cmp_key); + b.src0_reg_file = src0->file; + b.src0_reg_type = src0->type; + + if(src1) { + b.src1_reg_type = src1->type; + b.src1_reg_file = src1->file; + } else { + // default to zero + b.src1_reg_type = 0; + b.src1_reg_file = 0; + } + + compact_table_entry key; + key.bit_pattern = b.data; + + r = (compact_table_entry *)bsearch(&key, gen8_data_type_table, sizeof(gen8_data_type_table)/sizeof(compact_table_entry), + sizeof(compact_table_entry), cmp_key); + } if (r == NULL) return -1; return r->index; } + int compactSubRegBits(GenEncoder *p, GenRegister *dst, GenRegister *src0, GenRegister *src1) { SubRegBits b; b.data = 0; @@ -440,9 +699,6 @@ namespace gbe { } bool compactAlu1(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src, uint32_t condition, bool split) { - if(p->getCompactVersion() == 8) - return false; - if(split) { // TODO support it return false; @@ -478,9 +734,6 @@ namespace gbe { } bool compactAlu2(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src0, GenRegister src1, uint32_t condition, bool split) { - if(p->getCompactVersion() == 8) - return false; - if(split) { // TODO support it return false; @@ -528,4 +781,44 @@ namespace gbe { return true; } } + + bool compactAlu3(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src0, GenRegister src1, GenRegister src2) + { + if(p->getCompactVersion() < 8) + return false; + if(opcode != GEN_OPCODE_MAD && opcode != GEN_OPCODE_LRP) + return false; + assert(src0.file == GEN_GENERAL_REGISTER_FILE); + assert(src0.address_mode == GEN_ADDRESS_DIRECT); + assert(src0.nr < 128); + assert(src1.file == GEN_GENERAL_REGISTER_FILE); + assert(src1.address_mode == GEN_ADDRESS_DIRECT); + assert(src1.nr < 128); + assert(src2.file == GEN_GENERAL_REGISTER_FILE); + assert(src2.address_mode == GEN_ADDRESS_DIRECT); + assert(src2.nr < 128); + + int control_index = compactControlBitsSrc3(p, p->curr.quarterControl, p->curr.execWidth); + if( control_index == -1) return false; + if( src0.negation + src1.negation + src2.negation > 1) + return false; + + GenCompactInstruction *insn = p->nextCompact(opcode); + insn->src3Insn.bits1.control_index = control_index; + insn->src3Insn.bits1.compact_control = 1; + insn->src3Insn.bits1.src_index = src0.negation ? 1 : (src1.negation ? 2: (src2.negation ? 3 : 0)); + insn->src3Insn.bits1.dst_reg_nr = dst.nr ; + insn->src3Insn.bits1.src0_rep_ctrl = src0.vstride == GEN_VERTICAL_STRIDE_0; + insn->src3Insn.bits1.saturate = p->curr.saturate; + /* bits2 */ + insn->src3Insn.bits2.src1_rep_ctrl = src1.vstride == GEN_VERTICAL_STRIDE_0; + insn->src3Insn.bits2.src2_rep_ctrl = src2.vstride == GEN_VERTICAL_STRIDE_0; + insn->src3Insn.bits2.src0_subnr = src0.subnr/4; + insn->src3Insn.bits2.src1_subnr = src1.subnr/4; + insn->src3Insn.bits2.src2_subnr = src2.subnr/4; + insn->src3Insn.bits2.src0_reg_nr = src0.nr; + insn->src3Insn.bits2.src1_reg_nr = src1.nr; + insn->src3Insn.bits2.src2_reg_nr = src2.nr; + return true; + } }; diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp index 88010c27..ade0157f 100644 --- a/backend/src/backend/gen_program.cpp +++ b/backend/src/backend/gen_program.cpp @@ -97,10 +97,16 @@ namespace gbe { GenCompactInstruction * pCom = NULL; GenInstruction insn[2]; + uint32_t insn_version = 0; + if (IS_GEN7(deviceID) || IS_GEN75(deviceID)) + insn_version = 7; + else if (IS_GEN8(deviceID) || IS_GEN9(deviceID)) + insn_version = 8; + for (uint32_t i = 0; i < insnNum;) { pCom = (GenCompactInstruction*)(insns+i); if(pCom->bits1.cmpt_control == 1) { - decompactInstruction(pCom, &insn); + decompactInstruction(pCom, &insn, insn_version); gen_disasm(f, &insn, deviceID, 1); i++; } else { diff --git a/backend/src/backend/gen_program.hpp b/backend/src/backend/gen_program.hpp index 076f617c..ff756e04 100644 --- a/backend/src/backend/gen_program.hpp +++ b/backend/src/backend/gen_program.hpp @@ -81,7 +81,7 @@ namespace gbe GBE_CLASS(GenProgram); }; /*! decompact GEN ASM if it is in compacted format */ - extern void decompactInstruction(union GenCompactInstruction *p, void *insn); + extern void decompactInstruction(union GenCompactInstruction *p, void *insn, uint32_t insn_version); } /* namespace gbe */ #endif /* __GBE_GEN_PROGRAM_HPP__ */ -- cgit v1.2.1