/*
 * Copyright © 2012 Intel Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
 *
 * Author: Benjamin Segovia
 */

/**
 * \file gen_context.cpp
 * \author Benjamin Segovia
 */

#include "backend/gen_context.hpp"
#include "backend/gen_program.hpp"
#include "backend/gen_defs.hpp"
#include "backend/gen_encoder.hpp"
#include "backend/gen_insn_selection.hpp"
#include "backend/gen_insn_scheduling.hpp"
#include "backend/gen_reg_allocation.hpp"
#include "backend/gen/gen_mesa_disasm.h"
#include "ir/function.hpp"
#include "sys/cvar.hpp"
#include <cstring>

namespace gbe
{
  ///////////////////////////////////////////////////////////////////////////
  // GenContext implementation
  ///////////////////////////////////////////////////////////////////////////

  /*! Build the context together with the encoder, the instruction selection
   *  and the register allocator it owns (all released in the destructor)
   */
  GenContext::GenContext(const ir::Unit &unit,
                         const std::string &name,
                         bool limitRegisterPressure) :
    Context(unit, name), limitRegisterPressure(limitRegisterPressure)
  {
    this->p = GBE_NEW(GenEncoder, simdWidth, 7); // XXX handle more than Gen7
    this->sel = GBE_NEW(Selection, *this);
    this->ra = GBE_NEW(GenRegAllocator, *this);
  }

  /*! Release the helpers in the reverse order of their creation */
  GenContext::~GenContext(void) {
    GBE_DELETE(this->ra);
    GBE_DELETE(this->sel);
    GBE_DELETE(this->p);
  }

  /*! Walk every selection instruction of every selection block and dispatch
   *  it to the emit##FAMILY method that matches its opcode. The dispatch table
   *  is generated from backend/gen_insn_selection.hxx
   */
  void GenContext::emitInstructionStream(void) {
    // Emit Gen ISA
    for (auto &block : *sel->blockList) {
      for (auto &insn : block.insnList) {
        const uint32_t opcode = insn.opcode;
        p->push();
        // no more virtual register here in that part of the code generation
        GBE_ASSERT(insn.state.physicalFlag);
        p->curr = insn.state;
        switch (opcode) {
#define DECL_SELECTION_IR(OPCODE, FAMILY) \
  case SEL_OP_##OPCODE: this->emit##FAMILY(insn); break;
#include "backend/gen_insn_selection.hxx"
// Undefine the macro we actually defined so that it does not leak further
#undef DECL_SELECTION_IR
        }
        p->pop();
      }
    }
  }

  /*! Patch every recorded jump with the distance to its target label. Both
   *  positions are indices in the encoder instruction store (see
   *  emitLabelInstruction and emitJumpInstruction)
   */
  void GenContext::patchBranches(void) {
    using namespace ir;
    for (const auto &pair : branchPos2) {
      const LabelIndex label = pair.first;
      const int32_t insnID = pair.second;
      const auto target = labelPos.find(label);
      // A jump to a label that was never emitted cannot be patched
      GBE_ASSERT(target != labelPos.end());
      const int32_t targetID = target->second;
      // NOTE(review): the "-1" and "*2" presumably convert an instruction
      // index delta into the unit JMPI expects, relative to the instruction
      // following the jump -- confirm against GenEncoder::patchJMPI
      p->patchJMPI(insnID, (targetID-insnID-1) * 2);
    }
  }

  /*! Emit the code that computes the per-lane stack pointer. Nothing is
   *  emitted when the kernel has no stack pointer entry in its curbe
   */
  void GenContext::emitStackPointer(void) {
    using namespace ir;

    // Only emit stack pointer computation if we use a stack
    if (kernel->getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) <= 0)
      return;

    // Check that everything is consistent in the kernel code
    const uint32_t perLaneSize = kernel->getStackSize();
    const uint32_t perThreadSize = perLaneSize * this->simdWidth;
    const int32_t offset = GEN_REG_SIZE + kernel->getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER);
    GBE_ASSERT(perLaneSize > 0);
    GBE_ASSERT(isPowerOf<2>(perLaneSize) == true);
    GBE_ASSERT(isPowerOf<2>(perThreadSize) == true);

    // Use shifts rather than muls which are limited to 32x16 bit sources
    const uint32_t perLaneShift = logi2(perLaneSize);
    const uint32_t perThreadShift = logi2(perThreadSize);
    const GenRegister selStackPtr = this->simdWidth == 8 ?
      GenRegister::ud8grf(ir::ocl::stackptr) :
      GenRegister::ud16grf(ir::ocl::stackptr);
    const GenRegister stackptr = ra->genReg(selStackPtr);

    // Locate the dword that holds the stack buffer address from its offset
    const uint32_t nr = offset / GEN_REG_SIZE;
    const uint32_t subnr = (offset % GEN_REG_SIZE) / sizeof(uint32_t);
    const GenRegister bufferptr = GenRegister::ud1grf(nr, subnr);

    // We compute the per-lane stack pointer here
    p->push();
      p->curr.execWidth = 1;
      p->curr.predicate = GEN_PREDICATE_NONE;
      // NOTE(review): r0.5 >> 10 presumably yields a per-thread identifier
      // and r126.0 is used as a scratch register -- confirm
      p->SHR(GenRegister::ud1grf(126,0), GenRegister::ud1grf(0,5), GenRegister::immud(10));
      p->curr.execWidth = this->simdWidth;
      p->SHL(stackptr, stackptr, GenRegister::immud(perLaneShift));
      p->curr.execWidth = 1;
      p->SHL(GenRegister::ud1grf(126,0), GenRegister::ud1grf(126,0), GenRegister::immud(perThreadShift));
      p->curr.execWidth = this->simdWidth;
      p->ADD(stackptr, stackptr, bufferptr);
      p->ADD(stackptr, stackptr, GenRegister::ud1grf(126,0));
    p->pop();
  }

  /*! Labels emit no instruction: we only record the index of the next
   *  instruction in the encoder store so that branches can be patched later
   */
  void GenContext::emitLabelInstruction(const SelectionInstruction &insn) {
    const ir::LabelIndex label(insn.index);
    this->labelPos.insert(std::make_pair(label, p->store.size()));
  }

  /*! Encode one-source instructions. Other opcodes hit NOT_IMPLEMENTED */
  void GenContext::emitUnaryInstruction(const SelectionInstruction &insn) {
    const GenRegister dst = ra->genReg(insn.dst(0));
    const GenRegister src = ra->genReg(insn.src(0));
    switch (insn.opcode) {
      case SEL_OP_MOV: p->MOV(dst, src); break;
      case SEL_OP_NOT: p->NOT(dst, src); break;
      case SEL_OP_RNDD: p->RNDD(dst, src); break;
      case SEL_OP_RNDU: p->RNDU(dst, src); break;
      case SEL_OP_RNDE: p->RNDE(dst, src); break;
      case SEL_OP_RNDZ: p->RNDZ(dst, src); break;
      default: NOT_IMPLEMENTED;
    }
  }

  /*! Encode two-source instructions. Other opcodes hit NOT_IMPLEMENTED */
  void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) {
    const GenRegister dst = ra->genReg(insn.dst(0));
    const GenRegister src0 = ra->genReg(insn.src(0));
    const GenRegister src1 = ra->genReg(insn.src(1));
    switch (insn.opcode) {
      case SEL_OP_SEL: p->SEL(dst, src0, src1); break;
      case SEL_OP_AND: p->AND(dst, src0, src1); break;
      case SEL_OP_OR: p->OR (dst, src0, src1); break;
      case SEL_OP_XOR: p->XOR(dst, src0, src1); break;
      case SEL_OP_SHR: p->SHR(dst, src0, src1); break;
      case SEL_OP_SHL: p->SHL(dst, src0, src1); break;
      case SEL_OP_RSR: p->RSR(dst, src0, src1); break;
      case SEL_OP_RSL: p->RSL(dst, src0, src1); break;
      case SEL_OP_ASR: p->ASR(dst, src0, src1); break;
      case SEL_OP_ADD: p->ADD(dst, src0, src1); break;
      case SEL_OP_MUL: p->MUL(dst, src0, src1); break;
      case SEL_OP_MACH: p->MACH(dst, src0, src1); break;
      default: NOT_IMPLEMENTED;
    }
  }

  /*! Encode three-source instructions. Only MAD is handled */
  void GenContext::emitTernaryInstruction(const SelectionInstruction &insn) {
    const GenRegister dst = ra->genReg(insn.dst(0));
    const GenRegister src0 = ra->genReg(insn.src(0));
    const GenRegister src1 = ra->genReg(insn.src(1));
    const GenRegister src2 = ra->genReg(insn.src(2));
    switch (insn.opcode) {
      case SEL_OP_MAD: p->MAD(dst, src0, src1, src2); break;
      default: NOT_IMPLEMENTED;
    }
  }

  /*! No-op instructions are not supported yet */
  void GenContext::emitNoOpInstruction(const SelectionInstruction &insn) {
    NOT_IMPLEMENTED;
  }

  /*! Encode a WAIT. The selection instruction carries no operand we use */
  void GenContext::emitWaitInstruction(const SelectionInstruction &insn) {
    p->WAIT();
  }

  /*! Encode a BARRIER that takes the first source as its operand */
  void GenContext::emitBarrierInstruction(const SelectionInstruction &insn) {
    const GenRegister src = ra->genReg(insn.src(0));
    p->BARRIER(src);
  }

  /*! Encode a MATH instruction. insn.extra.function selects the math function
   *  and the number of sources selects the one- or two-source form
   */
  void GenContext::emitMathInstruction(const SelectionInstruction &insn) {
    const GenRegister dst = ra->genReg(insn.dst(0));
    const GenRegister src0 = ra->genReg(insn.src(0));
    const uint32_t function = insn.extra.function;
    if (insn.srcNum == 2) {
      const GenRegister src1 = ra->genReg(insn.src(1));
      p->MATH(dst, function, src0, src1);
    } else
      p->MATH(dst, function, src0);
  }

  /*! Encode CMP (no destination register) or SEL_CMP (with a destination).
   *  insn.extra.function is forwarded as the first argument of both
   */
  void GenContext::emitCompareInstruction(const SelectionInstruction &insn) {
    const GenRegister src0 = ra->genReg(insn.src(0));
    const GenRegister src1 = ra->genReg(insn.src(1));
    if (insn.opcode == SEL_OP_CMP)
      p->CMP(insn.extra.function, src0, src1);
    else {
      GBE_ASSERT(insn.opcode == SEL_OP_SEL_CMP);
      const GenRegister dst = ra->genReg(insn.dst(0));
      p->SEL_CMP(insn.extra.function, dst, src0, src1);
    }
  }

  /*! Encode a JMPI and remember its position and target label. The jump
   *  distance is filled in later by patchBranches
   */
  void GenContext::emitJumpInstruction(const SelectionInstruction &insn) {
    const ir::LabelIndex label(insn.index);
    const GenRegister src = ra->genReg(insn.src(0));
    // Record the position before encoding: this is the index of the JMPI
    this->branchPos2.push_back(std::make_pair(label, p->store.size()));
    p->JMPI(src);
  }

  /*! Encode the EOT with no predicate, an execution width of 8 and noMask
   *  set. The encoder state change is bracketed by push / pop
   */
  void GenContext::emitEotInstruction(const SelectionInstruction &insn) {
    p->push();
      p->curr.predicate = GEN_PREDICATE_NONE;
      p->curr.execWidth = 8;
      p->curr.noMask = 1;
      p->EOT(0);
    p->pop();
  }

  /*! Encode an untyped read. insn.extra.function is used as the binding table
   *  index and insn.extra.elem as the number of elements
   */
  void GenContext::emitUntypedReadInstruction(const SelectionInstruction &insn) {
    const GenRegister dst = ra->genReg(insn.dst(0));
    const GenRegister src = ra->genReg(insn.src(0));
    const uint32_t bti = insn.extra.function;
    const uint32_t elemNum = insn.extra.elem;
    p->UNTYPED_READ(dst, src, bti, elemNum);
  }

  /*! Encode an untyped write. insn.extra.function is used as the binding table
   *  index and insn.extra.elem as the number of elements
   */
  void GenContext::emitUntypedWriteInstruction(const SelectionInstruction &insn) {
    const GenRegister src = ra->genReg(insn.src(0));
    const uint32_t bti = insn.extra.function;
    const uint32_t elemNum = insn.extra.elem;
    p->UNTYPED_WRITE(src, bti, elemNum);
  }

  /*! Encode a byte gather. insn.extra.function is used as the binding table
   *  index and insn.extra.elem as the element size
   */
  void GenContext::emitByteGatherInstruction(const SelectionInstruction &insn) {
    const GenRegister dst = ra->genReg(insn.dst(0));
    const GenRegister src = ra->genReg(insn.src(0));
    const uint32_t bti = insn.extra.function;
    const uint32_t elemSize = insn.extra.elem;
    p->BYTE_GATHER(dst, src, bti, elemSize);
  }

  /*! Encode a byte scatter. insn.extra.function is used as the binding table
   *  index and insn.extra.elem as the element size
   */
  void GenContext::emitByteScatterInstruction(const SelectionInstruction &insn) {
    const GenRegister src = ra->genReg(insn.src(0));
    const uint32_t bti = insn.extra.function;
    const uint32_t elemSize = insn.extra.elem;
    p->BYTE_SCATTER(src, bti, elemSize);
  }

  /*! When set, the generated instructions are disassembled on stdout */
  BVAR(OCL_OUTPUT_ASM, false);

  /*! Run the complete code generation: instruction selection, scheduling
   *  before register allocation, register allocation, scheduling after
   *  register allocation, encoding and branch patching. The encoded
   *  instructions are then copied into the kernel.
   *  \return false if register allocation failed, true otherwise
   */
  bool GenContext::emitCode(void) {
    GenKernel *genKernel = static_cast<GenKernel*>(this->kernel);
    sel->select();
    schedulePreRegAllocation(*this, *this->sel);
    if (UNLIKELY(ra->allocate(*this->sel) == false))
      return false;
    schedulePostRegAllocation(*this, *this->sel);
    this->emitStackPointer();
    this->emitInstructionStream();
    this->patchBranches();

    // Hand a private copy of the encoded instruction stream to the kernel
    genKernel->insnNum = p->store.size();
    genKernel->insns = GBE_NEW_ARRAY_NO_ARG(GenInstruction, genKernel->insnNum);
    std::memcpy(genKernel->insns, &p->store[0], genKernel->insnNum * sizeof(GenInstruction));

    if (OCL_OUTPUT_ASM)
      for (uint32_t insnID = 0; insnID < genKernel->insnNum; ++insnID)
        gen_disasm(stdout, &p->store[insnID]);
    return true;
  }

  /*! Allocate the Gen specific kernel that will receive the generated code */
  Kernel *GenContext::allocateKernel(void) {
    return GBE_NEW(GenKernel, name);
  }

} /* namespace gbe */