diff options
-rw-r--r-- | backend/src/backend/gen_context.cpp | 7 | ||||
-rw-r--r-- | backend/src/backend/gen_context.hpp | 1 | ||||
-rw-r--r-- | backend/src/backend/gen_defs.hpp | 22 | ||||
-rw-r--r-- | backend/src/backend/gen_encoder.cpp | 9 | ||||
-rw-r--r-- | backend/src/backend/gen_encoder.hpp | 3 | ||||
-rw-r--r-- | backend/src/backend/gen_insn_gen7_schedule_info.hxx | 1 | ||||
-rw-r--r-- | backend/src/backend/gen_insn_selection.cpp | 33 | ||||
-rw-r--r-- | backend/src/backend/gen_insn_selection.hxx | 1 | ||||
-rw-r--r-- | backend/src/ir/instruction.cpp | 44 | ||||
-rw-r--r-- | backend/src/ir/instruction.hpp | 23 | ||||
-rw-r--r-- | backend/src/llvm/llvm_gen_backend.cpp | 6 | ||||
-rw-r--r-- | backend/src/ocl_stdlib.h | 9 | ||||
-rw-r--r-- | backend/src/ocl_stdlib_str.cpp | 9 |
13 files changed, 144 insertions, 24 deletions
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 5eace280..59e738c0 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -181,7 +181,12 @@ namespace gbe } void GenContext::emitWaitInstruction(const SelectionInstruction &insn) { - NOT_IMPLEMENTED; + p->WAIT(); + } + + void GenContext::emitBarrierInstruction(const SelectionInstruction &insn) { + const GenRegister src = ra->genReg(insn.src(0)); + p->BARRIER(src); } void GenContext::emitMathInstruction(const SelectionInstruction &insn) { diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp index 20d2f0de..d0355fe3 100644 --- a/backend/src/backend/gen_context.hpp +++ b/backend/src/backend/gen_context.hpp @@ -83,6 +83,7 @@ namespace gbe void emitEotInstruction(const SelectionInstruction &insn); void emitNoOpInstruction(const SelectionInstruction &insn); void emitWaitInstruction(const SelectionInstruction &insn); + void emitBarrierInstruction(const SelectionInstruction &insn); void emitMathInstruction(const SelectionInstruction &insn); void emitUntypedReadInstruction(const SelectionInstruction &insn); void emitUntypedWriteInstruction(const SelectionInstruction &insn); diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp index c3bb5084..008a2981 100644 --- a/backend/src/backend/gen_defs.hpp +++ b/backend/src/backend/gen_defs.hpp @@ -359,6 +359,15 @@ enum GenMessageTarget { #define GEN_MAX_NUM_BUFFER_ENTRIES (1 << 27) +/* Message gateway */ +#define GEN_OPEN_GATEWAY 0b000 +#define GEN_CLOSE_GATEWAY 0b001 +#define GEN_FORWARD_MSG 0b010 +#define GEN_GET_TIME_STAMP 0b011 +#define GEN_BARRIER_MSG 0b100 +#define GEN_UPDATE_GATEWAT_STATE 0b101 +#define GEN_MMIO_READ_WRITE 0b110 + ///////////////////////////////////////////////////////////////////////////// // Gen EU structures ///////////////////////////////////////////////////////////////////////////// @@ -612,6 +621,19 @@ struct 
GenInstruction } generic_gen5; struct { + uint32_t sub_function_id:3; + uint32_t pad0:11; + uint32_t ack_req:1; + uint32_t notify:2; + uint32_t pad1:2; + uint32_t header:1; + uint32_t response_length:5; + uint32_t msg_length:4; + uint32_t pad2:2; + uint32_t end_of_thread:1; + } msg_gateway; + + struct { uint32_t opcode:1; uint32_t request:1; uint32_t pad0:2; diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp index 413e23cb..bed98120 100644 --- a/backend/src/backend/gen_encoder.cpp +++ b/backend/src/backend/gen_encoder.cpp @@ -683,6 +683,15 @@ namespace gbe this->setSrc1(insn, GenRegister::immud(0x0)); } + void GenEncoder::BARRIER(GenRegister src) { + GenInstruction *insn = this->next(GEN_OPCODE_SEND); + this->setHeader(insn); + this->setDst(insn, GenRegister::null()); + this->setSrc0(insn, src); + setMessageDescriptor(this, insn, GEN_SFID_MESSAGE_GATEWAY, 1, 0); + insn->bits3.msg_gateway.sub_function_id = GEN_BARRIER_MSG; + } + void GenEncoder::JMPI(GenRegister src) { alu2(this, GEN_OPCODE_JMPI, GenRegister::ip(), GenRegister::ip(), src); } diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp index 5e96e38a..3ad52e71 100644 --- a/backend/src/backend/gen_encoder.hpp +++ b/backend/src/backend/gen_encoder.hpp @@ -116,7 +116,8 @@ namespace gbe #undef ALU1 #undef ALU2 #undef ALU3 - + /*! Barrier message (to synchronize threads of a workgroup) */ + void BARRIER(GenRegister src); /*! Jump indexed instruction */ void JMPI(GenRegister src); /*! 
Compare instructions */ diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx index 38b76b7a..d9daad60 100644 --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx @@ -9,6 +9,7 @@ DECL_GEN7_SCHEDULE(Eot, 20, 1, 1) DECL_GEN7_SCHEDULE(NoOp, 20, 2, 2) DECL_GEN7_SCHEDULE(Wait, 20, 2, 2) DECL_GEN7_SCHEDULE(Math, 20, 4, 2) +DECL_GEN7_SCHEDULE(Barrier, 80, 1, 1) DECL_GEN7_SCHEDULE(UntypedRead, 80, 1, 1) DECL_GEN7_SCHEDULE(UntypedWrite, 80, 1, 1) DECL_GEN7_SCHEDULE(ByteGather, 80, 1, 1) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index e525a675..e8e898f7 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -429,7 +429,8 @@ namespace gbe #undef ALU1 #undef ALU2 #undef ALU3 - + /*! Encode a barrier instruction */ + void BARRIER(GenRegister src); /*! Encode a label instruction */ void LABEL(ir::LabelIndex label); /*! Jump indexed instruction */ @@ -668,6 +669,11 @@ namespace gbe insn->index = uint16_t(index); } + void Selection::Opaque::BARRIER(GenRegister src) { + SelectionInstruction *insn = this->appendInsn(SEL_OP_BARRIER, 0, 1); + insn->src(0) = src; + } + void Selection::Opaque::JMPI(Reg src, ir::LabelIndex index) { SelectionInstruction *insn = this->appendInsn(SEL_OP_JMPI, 0, 1); insn->src(0) = src; @@ -1451,7 +1457,6 @@ namespace gbe } DECL_NOT_IMPLEMENTED_ONE_TO_MANY(SampleInstruction); DECL_NOT_IMPLEMENTED_ONE_TO_MANY(TypedWriteInstruction); - DECL_NOT_IMPLEMENTED_ONE_TO_MANY(FenceInstruction); #undef DECL_NOT_IMPLEMENTED_ONE_TO_MANY /*! Load immediate pattern */ @@ -1483,6 +1488,28 @@ namespace gbe DECL_CTOR(LoadImmInstruction, 1,1); }; + /*! 
Sync instruction */ + DECL_PATTERN(SyncInstruction) + { + INLINE bool emitOne(Selection::Opaque &sel, const ir::SyncInstruction &insn) const + { + using namespace ir; + const uint32_t params = insn.getParameters(); + GBE_ASSERTM(params == syncLocalBarrier, + "Only barrier(CLK_LOCAL_MEM_FENCE) is supported right now " + "for the synchronization primitives"); + const ir::Register reg = sel.reg(FAMILY_DWORD); + + // A barrier is OK to start the thread synchronization *and* SLM fence + sel.BARRIER(GenRegister::f8grf(reg)); + // Now we wait for the other threads + sel.WAIT(); + return true; + } + + DECL_CTOR(SyncInstruction, 1,1); + }; + INLINE uint32_t getByteScatterGatherSize(ir::Type type) { using namespace ir; switch (type) { @@ -1973,7 +2000,7 @@ namespace gbe this->insert<BinaryInstructionPattern>(); this->insert<SampleInstructionPattern>(); this->insert<TypedWriteInstructionPattern>(); - this->insert<FenceInstructionPattern>(); + this->insert<SyncInstructionPattern>(); this->insert<LoadImmInstructionPattern>(); this->insert<LoadInstructionPattern>(); this->insert<StoreInstructionPattern>(); diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx index bf0bb833..2d14e21c 100644 --- a/backend/src/backend/gen_insn_selection.hxx +++ b/backend/src/backend/gen_insn_selection.hxx @@ -27,6 +27,7 @@ DECL_SELECTION_IR(EOT, EotInstruction) DECL_SELECTION_IR(NOP, NoOpInstruction) DECL_SELECTION_IR(WAIT, WaitInstruction) DECL_SELECTION_IR(MATH, MathInstruction) +DECL_SELECTION_IR(BARRIER, BarrierInstruction) DECL_SELECTION_IR(UNTYPED_READ, UntypedReadInstruction) DECL_SELECTION_IR(UNTYPED_WRITE, UntypedWriteInstruction) DECL_SELECTION_IR(BYTE_GATHER, ByteGatherInstruction) diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index 74124575..2d5a3f1a 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -436,16 +436,14 @@ namespace ir { public NDstPolicy<SyncInstruction, 0> { 
public: - INLINE SyncInstruction(AddressSpace addrSpace) { + INLINE SyncInstruction(uint32_t parameters) { this->opcode = OP_SYNC; - this->addrSpace = addrSpace; - } - bool wellFormed(const Function &fn, std::string &why) const; - INLINE void out(std::ostream &out, const Function &fn) const { - this->outOpcode(out); - out << "." << addrSpace; + this->parameters = parameters; } - AddressSpace addrSpace; //!< The loads and stores to order + INLINE uint32_t getParameters(void) const { return this->parameters; } + INLINE bool wellFormed(const Function &fn, std::string &why) const; + INLINE void out(std::ostream &out, const Function &fn) const; + uint32_t parameters; Register dst[], src[]; }; @@ -717,9 +715,20 @@ namespace ir { return true; } - // Nothing can go wrong here INLINE bool SyncInstruction::wellFormed(const Function &fn, std::string &whyNot) const { + const uint32_t maxParams = SYNC_WORKGROUP_EXEC | + SYNC_LOCAL_READ_FENCE | + SYNC_LOCAL_WRITE_FENCE | + SYNC_GLOBAL_READ_FENCE | + SYNC_GLOBAL_WRITE_FENCE; + if (UNLIKELY(this->parameters > maxParams)) { + whyNot = "Invalid parameters for sync instruction"; + return false; + } else if (UNLIKELY(this->parameters == 0)) { + whyNot = "Missing parameters for sync instruction"; + return false; + } return true; } @@ -820,6 +829,18 @@ namespace ir { fn.outImmediate(out, immediateIndex); } + static const char *syncStr[syncFieldNum] = { + "workgroup", "local_read", "local_write", "global_read", "global_write" + }; + + INLINE void SyncInstruction::out(std::ostream &out, const Function &fn) const { + this->outOpcode(out); + for (uint32_t field = 0; field < syncFieldNum; ++field) + if (this->parameters & (1 << field)) + out << "." 
<< syncStr[field]; + } + + } /* namespace internal */ std::ostream &operator<< (std::ostream &out, AddressSpace addrSpace) { @@ -1083,6 +1104,7 @@ DECL_MEM_FN(LoadImmInstruction, Type, getType(void), getType()) DECL_MEM_FN(LabelInstruction, LabelIndex, getLabelIndex(void), getLabelIndex()) DECL_MEM_FN(BranchInstruction, bool, isPredicated(void), isPredicated()) DECL_MEM_FN(BranchInstruction, LabelIndex, getLabelIndex(void), getLabelIndex()) +DECL_MEM_FN(SyncInstruction, uint32_t, getParameters(void), getParameters()) #undef DECL_MEM_FN @@ -1204,8 +1226,8 @@ DECL_MEM_FN(BranchInstruction, LabelIndex, getLabelIndex(void), getLabelIndex()) #undef DECL_EMIT_FUNCTION // FENCE - Instruction FENCE(AddressSpace space) { - return internal::SyncInstruction(space).convert(); + Instruction SYNC(uint32_t parameters) { + return internal::SyncInstruction(parameters).convert(); } // LABEL diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 70889243..11008006 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -347,11 +347,32 @@ namespace ir { static bool isClassOf(const Instruction &insn); }; + /*! Mapped to OpenCL (mem_fence, read_mem_fence, write_mem_fence, barrier) */ + enum { + SYNC_WORKGROUP_EXEC = 1<<0, + SYNC_LOCAL_READ_FENCE = 1<<1, + SYNC_LOCAL_WRITE_FENCE = 1<<2, + SYNC_GLOBAL_READ_FENCE = 1<<3, + SYNC_GLOBAL_WRITE_FENCE = 1<<4, + SYNC_INVALID = 1<<5 + }; + + /*! 5 bits to encode all possible synchronization capabilities */ + static const uint32_t syncFieldNum = 5u; + + /*! When barrier(CLK_LOCAL_MEM_FENCE) is issued */ + static const uint32_t syncLocalBarrier = SYNC_WORKGROUP_EXEC |SYNC_LOCAL_WRITE_FENCE | SYNC_LOCAL_READ_FENCE; + + /*! When barrier(CLK_GLOBAL_MEM_FENCE) is issued */ + static const uint32_t syncGlobalBarrier = SYNC_WORKGROUP_EXEC | SYNC_GLOBAL_WRITE_FENCE | SYNC_GLOBAL_READ_FENCE; + /*! 
Sync instructions are used to order loads and stores for a given memory * space and/or to serialize threads at a given point in the program */ class SyncInstruction : public Instruction { public: + /*! Get the parameters (bitfields) of the sync instructions (see above) */ + uint32_t getParameters(void) const; /*! Return true if the given instruction is an instance of this class */ static bool isClassOf(const Instruction &insn); }; @@ -484,6 +505,8 @@ namespace ir { Instruction STORE(Type type, Tuple src, Register offset, AddressSpace space, uint32_t valueNum, bool dwAligned); /*! loadi.type dst value */ Instruction LOADI(Type type, Register dst, ImmediateIndex value); + /*! sync.params... (see Sync instruction) */ + Instruction SYNC(uint32_t parameters); /*! typed write TODO */ Instruction TYPED_WRITE(void); /*! sample TODO */ diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 91e6d3b8..7e6fd7bf 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -1656,6 +1656,9 @@ namespace gbe break; case GEN_OCL_FORCE_SIMD8: case GEN_OCL_FORCE_SIMD16: + case GEN_OCL_LBARRIER: + case GEN_OCL_GBARRIER: + case GEN_OCL_LGBARRIER: break; default: GBE_ASSERTM(false, "Function call are not supported yet"); @@ -1757,6 +1760,9 @@ namespace gbe case GEN_OCL_RNDD: this->emitUnaryCallInst(I,CS,ir::OP_RNDD); break; case GEN_OCL_FORCE_SIMD8: ctx.setSimdWidth(8); break; case GEN_OCL_FORCE_SIMD16: ctx.setSimdWidth(16); break; + case GEN_OCL_LBARRIER: ctx.SYNC(ir::syncLocalBarrier); break; + case GEN_OCL_GBARRIER: ctx.SYNC(ir::syncGlobalBarrier); break; + case GEN_OCL_LGBARRIER: ctx.SYNC(ir::syncLocalBarrier | ir::syncGlobalBarrier); break; default: break; } } diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h index bf22af5f..196446fd 100644 --- a/backend/src/ocl_stdlib.h +++ b/backend/src/ocl_stdlib.h @@ -425,12 +425,13 @@ INLINE OVERLOADABLE float16 mix(float16 x, float16 y, float a) { 
return mix(x,y, #define CLK_LOCAL_MEM_FENCE (1 << 0) #define CLK_GLOBAL_MEM_FENCE (1 << 1) -extern void __gen_ocl_barrier_local(void); -extern void __gen_ocl_barrier_global(void); -extern void __gen_ocl_barrier_local_and_global(void); +void __gen_ocl_barrier_local(void); +void __gen_ocl_barrier_global(void); +void __gen_ocl_barrier_local_and_global(void); +typedef uint cl_mem_fence_flags; INLINE void barrier(cl_mem_fence_flags flags) { - if (flags == CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE) + if (flags == (CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE)) __gen_ocl_barrier_local_and_global(); else if (flags == CLK_LOCAL_MEM_FENCE) __gen_ocl_barrier_local(); diff --git a/backend/src/ocl_stdlib_str.cpp b/backend/src/ocl_stdlib_str.cpp index 9828d962..abb623bc 100644 --- a/backend/src/ocl_stdlib_str.cpp +++ b/backend/src/ocl_stdlib_str.cpp @@ -428,12 +428,13 @@ std::string ocl_stdlib_str = "#define CLK_LOCAL_MEM_FENCE (1 << 0)\n" "#define CLK_GLOBAL_MEM_FENCE (1 << 1)\n" "\n" -"extern void __gen_ocl_barrier_local(void);\n" -"extern void __gen_ocl_barrier_global(void);\n" -"extern void __gen_ocl_barrier_local_and_global(void);\n" +"void __gen_ocl_barrier_local(void);\n" +"void __gen_ocl_barrier_global(void);\n" +"void __gen_ocl_barrier_local_and_global(void);\n" "\n" +"typedef uint cl_mem_fence_flags;\n" "INLINE void barrier(cl_mem_fence_flags flags) {\n" -" if (flags == CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE)\n" +" if (flags == (CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE))\n" " __gen_ocl_barrier_local_and_global();\n" " else if (flags == CLK_LOCAL_MEM_FENCE)\n" " __gen_ocl_barrier_local();\n" |