summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- backend/src/backend/gen_context.cpp | 7
-rw-r--r-- backend/src/backend/gen_context.hpp | 1
-rw-r--r-- backend/src/backend/gen_defs.hpp | 22
-rw-r--r-- backend/src/backend/gen_encoder.cpp | 9
-rw-r--r-- backend/src/backend/gen_encoder.hpp | 3
-rw-r--r-- backend/src/backend/gen_insn_gen7_schedule_info.hxx | 1
-rw-r--r-- backend/src/backend/gen_insn_selection.cpp | 33
-rw-r--r-- backend/src/backend/gen_insn_selection.hxx | 1
-rw-r--r-- backend/src/ir/instruction.cpp | 44
-rw-r--r-- backend/src/ir/instruction.hpp | 23
-rw-r--r-- backend/src/llvm/llvm_gen_backend.cpp | 6
-rw-r--r-- backend/src/ocl_stdlib.h | 9
-rw-r--r-- backend/src/ocl_stdlib_str.cpp | 9
13 files changed, 144 insertions, 24 deletions
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 5eace280..59e738c0 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -181,7 +181,12 @@ namespace gbe
}
void GenContext::emitWaitInstruction(const SelectionInstruction &insn) {
- NOT_IMPLEMENTED;
+ p->WAIT();
+ }
+
+ void GenContext::emitBarrierInstruction(const SelectionInstruction &insn) {
+ const GenRegister src = ra->genReg(insn.src(0));
+ p->BARRIER(src);
}
void GenContext::emitMathInstruction(const SelectionInstruction &insn) {
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 20d2f0de..d0355fe3 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -83,6 +83,7 @@ namespace gbe
void emitEotInstruction(const SelectionInstruction &insn);
void emitNoOpInstruction(const SelectionInstruction &insn);
void emitWaitInstruction(const SelectionInstruction &insn);
+ void emitBarrierInstruction(const SelectionInstruction &insn);
void emitMathInstruction(const SelectionInstruction &insn);
void emitUntypedReadInstruction(const SelectionInstruction &insn);
void emitUntypedWriteInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
index c3bb5084..008a2981 100644
--- a/backend/src/backend/gen_defs.hpp
+++ b/backend/src/backend/gen_defs.hpp
@@ -359,6 +359,15 @@ enum GenMessageTarget {
#define GEN_MAX_NUM_BUFFER_ENTRIES (1 << 27)
+/* Message gateway */
+#define GEN_OPEN_GATEWAY 0b000
+#define GEN_CLOSE_GATEWAY 0b001
+#define GEN_FORWARD_MSG 0b010
+#define GEN_GET_TIME_STAMP 0b011
+#define GEN_BARRIER_MSG 0b100
+#define GEN_UPDATE_GATEWAT_STATE 0b101
+#define GEN_MMIO_READ_WRITE 0b110
+
/////////////////////////////////////////////////////////////////////////////
// Gen EU structures
/////////////////////////////////////////////////////////////////////////////
@@ -612,6 +621,19 @@ struct GenInstruction
} generic_gen5;
struct {
+ uint32_t sub_function_id:3;
+ uint32_t pad0:11;
+ uint32_t ack_req:1;
+ uint32_t notify:2;
+ uint32_t pad1:2;
+ uint32_t header:1;
+ uint32_t response_length:5;
+ uint32_t msg_length:4;
+ uint32_t pad2:2;
+ uint32_t end_of_thread:1;
+ } msg_gateway;
+
+ struct {
uint32_t opcode:1;
uint32_t request:1;
uint32_t pad0:2;
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 413e23cb..bed98120 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -683,6 +683,15 @@ namespace gbe
this->setSrc1(insn, GenRegister::immud(0x0));
}
+ void GenEncoder::BARRIER(GenRegister src) {
+ GenInstruction *insn = this->next(GEN_OPCODE_SEND);
+ this->setHeader(insn);
+ this->setDst(insn, GenRegister::null());
+ this->setSrc0(insn, src);
+ setMessageDescriptor(this, insn, GEN_SFID_MESSAGE_GATEWAY, 1, 0);
+ insn->bits3.msg_gateway.sub_function_id = GEN_BARRIER_MSG;
+ }
+
void GenEncoder::JMPI(GenRegister src) {
alu2(this, GEN_OPCODE_JMPI, GenRegister::ip(), GenRegister::ip(), src);
}
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index 5e96e38a..3ad52e71 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -116,7 +116,8 @@ namespace gbe
#undef ALU1
#undef ALU2
#undef ALU3
-
+ /*! Barrier message (to synchronize threads of a workgroup) */
+ void BARRIER(GenRegister src);
/*! Jump indexed instruction */
void JMPI(GenRegister src);
/*! Compare instructions */
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index 38b76b7a..d9daad60 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -9,6 +9,7 @@ DECL_GEN7_SCHEDULE(Eot, 20, 1, 1)
DECL_GEN7_SCHEDULE(NoOp, 20, 2, 2)
DECL_GEN7_SCHEDULE(Wait, 20, 2, 2)
DECL_GEN7_SCHEDULE(Math, 20, 4, 2)
+DECL_GEN7_SCHEDULE(Barrier, 80, 1, 1)
DECL_GEN7_SCHEDULE(UntypedRead, 80, 1, 1)
DECL_GEN7_SCHEDULE(UntypedWrite, 80, 1, 1)
DECL_GEN7_SCHEDULE(ByteGather, 80, 1, 1)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index e525a675..e8e898f7 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -429,7 +429,8 @@ namespace gbe
#undef ALU1
#undef ALU2
#undef ALU3
-
+ /*! Encode a barrier instruction */
+ void BARRIER(GenRegister src);
/*! Encode a label instruction */
void LABEL(ir::LabelIndex label);
/*! Jump indexed instruction */
@@ -668,6 +669,11 @@ namespace gbe
insn->index = uint16_t(index);
}
+ void Selection::Opaque::BARRIER(GenRegister src) {
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_BARRIER, 0, 1);
+ insn->src(0) = src;
+ }
+
void Selection::Opaque::JMPI(Reg src, ir::LabelIndex index) {
SelectionInstruction *insn = this->appendInsn(SEL_OP_JMPI, 0, 1);
insn->src(0) = src;
@@ -1451,7 +1457,6 @@ namespace gbe
}
DECL_NOT_IMPLEMENTED_ONE_TO_MANY(SampleInstruction);
DECL_NOT_IMPLEMENTED_ONE_TO_MANY(TypedWriteInstruction);
- DECL_NOT_IMPLEMENTED_ONE_TO_MANY(FenceInstruction);
#undef DECL_NOT_IMPLEMENTED_ONE_TO_MANY
/*! Load immediate pattern */
@@ -1483,6 +1488,28 @@ namespace gbe
DECL_CTOR(LoadImmInstruction, 1,1);
};
+ /*! Sync instruction */
+ DECL_PATTERN(SyncInstruction)
+ {
+ INLINE bool emitOne(Selection::Opaque &sel, const ir::SyncInstruction &insn) const
+ {
+ using namespace ir;
+ const uint32_t params = insn.getParameters();
+ GBE_ASSERTM(params == syncLocalBarrier,
+ "Only barrier(CLK_LOCAL_MEM_FENCE) is supported right now "
+ "for the synchronization primitives");
+ const ir::Register reg = sel.reg(FAMILY_DWORD);
+
+ // A barrier is OK to start the thread synchronization *and* SLM fence
+ sel.BARRIER(GenRegister::f8grf(reg));
+ // Now we wait for the other threads
+ sel.WAIT();
+ return true;
+ }
+
+ DECL_CTOR(SyncInstruction, 1,1);
+ };
+
INLINE uint32_t getByteScatterGatherSize(ir::Type type) {
using namespace ir;
switch (type) {
@@ -1973,7 +2000,7 @@ namespace gbe
this->insert<BinaryInstructionPattern>();
this->insert<SampleInstructionPattern>();
this->insert<TypedWriteInstructionPattern>();
- this->insert<FenceInstructionPattern>();
+ this->insert<SyncInstructionPattern>();
this->insert<LoadImmInstructionPattern>();
this->insert<LoadInstructionPattern>();
this->insert<StoreInstructionPattern>();
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index bf0bb833..2d14e21c 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -27,6 +27,7 @@ DECL_SELECTION_IR(EOT, EotInstruction)
DECL_SELECTION_IR(NOP, NoOpInstruction)
DECL_SELECTION_IR(WAIT, WaitInstruction)
DECL_SELECTION_IR(MATH, MathInstruction)
+DECL_SELECTION_IR(BARRIER, BarrierInstruction)
DECL_SELECTION_IR(UNTYPED_READ, UntypedReadInstruction)
DECL_SELECTION_IR(UNTYPED_WRITE, UntypedWriteInstruction)
DECL_SELECTION_IR(BYTE_GATHER, ByteGatherInstruction)
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 74124575..2d5a3f1a 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -436,16 +436,14 @@ namespace ir {
public NDstPolicy<SyncInstruction, 0>
{
public:
- INLINE SyncInstruction(AddressSpace addrSpace) {
+ INLINE SyncInstruction(uint32_t parameters) {
this->opcode = OP_SYNC;
- this->addrSpace = addrSpace;
- }
- bool wellFormed(const Function &fn, std::string &why) const;
- INLINE void out(std::ostream &out, const Function &fn) const {
- this->outOpcode(out);
- out << "." << addrSpace;
+ this->parameters = parameters;
}
- AddressSpace addrSpace; //!< The loads and stores to order
+ INLINE uint32_t getParameters(void) const { return this->parameters; }
+ INLINE bool wellFormed(const Function &fn, std::string &why) const;
+ INLINE void out(std::ostream &out, const Function &fn) const;
+ uint32_t parameters;
Register dst[], src[];
};
@@ -717,9 +715,20 @@ namespace ir {
return true;
}
- // Nothing can go wrong here
INLINE bool SyncInstruction::wellFormed(const Function &fn, std::string &whyNot) const
{
+ const uint32_t maxParams = SYNC_WORKGROUP_EXEC |
+ SYNC_LOCAL_READ_FENCE |
+ SYNC_LOCAL_WRITE_FENCE |
+ SYNC_GLOBAL_READ_FENCE |
+ SYNC_GLOBAL_WRITE_FENCE;
+ if (UNLIKELY(this->parameters > maxParams)) {
+ whyNot = "Invalid parameters for sync instruction";
+ return false;
+ } else if (UNLIKELY(this->parameters == 0)) {
+ whyNot = "Missing parameters for sync instruction";
+ return false;
+ }
return true;
}
@@ -820,6 +829,18 @@ namespace ir {
fn.outImmediate(out, immediateIndex);
}
+ static const char *syncStr[syncFieldNum] = {
+ "workgroup", "local_read", "local_write", "global_read", "global_write"
+ };
+
+ INLINE void SyncInstruction::out(std::ostream &out, const Function &fn) const {
+ this->outOpcode(out);
+ for (uint32_t field = 0; field < syncFieldNum; ++field)
+ if (this->parameters & (1 << field))
+ out << "." << syncStr[field];
+ }
+
+
} /* namespace internal */
std::ostream &operator<< (std::ostream &out, AddressSpace addrSpace) {
@@ -1083,6 +1104,7 @@ DECL_MEM_FN(LoadImmInstruction, Type, getType(void), getType())
DECL_MEM_FN(LabelInstruction, LabelIndex, getLabelIndex(void), getLabelIndex())
DECL_MEM_FN(BranchInstruction, bool, isPredicated(void), isPredicated())
DECL_MEM_FN(BranchInstruction, LabelIndex, getLabelIndex(void), getLabelIndex())
+DECL_MEM_FN(SyncInstruction, uint32_t, getParameters(void), getParameters())
#undef DECL_MEM_FN
@@ -1204,8 +1226,8 @@ DECL_MEM_FN(BranchInstruction, LabelIndex, getLabelIndex(void), getLabelIndex())
#undef DECL_EMIT_FUNCTION
// FENCE
- Instruction FENCE(AddressSpace space) {
- return internal::SyncInstruction(space).convert();
+ Instruction SYNC(uint32_t parameters) {
+ return internal::SyncInstruction(parameters).convert();
}
// LABEL
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 70889243..11008006 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -347,11 +347,32 @@ namespace ir {
static bool isClassOf(const Instruction &insn);
};
+ /*! Mapped to OpenCL (mem_fence, read_mem_fence, write_mem_fence, barrier) */
+ enum {
+ SYNC_WORKGROUP_EXEC = 1<<0,
+ SYNC_LOCAL_READ_FENCE = 1<<1,
+ SYNC_LOCAL_WRITE_FENCE = 1<<2,
+ SYNC_GLOBAL_READ_FENCE = 1<<3,
+ SYNC_GLOBAL_WRITE_FENCE = 1<<4,
+ SYNC_INVALID = 1<<5
+ };
+
+ /*! 5 bits to encode all possible synchronization capabilities */
+ static const uint32_t syncFieldNum = 5u;
+
+ /*! When barrier(CLK_LOCAL_MEM_FENCE) is issued */
+ static const uint32_t syncLocalBarrier = SYNC_WORKGROUP_EXEC |SYNC_LOCAL_WRITE_FENCE | SYNC_LOCAL_READ_FENCE;
+
+ /*! When barrier(CLK_GLOBAL_MEM_FENCE) is issued */
+ static const uint32_t syncGlobalBarrier = SYNC_WORKGROUP_EXEC | SYNC_GLOBAL_WRITE_FENCE | SYNC_GLOBAL_READ_FENCE;
+
/*! Sync instructions are used to order loads and stores for a given memory
* space and/or to serialize threads at a given point in the program
*/
class SyncInstruction : public Instruction {
public:
+ /*! Get the parameters (bitfields) of the sync instructions (see above) */
+ uint32_t getParameters(void) const;
/*! Return true if the given instruction is an instance of this class */
static bool isClassOf(const Instruction &insn);
};
@@ -484,6 +505,8 @@ namespace ir {
Instruction STORE(Type type, Tuple src, Register offset, AddressSpace space, uint32_t valueNum, bool dwAligned);
/*! loadi.type dst value */
Instruction LOADI(Type type, Register dst, ImmediateIndex value);
+ /*! sync.params... (see Sync instruction) */
+ Instruction SYNC(uint32_t parameters);
/*! typed write TODO */
Instruction TYPED_WRITE(void);
/*! sample TODO */
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 91e6d3b8..7e6fd7bf 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1656,6 +1656,9 @@ namespace gbe
break;
case GEN_OCL_FORCE_SIMD8:
case GEN_OCL_FORCE_SIMD16:
+ case GEN_OCL_LBARRIER:
+ case GEN_OCL_GBARRIER:
+ case GEN_OCL_LGBARRIER:
break;
default:
GBE_ASSERTM(false, "Function call are not supported yet");
@@ -1757,6 +1760,9 @@ namespace gbe
case GEN_OCL_RNDD: this->emitUnaryCallInst(I,CS,ir::OP_RNDD); break;
case GEN_OCL_FORCE_SIMD8: ctx.setSimdWidth(8); break;
case GEN_OCL_FORCE_SIMD16: ctx.setSimdWidth(16); break;
+ case GEN_OCL_LBARRIER: ctx.SYNC(ir::syncLocalBarrier); break;
+ case GEN_OCL_GBARRIER: ctx.SYNC(ir::syncGlobalBarrier); break;
+ case GEN_OCL_LGBARRIER: ctx.SYNC(ir::syncLocalBarrier | ir::syncGlobalBarrier); break;
default: break;
}
}
diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h
index bf22af5f..196446fd 100644
--- a/backend/src/ocl_stdlib.h
+++ b/backend/src/ocl_stdlib.h
@@ -425,12 +425,13 @@ INLINE OVERLOADABLE float16 mix(float16 x, float16 y, float a) { return mix(x,y,
#define CLK_LOCAL_MEM_FENCE (1 << 0)
#define CLK_GLOBAL_MEM_FENCE (1 << 1)
-extern void __gen_ocl_barrier_local(void);
-extern void __gen_ocl_barrier_global(void);
-extern void __gen_ocl_barrier_local_and_global(void);
+void __gen_ocl_barrier_local(void);
+void __gen_ocl_barrier_global(void);
+void __gen_ocl_barrier_local_and_global(void);
+typedef uint cl_mem_fence_flags;
INLINE void barrier(cl_mem_fence_flags flags) {
- if (flags == CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE)
+ if (flags == (CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE))
__gen_ocl_barrier_local_and_global();
else if (flags == CLK_LOCAL_MEM_FENCE)
__gen_ocl_barrier_local();
diff --git a/backend/src/ocl_stdlib_str.cpp b/backend/src/ocl_stdlib_str.cpp
index 9828d962..abb623bc 100644
--- a/backend/src/ocl_stdlib_str.cpp
+++ b/backend/src/ocl_stdlib_str.cpp
@@ -428,12 +428,13 @@ std::string ocl_stdlib_str =
"#define CLK_LOCAL_MEM_FENCE (1 << 0)\n"
"#define CLK_GLOBAL_MEM_FENCE (1 << 1)\n"
"\n"
-"extern void __gen_ocl_barrier_local(void);\n"
-"extern void __gen_ocl_barrier_global(void);\n"
-"extern void __gen_ocl_barrier_local_and_global(void);\n"
+"void __gen_ocl_barrier_local(void);\n"
+"void __gen_ocl_barrier_global(void);\n"
+"void __gen_ocl_barrier_local_and_global(void);\n"
"\n"
+"typedef uint cl_mem_fence_flags;\n"
"INLINE void barrier(cl_mem_fence_flags flags) {\n"
-" if (flags == CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE)\n"
+" if (flags == (CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE))\n"
" __gen_ocl_barrier_local_and_global();\n"
" else if (flags == CLK_LOCAL_MEM_FENCE)\n"
" __gen_ocl_barrier_local();\n"