From c422c2fa2a8a6f4e98f2328ec4441867c373b500 Mon Sep 17 00:00:00 2001 From: Benjamin Segovia Date: Fri, 9 Nov 2012 20:50:58 -0800 Subject: Started the boiler plate for barrier (and fences) instructions Improved the comment on the instruction selection --- backend/src/backend/gen_insn_selection.cpp | 30 ++++++++++++++++++++++++++++++ backend/src/ir/instruction.cpp | 20 ++++++++++---------- backend/src/ir/instruction.hpp | 10 +++++----- backend/src/ir/instruction.hxx | 2 +- backend/src/llvm/llvm_gen_ocl_function.hxx | 5 +++++ backend/src/ocl_stdlib.h | 19 +++++++++++++++++++ backend/src/ocl_stdlib_str.cpp | 19 +++++++++++++++++++ 7 files changed, 89 insertions(+), 16 deletions(-) (limited to 'backend/src') diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 2e091e9d..e525a675 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -57,6 +57,27 @@ * of the pattern): this creates a library of patterns that may be used in * run-time. * + * Predication / Masking and CFG linearization + * =========================================== + * + * The current version is based on an unfortunate choice. Basically, the problem + * to solve is how to map unstructured branches (i.e. regular gotos) onto Gen. + * Gen has a native support for structured branches (if/else/endif/while...) but + * nothing really native for unstructured branches. + * + * The idea we implemented is simple. We stole one flag register (here f0.0) to + * mask all the instructions (and only activate the proper SIMD lanes) and we + * use the CFG linearization technique to properly handle the control flow. This + * is not really good for one particular reason: Gen instructions must use the + * *same* flag register for the predicates (used for masking) and the + * conditional modifier (used as a destination for CMP). This leads to extra + * complications with compare instructions and select instructions. 
Basically, + * we need to insert extra MOVs. + * + * Also, there is some extra kludge to handle the predicates for JMPI. + * + * See TODO for a better idea for branching and masking + * * TODO: * ===== * @@ -70,6 +91,15 @@ * matched with other instructions in the dominated block. This leads to the * interesting approach which consists in traversing the dominator tree in post * order + * + * About masking and branching, a much better idea (that I found later unfortunately) + * is to replace the use of the flag by uses of if/endif to enclose the basic + * block. So, instead of using predication, we use auto-masking. The very cool + * consequence is that we can reintegrate back the structured branches. + * Basically, we will be able to identify branches that can be mapped to + * structured branches and mix nicely unstructured branches (which will use + * jmpi, if/endif to mask the blocks) and structured branches (which are pretty + * fast) */ #include "backend/gen_insn_selection.hpp" diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index 31f0fd0a..74124575 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -430,14 +430,14 @@ namespace ir { Type type; //!< Type of the immediate }; - class ALIGNED_INSTRUCTION FenceInstruction : + class ALIGNED_INSTRUCTION SyncInstruction : public BasePolicy, - public NSrcPolicy, - public NDstPolicy + public NSrcPolicy, + public NDstPolicy { public: - INLINE FenceInstruction(AddressSpace addrSpace) { - this->opcode = OP_FENCE; + INLINE SyncInstruction(AddressSpace addrSpace) { + this->opcode = OP_SYNC; this->addrSpace = addrSpace; } bool wellFormed(const Function &fn, std::string &why) const; @@ -718,7 +718,7 @@ namespace ir { } // Nothing can go wrong here - INLINE bool FenceInstruction::wellFormed(const Function &fn, std::string &whyNot) const + INLINE bool SyncInstruction::wellFormed(const Function &fn, std::string &whyNot) const { return true; } @@ -915,9 +915,9 @@ 
START_INTROSPECTION(StoreInstruction) #include "ir/instruction.hxx" END_INTROSPECTION(StoreInstruction) -START_INTROSPECTION(FenceInstruction) +START_INTROSPECTION(SyncInstruction) #include "ir/instruction.hxx" -END_INTROSPECTION(FenceInstruction) +END_INTROSPECTION(SyncInstruction) START_INTROSPECTION(LabelInstruction) #include "ir/instruction.hxx" @@ -1056,7 +1056,7 @@ END_FUNCTION(Instruction, Register) bool Instruction::hasSideEffect(void) const { return opcode == OP_STORE || opcode == OP_TYPED_WRITE || - opcode == OP_FENCE; + opcode == OP_SYNC; } #define DECL_MEM_FN(CLASS, RET, PROTOTYPE, CALL) \ @@ -1205,7 +1205,7 @@ DECL_MEM_FN(BranchInstruction, LabelIndex, getLabelIndex(void), getLabelIndex()) // FENCE Instruction FENCE(AddressSpace space) { - return internal::FenceInstruction(space).convert(); + return internal::SyncInstruction(space).convert(); } // LABEL diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 7034ae42..70889243 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -347,10 +347,10 @@ namespace ir { static bool isClassOf(const Instruction &insn); }; - /*! Fence instructions are used to order loads and stores for a given memory - * space + /*! Sync instructions are used to order loads and stores for a given memory + * space and/or to serialize threads at a given point in the program */ - class FenceInstruction : public Instruction { + class SyncInstruction : public Instruction { public: /*! Return true if the given instruction is an instance of this class */ static bool isClassOf(const Instruction &insn); @@ -488,8 +488,8 @@ namespace ir { Instruction TYPED_WRITE(void); /*! sample TODO */ Instruction SAMPLE(void); - /*! fence.space */ - Instruction FENCE(AddressSpace space); + /*! sync.space */ + Instruction SYNC(AddressSpace space); /*! 
label labelIndex */ Instruction LABEL(LabelIndex labelIndex); diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx index 57e6f03f..6aedc1f9 100644 --- a/backend/src/ir/instruction.hxx +++ b/backend/src/ir/instruction.hxx @@ -66,6 +66,6 @@ DECL_INSN(LOAD, LoadInstruction) DECL_INSN(STORE, StoreInstruction) DECL_INSN(TYPED_WRITE, TypedWriteInstruction) DECL_INSN(SAMPLE, SampleInstruction) -DECL_INSN(FENCE, FenceInstruction) +DECL_INSN(SYNC, SyncInstruction) DECL_INSN(LABEL, LabelInstruction) diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index 6de7810e..551db3cb 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -31,6 +31,11 @@ DECL_LLVM_GEN_FUNCTION(RNDE, __gen_ocl_rnde) DECL_LLVM_GEN_FUNCTION(RNDU, __gen_ocl_rndu) DECL_LLVM_GEN_FUNCTION(RNDD, __gen_ocl_rndd) +// Barrier function +DECL_LLVM_GEN_FUNCTION(LBARRIER, __gen_ocl_barrier_local) +DECL_LLVM_GEN_FUNCTION(GBARRIER, __gen_ocl_barrier_global) +DECL_LLVM_GEN_FUNCTION(LGBARRIER, __gen_ocl_barrier_local_and_global) + // To force SIMD8/16 compilation DECL_LLVM_GEN_FUNCTION(FORCE_SIMD8, __gen_ocl_force_simd8) DECL_LLVM_GEN_FUNCTION(FORCE_SIMD16, __gen_ocl_force_simd16) diff --git a/backend/src/ocl_stdlib.h b/backend/src/ocl_stdlib.h index 561599ec..bf22af5f 100644 --- a/backend/src/ocl_stdlib.h +++ b/backend/src/ocl_stdlib.h @@ -419,6 +419,25 @@ INLINE OVERLOADABLE float4 mix(float4 x, float4 y, float a) { return mix(x,y,(fl INLINE OVERLOADABLE float8 mix(float8 x, float8 y, float a) { return mix(x,y,(float8)(a));} INLINE OVERLOADABLE float16 mix(float16 x, float16 y, float a) { return mix(x,y,(float16)(a));} +///////////////////////////////////////////////////////////////////////////// +// Synchronization functions +///////////////////////////////////////////////////////////////////////////// +#define CLK_LOCAL_MEM_FENCE (1 << 0) +#define CLK_GLOBAL_MEM_FENCE (1 << 1) + +extern void 
__gen_ocl_barrier_local(void); +extern void __gen_ocl_barrier_global(void); +extern void __gen_ocl_barrier_local_and_global(void); + +INLINE void barrier(cl_mem_fence_flags flags) { + if (flags == (CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE)) /* == binds tighter than |, hence the parentheses */ + __gen_ocl_barrier_local_and_global(); + else if (flags == CLK_LOCAL_MEM_FENCE) + __gen_ocl_barrier_local(); + else if (flags == CLK_GLOBAL_MEM_FENCE) + __gen_ocl_barrier_global(); +} + ///////////////////////////////////////////////////////////////////////////// // Force the compilation to SIMD8 or SIMD16 ///////////////////////////////////////////////////////////////////////////// diff --git a/backend/src/ocl_stdlib_str.cpp b/backend/src/ocl_stdlib_str.cpp index 32291786..9828d962 100644 --- a/backend/src/ocl_stdlib_str.cpp +++ b/backend/src/ocl_stdlib_str.cpp @@ -423,6 +423,25 @@ std::string ocl_stdlib_str = "INLINE OVERLOADABLE float16 mix(float16 x, float16 y, float a) { return mix(x,y,(float16)(a));}\n" "\n" "/////////////////////////////////////////////////////////////////////////////\n" +"// Synchronization functions\n" +"/////////////////////////////////////////////////////////////////////////////\n" +"#define CLK_LOCAL_MEM_FENCE (1 << 0)\n" +"#define CLK_GLOBAL_MEM_FENCE (1 << 1)\n" +"\n" +"extern void __gen_ocl_barrier_local(void);\n" +"extern void __gen_ocl_barrier_global(void);\n" +"extern void __gen_ocl_barrier_local_and_global(void);\n" +"\n" +"INLINE void barrier(cl_mem_fence_flags flags) {\n" +" if (flags == (CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE)) /* == binds tighter than |, hence the parentheses */\n" +" __gen_ocl_barrier_local_and_global();\n" +" else if (flags == CLK_LOCAL_MEM_FENCE)\n" +" __gen_ocl_barrier_local();\n" +" else if (flags == CLK_GLOBAL_MEM_FENCE)\n" +" __gen_ocl_barrier_global();\n" +"}\n" +"\n" +"/////////////////////////////////////////////////////////////////////////////\n" "// Force the compilation to SIMD8 or SIMD16\n" "/////////////////////////////////////////////////////////////////////////////\n" "\n" -- cgit v1.2.1