diff options
author | Homer Hsing <homer.xing@intel.com> | 2013-10-10 10:13:41 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@linux.intel.com> | 2013-10-10 13:22:38 +0800 |
commit | 04efbda63e00bef950ac35dde9285b3002ba9ba4 (patch) | |
tree | a50955c21d1472ed231e009b7f0e591fdff2fd51 | |
parent | 1ad7e368cf9e1ac2f5256b70b20e1e46a06a92e0 (diff) | |
download | beignet-04efbda63e00bef950ac35dde9285b3002ba9ba4.tar.gz |
saturated conversion of native GPU data type, larger to narrower
This patch supports saturated conversion of
native GPU data type (char/short/int/float),
from a larger-range data type to a narrower-range data type.
For instance, convert_uchar_sat(int)
Several test cases are in this patch.
v2: add uint->int, int->uint
Signed-off-by: Homer Hsing <homer.xing@intel.com>
Reviewed-by: "Yang, Rong R" <rong.r.yang@intel.com>
-rw-r--r-- | backend/src/backend/gen_insn_selection.cpp | 13 | ||||
-rw-r--r-- | backend/src/ir/instruction.cpp | 10 | ||||
-rw-r--r-- | backend/src/ir/instruction.hpp | 2 | ||||
-rw-r--r-- | backend/src/ir/instruction.hxx | 1 | ||||
-rw-r--r-- | backend/src/llvm/llvm_gen_backend.cpp | 75 | ||||
-rw-r--r-- | backend/src/llvm/llvm_gen_ocl_function.hxx | 31 | ||||
-rw-r--r-- | backend/src/ocl_stdlib.tmpl.h | 30 | ||||
-rw-r--r-- | kernels/builtin_convert_sat.cl | 32 | ||||
-rw-r--r-- | utests/CMakeLists.txt | 1 | ||||
-rw-r--r-- | utests/builtin_convert_sat.cpp | 71 |
10 files changed, 259 insertions, 7 deletions
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index bd528859..cddd76ef 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -2538,15 +2538,20 @@ namespace gbe const GenRegister dst = sel.selReg(insn.getDst(0), dstType); const GenRegister src = sel.selReg(insn.getSrc(0), srcType); + if(insn.getOpcode() == ir::OP_SAT_CVT) { + sel.push(); + sel.curr.saturate = 1; + } + // We need two instructions to make the conversion if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && (srcFamily == FAMILY_DWORD || srcFamily == FAMILY_QWORD)) { GenRegister unpacked; if (dstFamily == FAMILY_WORD) { - const uint32_t type = TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W; + const uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W; unpacked = GenRegister::unpacked_uw(sel.reg(FAMILY_DWORD)); unpacked = GenRegister::retype(unpacked, type); } else { - const uint32_t type = TYPE_U8 ? GEN_TYPE_UB : GEN_TYPE_B; + const uint32_t type = dstType == TYPE_U8 ? GEN_TYPE_UB : GEN_TYPE_B; unpacked = GenRegister::unpacked_ub(sel.reg(FAMILY_DWORD)); unpacked = GenRegister::retype(unpacked, type); } @@ -2581,6 +2586,10 @@ namespace gbe } } else sel.MOV(dst, src); + + if(insn.getOpcode() == ir::OP_SAT_CVT) + sel.pop(); + return true; } DECL_CTOR(ConvertInstruction, 1, 1); diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index 7c6c6c6c..9b3e6998 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -252,9 +252,10 @@ namespace ir { ConvertInstruction(Type dstType, Type srcType, Register dst, - Register src) + Register src, + bool saturated=false) { - this->opcode = OP_CVT; + this->opcode = saturated ? OP_SAT_CVT : OP_CVT; this->dst[0] = dst; this->src[0] = src; this->dstType = dstType; @@ -1469,6 +1470,11 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType()) return internal::ConvertInstruction(dstType, srcType, dst, src).convert(); } + // saturated convert + Instruction SAT_CVT(Type dstType, Type srcType, Register dst, Register src) { + return internal::ConvertInstruction(dstType, srcType, dst, src, true).convert(); + } + // For all unary functions with given opcode Instruction ATOMIC(AtomicOps atomicOp, Register dst, AddressSpace space, Tuple src) { return internal::AtomicInstruction(atomicOp, dst, space, src).convert(); diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 27a34d1f..90c819b4 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -623,6 +623,8 @@ namespace ir { Instruction GT(Type type, Register dst, Register src0, Register src1); /*! cvt.{dstType <- srcType} dst src */ Instruction CVT(Type dstType, Type srcType, Register dst, Register src); + /*! sat_cvt.{dstType <- srcType} dst src */ + Instruction SAT_CVT(Type dstType, Type srcType, Register dst, Register src); /*! atomic dst addr.space {src1 {src2}} */ Instruction ATOMIC(AtomicOps opcode, Register dst, AddressSpace space, Tuple src); /*! bra labelIndex */ diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx index 1a9f8675..cd60349f 100644 --- a/backend/src/ir/instruction.hxx +++ b/backend/src/ir/instruction.hxx @@ -61,6 +61,7 @@ DECL_INSN(LT, CompareInstruction) DECL_INSN(GE, CompareInstruction) DECL_INSN(GT, CompareInstruction) DECL_INSN(CVT, ConvertInstruction) +DECL_INSN(SAT_CVT, ConvertInstruction) DECL_INSN(ATOMIC, AtomicInstruction) DECL_INSN(BRA, BranchInstruction) DECL_INSN(RET, BranchInstruction) diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 8b73ac9e..5b6857d8 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -1910,6 +1910,30 @@ namespace gbe case GEN_OCL_I64RHADD: case GEN_OCL_I64_MAD_SAT: case GEN_OCL_I64_MAD_SATU: + case GEN_OCL_SAT_CONV_U8_TO_I8: + case GEN_OCL_SAT_CONV_I16_TO_I8: + case GEN_OCL_SAT_CONV_U16_TO_I8: + case GEN_OCL_SAT_CONV_I32_TO_I8: + case GEN_OCL_SAT_CONV_U32_TO_I8: + case GEN_OCL_SAT_CONV_F32_TO_I8: + case GEN_OCL_SAT_CONV_I8_TO_U8: + case GEN_OCL_SAT_CONV_I16_TO_U8: + case GEN_OCL_SAT_CONV_U16_TO_U8: + case GEN_OCL_SAT_CONV_I32_TO_U8: + case GEN_OCL_SAT_CONV_U32_TO_U8: + case GEN_OCL_SAT_CONV_F32_TO_U8: + case GEN_OCL_SAT_CONV_U16_TO_I16: + case GEN_OCL_SAT_CONV_I32_TO_I16: + case GEN_OCL_SAT_CONV_U32_TO_I16: + case GEN_OCL_SAT_CONV_F32_TO_I16: + case GEN_OCL_SAT_CONV_I16_TO_U16: + case GEN_OCL_SAT_CONV_I32_TO_U16: + case GEN_OCL_SAT_CONV_U32_TO_U16: + case GEN_OCL_SAT_CONV_F32_TO_U16: + case GEN_OCL_SAT_CONV_U32_TO_I32: + case GEN_OCL_SAT_CONV_F32_TO_I32: + case GEN_OCL_SAT_CONV_I32_TO_U32: + case GEN_OCL_SAT_CONV_F32_TO_U32: this->newRegister(&I); break; default: @@ -2415,6 +2439,57 @@ namespace gbe ctx.I64RHADD(ir::TYPE_U64, dst, src0, src1); break; } +#define DEF(DST_TYPE, SRC_TYPE) \ + { ctx.SAT_CVT(DST_TYPE, SRC_TYPE, getRegister(&I), getRegister(I.getOperand(0))); break; } + case GEN_OCL_SAT_CONV_U8_TO_I8: + DEF(ir::TYPE_S8, ir::TYPE_U8); + case GEN_OCL_SAT_CONV_I16_TO_I8: + DEF(ir::TYPE_S8, ir::TYPE_S16); + case GEN_OCL_SAT_CONV_U16_TO_I8: + DEF(ir::TYPE_S8, ir::TYPE_U16); + case GEN_OCL_SAT_CONV_I32_TO_I8: + DEF(ir::TYPE_S8, ir::TYPE_S32); + case GEN_OCL_SAT_CONV_U32_TO_I8: + DEF(ir::TYPE_S8, ir::TYPE_U32); + case GEN_OCL_SAT_CONV_F32_TO_I8: + DEF(ir::TYPE_S8, ir::TYPE_FLOAT); + case GEN_OCL_SAT_CONV_I8_TO_U8: + DEF(ir::TYPE_U8, ir::TYPE_S8); + case GEN_OCL_SAT_CONV_I16_TO_U8: + DEF(ir::TYPE_U8, ir::TYPE_S16); + case GEN_OCL_SAT_CONV_U16_TO_U8: + DEF(ir::TYPE_U8, ir::TYPE_U16); + case GEN_OCL_SAT_CONV_I32_TO_U8: + DEF(ir::TYPE_U8, ir::TYPE_S32); + case GEN_OCL_SAT_CONV_U32_TO_U8: + DEF(ir::TYPE_U8, ir::TYPE_U32); + case GEN_OCL_SAT_CONV_F32_TO_U8: + DEF(ir::TYPE_U8, ir::TYPE_FLOAT); + case GEN_OCL_SAT_CONV_U16_TO_I16: + DEF(ir::TYPE_S16, ir::TYPE_U16); + case GEN_OCL_SAT_CONV_I32_TO_I16: + DEF(ir::TYPE_S16, ir::TYPE_S32); + case GEN_OCL_SAT_CONV_U32_TO_I16: + DEF(ir::TYPE_S16, ir::TYPE_U32); + case GEN_OCL_SAT_CONV_F32_TO_I16: + DEF(ir::TYPE_S16, ir::TYPE_FLOAT); + case GEN_OCL_SAT_CONV_I16_TO_U16: + DEF(ir::TYPE_U16, ir::TYPE_S16); + case GEN_OCL_SAT_CONV_I32_TO_U16: + DEF(ir::TYPE_U16, ir::TYPE_S32); + case GEN_OCL_SAT_CONV_U32_TO_U16: + DEF(ir::TYPE_U16, ir::TYPE_U32); + case GEN_OCL_SAT_CONV_F32_TO_U16: + DEF(ir::TYPE_U16, ir::TYPE_FLOAT); + case GEN_OCL_SAT_CONV_U32_TO_I32: + DEF(ir::TYPE_S32, ir::TYPE_U32); + case GEN_OCL_SAT_CONV_F32_TO_I32: + DEF(ir::TYPE_S32, ir::TYPE_FLOAT); + case GEN_OCL_SAT_CONV_I32_TO_U32: + DEF(ir::TYPE_U32, ir::TYPE_S32); + case GEN_OCL_SAT_CONV_F32_TO_U32: + DEF(ir::TYPE_U32, ir::TYPE_FLOAT); +#undef DEF default: break; } } diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index 321fc4e7..3f44be88 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -146,3 +146,34 @@ DECL_LLVM_GEN_FUNCTION(UPSAMPLE_LONG, _Z18__gen_ocl_upsamplell) // get sampler info DECL_LLVM_GEN_FUNCTION(GET_SAMPLER_INFO, __gen_ocl_get_sampler_info) + +// saturate convert +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U8_TO_I8, _Z16convert_char_sath) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I16_TO_I8, _Z16convert_char_sats) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U16_TO_I8, _Z16convert_char_satt) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_I8, _Z16convert_char_sati) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U32_TO_I8, _Z16convert_char_satj) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_I8, _Z16convert_char_satf) + +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I8_TO_U8, _Z17convert_uchar_satc) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I16_TO_U8, _Z17convert_uchar_sats) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U16_TO_U8, _Z17convert_uchar_satt) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_U8, _Z17convert_uchar_sati) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U32_TO_U8, _Z17convert_uchar_satj) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_U8, _Z17convert_uchar_satf) + +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U16_TO_I16, _Z17convert_short_satt) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_I16, _Z17convert_short_sati) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U32_TO_I16, _Z17convert_short_satj) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_I16, _Z17convert_short_satf) + +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I16_TO_U16, _Z18convert_ushort_sats) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_U16, _Z18convert_ushort_sati) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U32_TO_U16, _Z18convert_ushort_satj) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_U16, _Z18convert_ushort_satf) + +DECL_LLVM_GEN_FUNCTION(SAT_CONV_U32_TO_I32, _Z15convert_int_satj) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_I32, _Z15convert_int_satf) + +DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_U32, _Z16convert_uint_sati) +DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_U32, _Z16convert_uint_satf) diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index a4a1562f..66639147 100644 --- a/backend/src/ocl_stdlib.tmpl.h +++ b/backend/src/ocl_stdlib.tmpl.h @@ -219,9 +219,33 @@ UDEF(uint); UDEF(ulong); #undef UDEF -uchar INLINE_OVERLOADABLE convert_uchar_sat(float x) { - return add_sat((uchar)x, (uchar)0); -} +#define DEF(DSTTYPE, SRCTYPE) \ + OVERLOADABLE DSTTYPE convert_ ## DSTTYPE ## _sat(SRCTYPE x); +DEF(char, uchar); +DEF(char, short); +DEF(char, ushort); +DEF(char, int); +DEF(char, uint); +DEF(char, float); +DEF(uchar, char); +DEF(uchar, short); +DEF(uchar, ushort); +DEF(uchar, int); +DEF(uchar, uint); +DEF(uchar, float); +DEF(short, ushort); +DEF(short, int); +DEF(short, uint); +DEF(short, float); +DEF(ushort, short); +DEF(ushort, int); +DEF(ushort, uint); +DEF(ushort, float); +DEF(int, uint); +DEF(int, float); +DEF(uint, int); +DEF(uint, float); +#undef DEF INLINE_OVERLOADABLE int isfinite(float x) { return __builtin_isfinite(x); } INLINE_OVERLOADABLE int isinf(float x) { return __builtin_isinf(x); } diff --git a/kernels/builtin_convert_sat.cl b/kernels/builtin_convert_sat.cl new file mode 100644 index 00000000..281c8904 --- /dev/null +++ b/kernels/builtin_convert_sat.cl @@ -0,0 +1,32 @@ +#define DEF(DSTTYPE, SRCTYPE) \ + kernel void builtin_convert_ ## SRCTYPE ## _to_ ## DSTTYPE ## _sat(global SRCTYPE *src, global DSTTYPE *dst) { \ + int i = get_global_id(0); \ + dst[i] = convert_ ## DSTTYPE ## _sat(src[i]); \ +} + +DEF(char, uchar); +DEF(char, short); +DEF(char, ushort); +DEF(char, int); +DEF(char, uint); +DEF(char, float); +DEF(uchar, char); +DEF(uchar, short); +DEF(uchar, ushort); +DEF(uchar, int); +DEF(uchar, uint); +DEF(uchar, float); +DEF(short, ushort); +DEF(short, int); +DEF(short, uint); +DEF(short, float); +DEF(ushort, short); +DEF(ushort, int); +DEF(ushort, uint); +DEF(ushort, float); +DEF(int, uint); +DEF(int, float); +DEF(uint, int); +DEF(uint, float); +#undef DEF + diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index f18bd46f..31b85e38 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -126,6 +126,7 @@ set (utests_sources builtin_num_groups.cpp builtin_local_id.cpp builtin_acos_asin.cpp + builtin_convert_sat.cpp runtime_createcontext.cpp runtime_null_kernel_arg.cpp runtime_event.cpp diff --git a/utests/builtin_convert_sat.cpp b/utests/builtin_convert_sat.cpp new file mode 100644 index 00000000..e16ce16a --- /dev/null +++ b/utests/builtin_convert_sat.cpp @@ -0,0 +1,71 @@ +#include <cstdint> +#include "utest_helper.hpp" + +typedef unsigned char uchar; +typedef unsigned short ushort; + +int64_t my_rand(void) { + int64_t x = rand() - RAND_MAX/2; + int64_t y = rand() - RAND_MAX/2; + return x * y; +} + +#define DEF(DST_TYPE, SRC_TYPE, DST_MIN, DST_MAX) \ +void builtin_convert_ ## SRC_TYPE ## _to_ ## DST_TYPE ## _sat(void) \ +{ \ + const int n = 128; \ + OCL_CREATE_KERNEL_FROM_FILE("builtin_convert_sat", "builtin_convert_" # SRC_TYPE "_to_" # DST_TYPE "_sat"); \ + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(SRC_TYPE), NULL); \ + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(DST_TYPE), NULL); \ + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); \ + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); \ + globals[0] = n; \ + locals[0] = 16; \ + OCL_MAP_BUFFER(0); \ + for (int i = 0; i < n; i++) \ + ((SRC_TYPE *)buf_data[0])[i] = my_rand(); \ + OCL_UNMAP_BUFFER(0); \ + OCL_NDRANGE(1); \ + OCL_MAP_BUFFER(0); \ + OCL_MAP_BUFFER(1); \ + for (int i = 0; i < n; i++) { \ + SRC_TYPE src = ((SRC_TYPE *)buf_data[0])[i]; \ + DST_TYPE dst; \ + if ((double)src > (double)DST_MAX) \ + dst = DST_MAX; \ + else if ((double)src < (double)DST_MIN) \ + dst = DST_MIN; \ + else \ + dst = src; \ + OCL_ASSERT(((DST_TYPE *)buf_data[1])[i] == dst); \ + } \ + OCL_UNMAP_BUFFER(0); \ + OCL_UNMAP_BUFFER(1); \ +} \ +MAKE_UTEST_FROM_FUNCTION(builtin_convert_ ## SRC_TYPE ## _to_ ## DST_TYPE ## _sat); + +DEF(char, uchar, -128, 127); +DEF(char, short, -128, 127); +DEF(char, ushort, -128, 127); +DEF(char, int, -128, 127); +DEF(char, uint, -128, 127); +DEF(char, float, -128, 127); +DEF(uchar, char, 0, 255); +DEF(uchar, short, 0, 255); +DEF(uchar, ushort, 0, 255); +DEF(uchar, int, 0, 255); +DEF(uchar, uint, 0, 255); +DEF(uchar, float, 0, 255); +DEF(short, ushort, -32768, 32767); +DEF(short, int, -32768, 32767); +DEF(short, uint, -32768, 32767); +DEF(short, float, -32768, 32767); +DEF(ushort, short, 0, 65535); +DEF(ushort, int, 0, 65535); +DEF(ushort, uint, 0, 65535); +DEF(ushort, float, 0, 65535); +DEF(int, uint, -0x7FFFFFFF-1, 0x7FFFFFFF); +DEF(int, float, -0x7FFFFFFF-1, 0x7FFFFFFF); +DEF(uint, int, 0, 0xffffffffu); +DEF(uint, float, 0, 0xffffffffu); +#undef DEF |