summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHomer Hsing <homer.xing@intel.com>2013-10-10 10:13:41 +0800
committerZhigang Gong <zhigang.gong@linux.intel.com>2013-10-10 13:22:38 +0800
commit04efbda63e00bef950ac35dde9285b3002ba9ba4 (patch)
treea50955c21d1472ed231e009b7f0e591fdff2fd51
parent1ad7e368cf9e1ac2f5256b70b20e1e46a06a92e0 (diff)
downloadbeignet-04efbda63e00bef950ac35dde9285b3002ba9ba4.tar.gz
saturated conversion of native GPU data type, larger to narrower
This patch supports saturated conversion of native GPU data type (char/short/int/float), from a larger-range data type to a narrower-range data type. For instance, convert_uchar_sat(int) Several test cases are in this patch. v2: add uint->int, int->uint Signed-off-by: Homer Hsing <homer.xing@intel.com> Reviewed-by: "Yang, Rong R" <rong.r.yang@intel.com>
-rw-r--r--backend/src/backend/gen_insn_selection.cpp13
-rw-r--r--backend/src/ir/instruction.cpp10
-rw-r--r--backend/src/ir/instruction.hpp2
-rw-r--r--backend/src/ir/instruction.hxx1
-rw-r--r--backend/src/llvm/llvm_gen_backend.cpp75
-rw-r--r--backend/src/llvm/llvm_gen_ocl_function.hxx31
-rw-r--r--backend/src/ocl_stdlib.tmpl.h30
-rw-r--r--kernels/builtin_convert_sat.cl32
-rw-r--r--utests/CMakeLists.txt1
-rw-r--r--utests/builtin_convert_sat.cpp71
10 files changed, 259 insertions, 7 deletions
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index bd528859..cddd76ef 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -2538,15 +2538,20 @@ namespace gbe
const GenRegister dst = sel.selReg(insn.getDst(0), dstType);
const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
+ if(insn.getOpcode() == ir::OP_SAT_CVT) {
+ sel.push();
+ sel.curr.saturate = 1;
+ }
+
// We need two instructions to make the conversion
if (dstFamily != FAMILY_DWORD && dstFamily != FAMILY_QWORD && (srcFamily == FAMILY_DWORD || srcFamily == FAMILY_QWORD)) {
GenRegister unpacked;
if (dstFamily == FAMILY_WORD) {
- const uint32_t type = TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W;
+ const uint32_t type = dstType == TYPE_U16 ? GEN_TYPE_UW : GEN_TYPE_W;
unpacked = GenRegister::unpacked_uw(sel.reg(FAMILY_DWORD));
unpacked = GenRegister::retype(unpacked, type);
} else {
- const uint32_t type = TYPE_U8 ? GEN_TYPE_UB : GEN_TYPE_B;
+ const uint32_t type = dstType == TYPE_U8 ? GEN_TYPE_UB : GEN_TYPE_B;
unpacked = GenRegister::unpacked_ub(sel.reg(FAMILY_DWORD));
unpacked = GenRegister::retype(unpacked, type);
}
@@ -2581,6 +2586,10 @@ namespace gbe
}
} else
sel.MOV(dst, src);
+
+ if(insn.getOpcode() == ir::OP_SAT_CVT)
+ sel.pop();
+
return true;
}
DECL_CTOR(ConvertInstruction, 1, 1);
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 7c6c6c6c..9b3e6998 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -252,9 +252,10 @@ namespace ir {
ConvertInstruction(Type dstType,
Type srcType,
Register dst,
- Register src)
+ Register src,
+ bool saturated=false)
{
- this->opcode = OP_CVT;
+ this->opcode = saturated ? OP_SAT_CVT : OP_CVT;
this->dst[0] = dst;
this->src[0] = src;
this->dstType = dstType;
@@ -1469,6 +1470,11 @@ DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), getInfoType())
return internal::ConvertInstruction(dstType, srcType, dst, src).convert();
}
+ // saturated convert
+ Instruction SAT_CVT(Type dstType, Type srcType, Register dst, Register src) {
+ return internal::ConvertInstruction(dstType, srcType, dst, src, true).convert();
+ }
+
// For all unary functions with given opcode
Instruction ATOMIC(AtomicOps atomicOp, Register dst, AddressSpace space, Tuple src) {
return internal::AtomicInstruction(atomicOp, dst, space, src).convert();
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 27a34d1f..90c819b4 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -623,6 +623,8 @@ namespace ir {
Instruction GT(Type type, Register dst, Register src0, Register src1);
/*! cvt.{dstType <- srcType} dst src */
Instruction CVT(Type dstType, Type srcType, Register dst, Register src);
+ /*! sat_cvt.{dstType <- srcType} dst src */
+ Instruction SAT_CVT(Type dstType, Type srcType, Register dst, Register src);
/*! atomic dst addr.space {src1 {src2}} */
Instruction ATOMIC(AtomicOps opcode, Register dst, AddressSpace space, Tuple src);
/*! bra labelIndex */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index 1a9f8675..cd60349f 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -61,6 +61,7 @@ DECL_INSN(LT, CompareInstruction)
DECL_INSN(GE, CompareInstruction)
DECL_INSN(GT, CompareInstruction)
DECL_INSN(CVT, ConvertInstruction)
+DECL_INSN(SAT_CVT, ConvertInstruction)
DECL_INSN(ATOMIC, AtomicInstruction)
DECL_INSN(BRA, BranchInstruction)
DECL_INSN(RET, BranchInstruction)
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 8b73ac9e..5b6857d8 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1910,6 +1910,30 @@ namespace gbe
case GEN_OCL_I64RHADD:
case GEN_OCL_I64_MAD_SAT:
case GEN_OCL_I64_MAD_SATU:
+ case GEN_OCL_SAT_CONV_U8_TO_I8:
+ case GEN_OCL_SAT_CONV_I16_TO_I8:
+ case GEN_OCL_SAT_CONV_U16_TO_I8:
+ case GEN_OCL_SAT_CONV_I32_TO_I8:
+ case GEN_OCL_SAT_CONV_U32_TO_I8:
+ case GEN_OCL_SAT_CONV_F32_TO_I8:
+ case GEN_OCL_SAT_CONV_I8_TO_U8:
+ case GEN_OCL_SAT_CONV_I16_TO_U8:
+ case GEN_OCL_SAT_CONV_U16_TO_U8:
+ case GEN_OCL_SAT_CONV_I32_TO_U8:
+ case GEN_OCL_SAT_CONV_U32_TO_U8:
+ case GEN_OCL_SAT_CONV_F32_TO_U8:
+ case GEN_OCL_SAT_CONV_U16_TO_I16:
+ case GEN_OCL_SAT_CONV_I32_TO_I16:
+ case GEN_OCL_SAT_CONV_U32_TO_I16:
+ case GEN_OCL_SAT_CONV_F32_TO_I16:
+ case GEN_OCL_SAT_CONV_I16_TO_U16:
+ case GEN_OCL_SAT_CONV_I32_TO_U16:
+ case GEN_OCL_SAT_CONV_U32_TO_U16:
+ case GEN_OCL_SAT_CONV_F32_TO_U16:
+ case GEN_OCL_SAT_CONV_U32_TO_I32:
+ case GEN_OCL_SAT_CONV_F32_TO_I32:
+ case GEN_OCL_SAT_CONV_I32_TO_U32:
+ case GEN_OCL_SAT_CONV_F32_TO_U32:
this->newRegister(&I);
break;
default:
@@ -2415,6 +2439,57 @@ namespace gbe
ctx.I64RHADD(ir::TYPE_U64, dst, src0, src1);
break;
}
+#define DEF(DST_TYPE, SRC_TYPE) \
+ { ctx.SAT_CVT(DST_TYPE, SRC_TYPE, getRegister(&I), getRegister(I.getOperand(0))); break; }
+ case GEN_OCL_SAT_CONV_U8_TO_I8:
+ DEF(ir::TYPE_S8, ir::TYPE_U8);
+ case GEN_OCL_SAT_CONV_I16_TO_I8:
+ DEF(ir::TYPE_S8, ir::TYPE_S16);
+ case GEN_OCL_SAT_CONV_U16_TO_I8:
+ DEF(ir::TYPE_S8, ir::TYPE_U16);
+ case GEN_OCL_SAT_CONV_I32_TO_I8:
+ DEF(ir::TYPE_S8, ir::TYPE_S32);
+ case GEN_OCL_SAT_CONV_U32_TO_I8:
+ DEF(ir::TYPE_S8, ir::TYPE_U32);
+ case GEN_OCL_SAT_CONV_F32_TO_I8:
+ DEF(ir::TYPE_S8, ir::TYPE_FLOAT);
+ case GEN_OCL_SAT_CONV_I8_TO_U8:
+ DEF(ir::TYPE_U8, ir::TYPE_S8);
+ case GEN_OCL_SAT_CONV_I16_TO_U8:
+ DEF(ir::TYPE_U8, ir::TYPE_S16);
+ case GEN_OCL_SAT_CONV_U16_TO_U8:
+ DEF(ir::TYPE_U8, ir::TYPE_U16);
+ case GEN_OCL_SAT_CONV_I32_TO_U8:
+ DEF(ir::TYPE_U8, ir::TYPE_S32);
+ case GEN_OCL_SAT_CONV_U32_TO_U8:
+ DEF(ir::TYPE_U8, ir::TYPE_U32);
+ case GEN_OCL_SAT_CONV_F32_TO_U8:
+ DEF(ir::TYPE_U8, ir::TYPE_FLOAT);
+ case GEN_OCL_SAT_CONV_U16_TO_I16:
+ DEF(ir::TYPE_S16, ir::TYPE_U16);
+ case GEN_OCL_SAT_CONV_I32_TO_I16:
+ DEF(ir::TYPE_S16, ir::TYPE_S32);
+ case GEN_OCL_SAT_CONV_U32_TO_I16:
+ DEF(ir::TYPE_S16, ir::TYPE_U32);
+ case GEN_OCL_SAT_CONV_F32_TO_I16:
+ DEF(ir::TYPE_S16, ir::TYPE_FLOAT);
+ case GEN_OCL_SAT_CONV_I16_TO_U16:
+ DEF(ir::TYPE_U16, ir::TYPE_S16);
+ case GEN_OCL_SAT_CONV_I32_TO_U16:
+ DEF(ir::TYPE_U16, ir::TYPE_S32);
+ case GEN_OCL_SAT_CONV_U32_TO_U16:
+ DEF(ir::TYPE_U16, ir::TYPE_U32);
+ case GEN_OCL_SAT_CONV_F32_TO_U16:
+ DEF(ir::TYPE_U16, ir::TYPE_FLOAT);
+ case GEN_OCL_SAT_CONV_U32_TO_I32:
+ DEF(ir::TYPE_S32, ir::TYPE_U32);
+ case GEN_OCL_SAT_CONV_F32_TO_I32:
+ DEF(ir::TYPE_S32, ir::TYPE_FLOAT);
+ case GEN_OCL_SAT_CONV_I32_TO_U32:
+ DEF(ir::TYPE_U32, ir::TYPE_S32);
+ case GEN_OCL_SAT_CONV_F32_TO_U32:
+ DEF(ir::TYPE_U32, ir::TYPE_FLOAT);
+#undef DEF
default: break;
}
}
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 321fc4e7..3f44be88 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -146,3 +146,34 @@ DECL_LLVM_GEN_FUNCTION(UPSAMPLE_LONG, _Z18__gen_ocl_upsamplell)
// get sampler info
DECL_LLVM_GEN_FUNCTION(GET_SAMPLER_INFO, __gen_ocl_get_sampler_info)
+
+// saturate convert
+DECL_LLVM_GEN_FUNCTION(SAT_CONV_U8_TO_I8, _Z16convert_char_sath)
+DECL_LLVM_GEN_FUNCTION(SAT_CONV_I16_TO_I8, _Z16convert_char_sats)
+DECL_LLVM_GEN_FUNCTION(SAT_CONV_U16_TO_I8, _Z16convert_char_satt)
+DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_I8, _Z16convert_char_sati)
+DECL_LLVM_GEN_FUNCTION(SAT_CONV_U32_TO_I8, _Z16convert_char_satj)
+DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_I8, _Z16convert_char_satf)
+
+DECL_LLVM_GEN_FUNCTION(SAT_CONV_I8_TO_U8, _Z17convert_uchar_satc)
+DECL_LLVM_GEN_FUNCTION(SAT_CONV_I16_TO_U8, _Z17convert_uchar_sats)
+DECL_LLVM_GEN_FUNCTION(SAT_CONV_U16_TO_U8, _Z17convert_uchar_satt)
+DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_U8, _Z17convert_uchar_sati)
+DECL_LLVM_GEN_FUNCTION(SAT_CONV_U32_TO_U8, _Z17convert_uchar_satj)
+DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_U8, _Z17convert_uchar_satf)
+
+DECL_LLVM_GEN_FUNCTION(SAT_CONV_U16_TO_I16, _Z17convert_short_satt)
+DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_I16, _Z17convert_short_sati)
+DECL_LLVM_GEN_FUNCTION(SAT_CONV_U32_TO_I16, _Z17convert_short_satj)
+DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_I16, _Z17convert_short_satf)
+
+DECL_LLVM_GEN_FUNCTION(SAT_CONV_I16_TO_U16, _Z18convert_ushort_sats)
+DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_U16, _Z18convert_ushort_sati)
+DECL_LLVM_GEN_FUNCTION(SAT_CONV_U32_TO_U16, _Z18convert_ushort_satj)
+DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_U16, _Z18convert_ushort_satf)
+
+DECL_LLVM_GEN_FUNCTION(SAT_CONV_U32_TO_I32, _Z15convert_int_satj)
+DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_I32, _Z15convert_int_satf)
+
+DECL_LLVM_GEN_FUNCTION(SAT_CONV_I32_TO_U32, _Z16convert_uint_sati)
+DECL_LLVM_GEN_FUNCTION(SAT_CONV_F32_TO_U32, _Z16convert_uint_satf)
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index a4a1562f..66639147 100644
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -219,9 +219,33 @@ UDEF(uint);
UDEF(ulong);
#undef UDEF
-uchar INLINE_OVERLOADABLE convert_uchar_sat(float x) {
- return add_sat((uchar)x, (uchar)0);
-}
+#define DEF(DSTTYPE, SRCTYPE) \
+ OVERLOADABLE DSTTYPE convert_ ## DSTTYPE ## _sat(SRCTYPE x);
+DEF(char, uchar);
+DEF(char, short);
+DEF(char, ushort);
+DEF(char, int);
+DEF(char, uint);
+DEF(char, float);
+DEF(uchar, char);
+DEF(uchar, short);
+DEF(uchar, ushort);
+DEF(uchar, int);
+DEF(uchar, uint);
+DEF(uchar, float);
+DEF(short, ushort);
+DEF(short, int);
+DEF(short, uint);
+DEF(short, float);
+DEF(ushort, short);
+DEF(ushort, int);
+DEF(ushort, uint);
+DEF(ushort, float);
+DEF(int, uint);
+DEF(int, float);
+DEF(uint, int);
+DEF(uint, float);
+#undef DEF
INLINE_OVERLOADABLE int isfinite(float x) { return __builtin_isfinite(x); }
INLINE_OVERLOADABLE int isinf(float x) { return __builtin_isinf(x); }
diff --git a/kernels/builtin_convert_sat.cl b/kernels/builtin_convert_sat.cl
new file mode 100644
index 00000000..281c8904
--- /dev/null
+++ b/kernels/builtin_convert_sat.cl
@@ -0,0 +1,32 @@
+#define DEF(DSTTYPE, SRCTYPE) \
+ kernel void builtin_convert_ ## SRCTYPE ## _to_ ## DSTTYPE ## _sat(global SRCTYPE *src, global DSTTYPE *dst) { \
+ int i = get_global_id(0); \
+ dst[i] = convert_ ## DSTTYPE ## _sat(src[i]); \
+}
+
+DEF(char, uchar);
+DEF(char, short);
+DEF(char, ushort);
+DEF(char, int);
+DEF(char, uint);
+DEF(char, float);
+DEF(uchar, char);
+DEF(uchar, short);
+DEF(uchar, ushort);
+DEF(uchar, int);
+DEF(uchar, uint);
+DEF(uchar, float);
+DEF(short, ushort);
+DEF(short, int);
+DEF(short, uint);
+DEF(short, float);
+DEF(ushort, short);
+DEF(ushort, int);
+DEF(ushort, uint);
+DEF(ushort, float);
+DEF(int, uint);
+DEF(int, float);
+DEF(uint, int);
+DEF(uint, float);
+#undef DEF
+
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index f18bd46f..31b85e38 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -126,6 +126,7 @@ set (utests_sources
builtin_num_groups.cpp
builtin_local_id.cpp
builtin_acos_asin.cpp
+ builtin_convert_sat.cpp
runtime_createcontext.cpp
runtime_null_kernel_arg.cpp
runtime_event.cpp
diff --git a/utests/builtin_convert_sat.cpp b/utests/builtin_convert_sat.cpp
new file mode 100644
index 00000000..e16ce16a
--- /dev/null
+++ b/utests/builtin_convert_sat.cpp
@@ -0,0 +1,71 @@
+#include <cstdint>
+#include "utest_helper.hpp"
+
+typedef unsigned char uchar;
+typedef unsigned short ushort;
+
+int64_t my_rand(void) {
+ int64_t x = rand() - RAND_MAX/2;
+ int64_t y = rand() - RAND_MAX/2;
+ return x * y;
+}
+
+#define DEF(DST_TYPE, SRC_TYPE, DST_MIN, DST_MAX) \
+void builtin_convert_ ## SRC_TYPE ## _to_ ## DST_TYPE ## _sat(void) \
+{ \
+ const int n = 128; \
+ OCL_CREATE_KERNEL_FROM_FILE("builtin_convert_sat", "builtin_convert_" # SRC_TYPE "_to_" # DST_TYPE "_sat"); \
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(SRC_TYPE), NULL); \
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(DST_TYPE), NULL); \
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); \
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); \
+ globals[0] = n; \
+ locals[0] = 16; \
+ OCL_MAP_BUFFER(0); \
+ for (int i = 0; i < n; i++) \
+ ((SRC_TYPE *)buf_data[0])[i] = my_rand(); \
+ OCL_UNMAP_BUFFER(0); \
+ OCL_NDRANGE(1); \
+ OCL_MAP_BUFFER(0); \
+ OCL_MAP_BUFFER(1); \
+ for (int i = 0; i < n; i++) { \
+ SRC_TYPE src = ((SRC_TYPE *)buf_data[0])[i]; \
+ DST_TYPE dst; \
+ if ((double)src > (double)DST_MAX) \
+ dst = DST_MAX; \
+ else if ((double)src < (double)DST_MIN) \
+ dst = DST_MIN; \
+ else \
+ dst = src; \
+ OCL_ASSERT(((DST_TYPE *)buf_data[1])[i] == dst); \
+ } \
+ OCL_UNMAP_BUFFER(0); \
+ OCL_UNMAP_BUFFER(1); \
+} \
+MAKE_UTEST_FROM_FUNCTION(builtin_convert_ ## SRC_TYPE ## _to_ ## DST_TYPE ## _sat);
+
+DEF(char, uchar, -128, 127);
+DEF(char, short, -128, 127);
+DEF(char, ushort, -128, 127);
+DEF(char, int, -128, 127);
+DEF(char, uint, -128, 127);
+DEF(char, float, -128, 127);
+DEF(uchar, char, 0, 255);
+DEF(uchar, short, 0, 255);
+DEF(uchar, ushort, 0, 255);
+DEF(uchar, int, 0, 255);
+DEF(uchar, uint, 0, 255);
+DEF(uchar, float, 0, 255);
+DEF(short, ushort, -32768, 32767);
+DEF(short, int, -32768, 32767);
+DEF(short, uint, -32768, 32767);
+DEF(short, float, -32768, 32767);
+DEF(ushort, short, 0, 65535);
+DEF(ushort, int, 0, 65535);
+DEF(ushort, uint, 0, 65535);
+DEF(ushort, float, 0, 65535);
+DEF(int, uint, -0x7FFFFFFF-1, 0x7FFFFFFF);
+DEF(int, float, -0x7FFFFFFF-1, 0x7FFFFFFF);
+DEF(uint, int, 0, 0xffffffffu);
+DEF(uint, float, 0, 0xffffffffu);
+#undef DEF