diff options
author | Homer Hsing <homer.xing@intel.com> | 2013-08-12 10:12:16 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@linux.intel.com> | 2013-08-12 16:36:14 +0800 |
commit | 1475592c5ea56fc0bedfc7bb198aca988ac9a326 (patch) | |
tree | 91cb3a6f1d7e9a057bea326ca210d1d83ac7297a | |
parent | 4248f34ae49569a28aaf7804b4ad3d56c7e3d7c1 (diff) | |
download | beignet-1475592c5ea56fc0bedfc7bb198aca988ac9a326.tar.gz |
support converting shorter int to 64bit int
converting byte/word/dword to int64
also add test case
v2: define temporary reg as dest reg of instruction
Signed-off-by: Homer Hsing <homer.xing@intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
-rw-r--r-- | backend/src/backend/gen_context.cpp | 25 | ||||
-rw-r--r-- | backend/src/backend/gen_insn_selection.cpp | 9 | ||||
-rw-r--r-- | backend/src/backend/gen_insn_selection.hxx | 1 | ||||
-rw-r--r-- | backend/src/backend/gen_register.hpp | 6 | ||||
-rw-r--r-- | kernels/compiler_long_convert.cl | 7 | ||||
-rw-r--r-- | utests/CMakeLists.txt | 1 | ||||
-rw-r--r-- | utests/compiler_long_convert.cpp | 67 |
7 files changed, 116 insertions, 0 deletions
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 76e6b683..4d6da8c3 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -162,6 +162,31 @@ namespace gbe case SEL_OP_MOV_DF: p->MOV_DF(dst, src, tmp); break; + case SEL_OP_CONVI_TO_I64: { + GenRegister middle; + if (src.type == GEN_TYPE_B || src.type == GEN_TYPE_D) { + middle = tmp; + middle.type = src.is_signed_int() ? GEN_TYPE_D : GEN_TYPE_UD; + p->MOV(middle, src); + } else { + middle = src; + } + int execWidth = p->curr.execWidth; + p->push(); + p->curr.execWidth = 8; + for (int nib = 0; nib < execWidth / 4; nib ++) { + p->curr.chooseNib(nib); + p->MOV(dst.bottom_half(), middle); + if(middle.is_signed_int()) + p->ASR(dst.top_half(), middle, GenRegister::immud(31)); + else + p->MOV(dst.top_half(), GenRegister::immd(0)); + dst = GenRegister::suboffset(dst, 4); + middle = GenRegister::suboffset(middle, 4); + } + p->pop(); + break; + } default: NOT_IMPLEMENTED; } diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 06d19028..46da37f0 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -455,6 +455,7 @@ namespace gbe ALU2WithTemp(RHADD) ALU2(UPSAMPLE_SHORT) ALU2(UPSAMPLE_INT) + ALU1WithTemp(CONVI_TO_I64) #undef ALU1 #undef ALU1WithTemp #undef ALU2 @@ -2257,6 +2258,14 @@ namespace gbe } else if (dst.isdf()) { ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD); sel.MOV_DF(dst, src, sel.selReg(r)); + } else if (dst.isint64()) { + switch(src.type) { + case GEN_TYPE_F: + case GEN_TYPE_DF: + NOT_IMPLEMENTED; + default: + sel.CONVI_TO_I64(dst, src, sel.selReg(sel.reg(FAMILY_DWORD))); + } } else sel.MOV(dst, src); return true; diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx index 56600781..9e24dd9b 100644 --- a/backend/src/backend/gen_insn_selection.hxx +++ b/backend/src/backend/gen_insn_selection.hxx @@ -58,3 +58,4 @@ DECL_SELECTION_IR(HADD, BinaryWithTempInstruction) DECL_SELECTION_IR(RHADD, BinaryWithTempInstruction) DECL_SELECTION_IR(UPSAMPLE_SHORT, BinaryInstruction) DECL_SELECTION_IR(UPSAMPLE_INT, BinaryInstruction) +DECL_SELECTION_IR(CONVI_TO_I64, UnaryWithTempInstruction) diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp index 2cad4c0a..c9533192 100644 --- a/backend/src/backend/gen_register.hpp +++ b/backend/src/backend/gen_register.hpp @@ -273,6 +273,12 @@ namespace gbe return r; } + INLINE bool is_signed_int(void) const { + if ((type == GEN_TYPE_B || type == GEN_TYPE_W || type == GEN_TYPE_D || type == GEN_TYPE_L) && file == GEN_GENERAL_REGISTER_FILE) + return true; + return false; + } + INLINE bool isdf(void) const { if (type == GEN_TYPE_DF && file == GEN_GENERAL_REGISTER_FILE) return true; diff --git a/kernels/compiler_long_convert.cl b/kernels/compiler_long_convert.cl new file mode 100644 index 00000000..f22914f0 --- /dev/null +++ b/kernels/compiler_long_convert.cl @@ -0,0 +1,7 @@ +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +kernel void compiler_long_convert(global char *src1, global short *src2, global int *src3, global long *dst1, global long *dst2, global long *dst3) { + int i = get_global_id(0); + dst1[i] = src1[i]; + dst2[i] = src2[i]; + dst3[i] = src3[i]; +} diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index e7d3e72c..3922220b 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -125,6 +125,7 @@ set (utests_sources compiler_double_4.cpp compiler_long.cpp compiler_long_2.cpp + compiler_long_convert.cpp utest_assert.cpp utest.cpp utest_file_map.cpp diff --git a/utests/compiler_long_convert.cpp b/utests/compiler_long_convert.cpp new file mode 100644 index 00000000..18e13ee3 --- /dev/null +++ b/utests/compiler_long_convert.cpp @@ -0,0 +1,67 @@ +#include <cstdint> +#include <cstring> +#include <iostream> +#include "utest_helper.hpp" + +void compiler_long_convert(void) +{ + const size_t n = 16; + char src1[n]; + short src2[n]; + int src3[n]; + + // Setup kernel and buffers + OCL_CREATE_KERNEL("compiler_long_convert"); + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(char), NULL); + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(short), NULL); + OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(int), NULL); + OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(int64_t), NULL); + OCL_CREATE_BUFFER(buf[4], 0, n * sizeof(int64_t), NULL); + OCL_CREATE_BUFFER(buf[5], 0, n * sizeof(int64_t), NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); + OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]); + OCL_SET_ARG(4, sizeof(cl_mem), &buf[4]); + OCL_SET_ARG(5, sizeof(cl_mem), &buf[5]); + globals[0] = n; + locals[0] = 16; + + // Run random tests + for (int32_t i = 0; i < (int32_t) n; ++i) { + src1[i] = -i; + src2[i] = -i; + src3[i] = -i; + } + OCL_MAP_BUFFER(0); + OCL_MAP_BUFFER(1); + OCL_MAP_BUFFER(2); + memcpy(buf_data[0], src1, sizeof(src1)); + memcpy(buf_data[1], src2, sizeof(src2)); + memcpy(buf_data[2], src3, sizeof(src3)); + OCL_UNMAP_BUFFER(0); + OCL_UNMAP_BUFFER(1); + OCL_UNMAP_BUFFER(2); + + // Run the kernel on GPU + OCL_NDRANGE(1); + + // Compare + OCL_MAP_BUFFER(3); + OCL_MAP_BUFFER(4); + OCL_MAP_BUFFER(5); + int64_t *dst1 = ((int64_t *)buf_data[3]); + int64_t *dst2 = ((int64_t *)buf_data[4]); + int64_t *dst3 = ((int64_t *)buf_data[5]); + for (int32_t i = 0; i < (int32_t) n; ++i) { + //printf("%lx %lx %lx\n", dst1[i], dst2[i], dst3[i]); + OCL_ASSERT(dst1[i] == -(int64_t)i); + OCL_ASSERT(dst2[i] == -(int64_t)i); + OCL_ASSERT(dst3[i] == -(int64_t)i); + } + OCL_UNMAP_BUFFER(3); + OCL_UNMAP_BUFFER(4); + OCL_UNMAP_BUFFER(5); +} + +MAKE_UTEST_FROM_FUNCTION(compiler_long_convert); |