summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJunyan He <junyan.he@linux.intel.com>2014-03-26 18:27:48 +0800
committerZhigang Gong <zhigang.gong@intel.com>2014-04-02 13:06:55 +0800
commitda6fdcd1e21d710c5253417634a9b23543ea0271 (patch)
treead2f24b1dbbed56fb8bed4bc4b7df63f291d05e6 /src
parentebd86bf4ca91b6ac14e85a44ade4fde00178f743 (diff)
downloadbeignet-da6fdcd1e21d710c5253417634a9b23543ea0271.tar.gz
Add three copy cl files for Enqueue Copy usage.
Add these three cl files, one for src and dst are not aligned but have same offset to 4. second for src's %4 offset is bigger than the dst's third for src's %4 offset is small than the dst's Signed-off-by: Junyan He <junyan.he@linux.intel.com> Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
Diffstat (limited to 'src')
-rw-r--r--src/CMakeLists.txt4
-rw-r--r--src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl28
-rw-r--r--src/kernels/cl_internal_copy_buf_unalign_same_offset.cl19
-rw-r--r--src/kernels/cl_internal_copy_buf_unalign_src_offset.cl29
4 files changed, 79 insertions, 1 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 4c342353..d690d9ad 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -18,7 +18,9 @@ endforeach (KF)
endmacro (MakeKernelBinStr)
set (KERNEL_STR_FILES)
-set (KERNEL_NAMES cl_internal_copy_buf_align1 cl_internal_copy_buf_align4 cl_internal_copy_buf_align16)
+set (KERNEL_NAMES cl_internal_copy_buf_align1 cl_internal_copy_buf_align4
+cl_internal_copy_buf_align16 cl_internal_copy_buf_unalign_same_offset
+cl_internal_copy_buf_unalign_dst_offset cl_internal_copy_buf_unalign_src_offset)
MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}")
set(OPENCL_SRC
diff --git a/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl b/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl
new file mode 100644
index 00000000..13f41626
--- /dev/null
+++ b/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl
@@ -0,0 +1,28 @@
+kernel void __cl_cpy_region_unalign_dst_offset ( global int* src, unsigned int src_offset,
+ global int* dst, unsigned int dst_offset,
+ unsigned int size,
+ unsigned int first_mask, unsigned int last_mask,
+ unsigned int shift, unsigned int dw_mask)
+{
+ int i = get_global_id(0);
+ unsigned int tmp = 0;
+
+ if (i > size -1)
+ return;
+
+ /* last dw, need to be careful, not to overflow the source. */
+ if ((i == size - 1) && ((last_mask & (~(~dw_mask >> shift))) == 0)) {
+ tmp = ((src[src_offset + i] & ~dw_mask) >> shift);
+ } else {
+ tmp = ((src[src_offset + i] & ~dw_mask) >> shift)
+ | ((src[src_offset + i + 1] & dw_mask) << (32 - shift));
+ }
+
+ if (i == 0) {
+ dst[dst_offset] = (dst[dst_offset] & first_mask) | (tmp & (~first_mask));
+ } else if (i == size - 1) {
+ dst[i+dst_offset] = (tmp & last_mask) | (dst[i+dst_offset] & (~last_mask));
+ } else {
+ dst[i+dst_offset] = tmp;
+ }
+}
diff --git a/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl b/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl
new file mode 100644
index 00000000..85102461
--- /dev/null
+++ b/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl
@@ -0,0 +1,19 @@
+kernel void __cl_cpy_region_unalign_same_offset ( global int* src, unsigned int src_offset,
+ global int* dst, unsigned int dst_offset,
+ unsigned int size,
+ unsigned int first_mask, unsigned int last_mask)
+{
+ int i = get_global_id(0);
+ if (i > size -1)
+ return;
+
+ if (i == 0) {
+ dst[dst_offset] = (dst[dst_offset] & first_mask)
+ | (src[src_offset] & (~first_mask));
+ } else if (i == size - 1) {
+ dst[i+dst_offset] = (src[i+src_offset] & last_mask)
+ | (dst[i+dst_offset] & (~last_mask));
+ } else {
+ dst[i+dst_offset] = src[i+src_offset];
+ }
+}
diff --git a/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl b/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl
new file mode 100644
index 00000000..f98368ac
--- /dev/null
+++ b/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl
@@ -0,0 +1,29 @@
+kernel void __cl_cpy_region_unalign_src_offset ( global int* src, unsigned int src_offset,
+ global int* dst, unsigned int dst_offset,
+ unsigned int size,
+ unsigned int first_mask, unsigned int last_mask,
+ unsigned int shift, unsigned int dw_mask, int src_less)
+{
+ int i = get_global_id(0);
+ unsigned int tmp = 0;
+
+ if (i > size -1)
+ return;
+
+ if (i == 0) {
+ tmp = ((src[src_offset + i] & dw_mask) << shift);
+ } else if (src_less && i == size - 1) { // not exceed the bound of source
+ tmp = ((src[src_offset + i - 1] & ~dw_mask) >> (32 - shift));
+ } else {
+ tmp = ((src[src_offset + i - 1] & ~dw_mask) >> (32 - shift))
+ | ((src[src_offset + i] & dw_mask) << shift);
+ }
+
+ if (i == 0) {
+ dst[dst_offset] = (dst[dst_offset] & first_mask) | (tmp & (~first_mask));
+ } else if (i == size - 1) {
+ dst[i+dst_offset] = (tmp & last_mask) | (dst[i+dst_offset] & (~last_mask));
+ } else {
+ dst[i+dst_offset] = tmp;
+ }
+}