summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuo <xionghu.luo@intel.com>2014-06-24 10:09:12 +0800
committerZhigang Gong <zhigang.gong@intel.com>2014-06-24 23:31:57 +0800
commitcca4b253d88cbdbe9737c9c6071bde81f7b85e9e (patch)
tree3bc2750068e8ecb5be0ffcd66d36c14134d98d7f
parente6b6c50e84193c54207d7fefde41eba9b27fb135 (diff)
downloadbeignet-cca4b253d88cbdbe9737c9c6071bde81f7b85e9e.tar.gz
add cpu copy for 1Darray and 2darray related copy APIs.
detail cases: 1Darray, 2Darray, 2Darrayto2D, 2Darrayto3D, 2Dto2Darray, 3Dto2Darray. 1d used gpu copy. v2: fixed 1d array to 1d array copy, don't need to switch depth and height. Signed-off-by: Luo <xionghu.luo@intel.com> Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com> Signed-off-by: Zhigang Gong <zhigang.gong@intel.com>
-rw-r--r--src/CMakeLists.txt4
-rw-r--r--src/cl_context.h1
-rw-r--r--src/cl_mem.c69
-rw-r--r--src/cl_mem.h4
-rw-r--r--src/kernels/cl_internal_copy_image_1d_to_1d.cl19
5 files changed, 91 insertions, 6 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 7ae84fe0..46426d96 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -41,8 +41,8 @@ set (KERNEL_STR_FILES)
set (KERNEL_NAMES cl_internal_copy_buf_align4
cl_internal_copy_buf_align16 cl_internal_copy_buf_unalign_same_offset
cl_internal_copy_buf_unalign_dst_offset cl_internal_copy_buf_unalign_src_offset
-cl_internal_copy_buf_rect cl_internal_copy_image_2d_to_2d cl_internal_copy_image_3d_to_2d
-cl_internal_copy_image_2d_to_3d cl_internal_copy_image_3d_to_3d
+cl_internal_copy_buf_rect cl_internal_copy_image_1d_to_1d cl_internal_copy_image_2d_to_2d
+cl_internal_copy_image_3d_to_2d cl_internal_copy_image_2d_to_3d cl_internal_copy_image_3d_to_3d
cl_internal_copy_image_2d_to_buffer cl_internal_copy_image_3d_to_buffer
cl_internal_copy_buffer_to_image_2d cl_internal_copy_buffer_to_image_3d
cl_internal_fill_buf_align8 cl_internal_fill_buf_align4
diff --git a/src/cl_context.h b/src/cl_context.h
index 0e4db734..75afbf60 100644
--- a/src/cl_context.h
+++ b/src/cl_context.h
@@ -47,6 +47,7 @@ enum _cl_internal_ker_type {
CL_ENQUEUE_COPY_BUFFER_UNALIGN_DST_OFFSET,
CL_ENQUEUE_COPY_BUFFER_UNALIGN_SRC_OFFSET,
CL_ENQUEUE_COPY_BUFFER_RECT,
+ CL_ENQUEUE_COPY_IMAGE_1D_TO_1D, //copy image 1d to image 1d
CL_ENQUEUE_COPY_IMAGE_2D_TO_2D, //copy image 2d to image 2d
CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, //copy image 3d to image 2d
CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, //copy image 2d to image 3d
diff --git a/src/cl_mem.c b/src/cl_mem.c
index f860b385..05ca9f1b 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -546,6 +546,34 @@ cl_mem_copy_image_region(const size_t *origin, const size_t *region,
}
}
+void
+cl_mem_copy_image_to_image(const size_t *dst_origin,const size_t *src_origin, const size_t *region,
+ const struct _cl_mem_image *dst_image, const struct _cl_mem_image *src_image)
+{
+ char* dst= cl_mem_map_auto((cl_mem)dst_image);
+ char* src= cl_mem_map_auto((cl_mem)src_image);
+ size_t dst_offset = dst_image->bpp * dst_origin[0] + dst_image->row_pitch * dst_origin[1] + dst_image->slice_pitch * dst_origin[2];
+ size_t src_offset = src_image->bpp * src_origin[0] + src_image->row_pitch * src_origin[1] + src_image->slice_pitch * src_origin[2];
+ dst= (char*)dst+ dst_offset;
+ src= (char*)src+ src_offset;
+ cl_uint y, z;
+ for (z = 0; z < region[2]; z++) {
+ const char* src_ptr = src;
+ char* dst_ptr = dst;
+ for (y = 0; y < region[1]; y++) {
+ memcpy(dst_ptr, src_ptr, src_image->bpp*region[0]);
+ src_ptr += src_image->row_pitch;
+ dst_ptr += dst_image->row_pitch;
+ }
+ src = (char*)src + src_image->slice_pitch;
+ dst = (char*)dst + dst_image->slice_pitch;
+ }
+
+ cl_mem_unmap_auto((cl_mem)src_image);
+ cl_mem_unmap_auto((cl_mem)dst_image);
+
+}
+
static void
cl_mem_copy_image(struct _cl_mem_image *image,
size_t row_pitch,
@@ -1447,33 +1475,66 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image
assert(src_image->base.ctx == dst_image->base.ctx);
/* setup the kernel and run. */
- if(src_image->image_type == CL_MEM_OBJECT_IMAGE2D) {
+ if(src_image->image_type == CL_MEM_OBJECT_IMAGE1D) {
+ if(dst_image->image_type == CL_MEM_OBJECT_IMAGE1D) {
+ extern char cl_internal_copy_image_1d_to_1d_str[];
+ extern int cl_internal_copy_image_1d_to_1d_str_size;
+
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_1D_TO_1D,
+ cl_internal_copy_image_1d_to_1d_str, (size_t)cl_internal_copy_image_1d_to_1d_str_size, NULL);
+ }
+ } else if(src_image->image_type == CL_MEM_OBJECT_IMAGE2D) {
if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D) {
extern char cl_internal_copy_image_2d_to_2d_str[];
extern int cl_internal_copy_image_2d_to_2d_str_size;
ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_2D,
cl_internal_copy_image_2d_to_2d_str, (size_t)cl_internal_copy_image_2d_to_2d_str_size, NULL);
- }else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
+ } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
extern char cl_internal_copy_image_2d_to_3d_str[];
extern int cl_internal_copy_image_2d_to_3d_str_size;
ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_3D,
cl_internal_copy_image_2d_to_3d_str, (size_t)cl_internal_copy_image_2d_to_3d_str_size, NULL);
+ } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
+
+ cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image);
+ return CL_SUCCESS;
}
- }else if(src_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
+ } else if(src_image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
+ if(dst_image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
+
+ cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image);
+ return CL_SUCCESS;
+ }
+ } else if(src_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
+ if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
+
+ cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image);
+ return CL_SUCCESS;
+ } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D) {
+ cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image);
+ return CL_SUCCESS;
+ } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
+ cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image);
+ return CL_SUCCESS;
+ }
+ } else if(src_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D) {
extern char cl_internal_copy_image_3d_to_2d_str[];
extern int cl_internal_copy_image_3d_to_2d_str_size;
ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_2D,
cl_internal_copy_image_3d_to_2d_str, (size_t)cl_internal_copy_image_3d_to_2d_str_size, NULL);
- }else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
+ } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
extern char cl_internal_copy_image_3d_to_3d_str[];
extern int cl_internal_copy_image_3d_to_3d_str_size;
ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_3D,
cl_internal_copy_image_3d_to_3d_str, (size_t)cl_internal_copy_image_3d_to_3d_str_size, NULL);
+ } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
+ cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image);
+ return CL_SUCCESS;
}
}
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 8ed8e2d5..a2fb8512 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -264,6 +264,10 @@ cl_mem_copy_image_region(const size_t *origin, const size_t *region,
const void *src, size_t src_row_pitch, size_t src_slice_pitch,
const struct _cl_mem_image *image);
+void
+cl_mem_copy_image_to_image(const size_t *dst_origin,const size_t *src_origin, const size_t *region,
+ const struct _cl_mem_image *dst_image, const struct _cl_mem_image *src_image);
+
extern cl_mem cl_mem_new_libva_buffer(cl_context ctx,
unsigned int bo_name,
cl_int *errcode);
diff --git a/src/kernels/cl_internal_copy_image_1d_to_1d.cl b/src/kernels/cl_internal_copy_image_1d_to_1d.cl
new file mode 100644
index 00000000..dca82b25
--- /dev/null
+++ b/src/kernels/cl_internal_copy_image_1d_to_1d.cl
@@ -0,0 +1,19 @@
+kernel void __cl_copy_image_1d_to_1d(__read_only image1d_t src_image, __write_only image1d_t dst_image,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2,
+ unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ int4 color;
+ const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
+ int src_coord;
+ int dst_coord;
+ if((i >= region0) || (j>= region1) || (k>=region2))
+ return;
+ src_coord = src_origin0 + i;
+ dst_coord = dst_origin0 + i;
+ color = read_imagei(src_image, sampler, src_coord);
+ write_imagei(dst_image, dst_coord, color);
+}