summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuo <xionghu.luo@intel.com>2014-06-23 06:03:30 +0800
committerZhigang Gong <zhigang.gong@intel.com>2014-06-24 22:35:30 +0800
commit3b308f245587cef7eb4787baeacef0c8119b02c2 (patch)
tree976b48c6ec6d6acc2c6cb24fd967c9fd7bba6d85
parent7c69e7be62715e06ff34e9d76d841d61e03d4dd5 (diff)
downloadbeignet-3b308f245587cef7eb4787baeacef0c8119b02c2.tar.gz
implement API clEnqueueFillImage.
enqueues a command to fill an image object with a specified color. fix typo cl_context_get_static_kernel_from_bin. v2: fix image 1d array bug. Signed-off-by: Luo <xionghu.luo@intel.com> Signed-off-by: Zhigang Gong <zhigang.gong@linux.intel.com>
-rw-r--r--src/CMakeLists.txt4
-rw-r--r--src/cl_api.c78
-rw-r--r--src/cl_context.c2
-rw-r--r--src/cl_context.h7
-rw-r--r--src/cl_enqueue.c1
-rw-r--r--src/cl_enqueue.h1
-rw-r--r--src/cl_gt_device.h7
-rw-r--r--src/cl_khr_icd.c2
-rw-r--r--src/cl_mem.c106
-rw-r--r--src/cl_mem.h3
-rw-r--r--src/kernels/cl_internal_fill_image_1d.cl14
-rw-r--r--src/kernels/cl_internal_fill_image_1d_array.cl15
-rw-r--r--src/kernels/cl_internal_fill_image_2d.cl15
-rw-r--r--src/kernels/cl_internal_fill_image_2d_array.cl16
-rw-r--r--src/kernels/cl_internal_fill_image_3d.cl16
15 files changed, 261 insertions, 26 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 8651af6c..5c89e55b 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -47,7 +47,9 @@ cl_internal_copy_image_2d_to_buffer cl_internal_copy_image_3d_to_buffer
cl_internal_copy_buffer_to_image_2d cl_internal_copy_buffer_to_image_3d
cl_internal_fill_buf_align8 cl_internal_fill_buf_align4
cl_internal_fill_buf_align2 cl_internal_fill_buf_unalign
-cl_internal_fill_buf_align128)
+cl_internal_fill_buf_align128 cl_internal_fill_image_1d
+cl_internal_fill_image_1d_array cl_internal_fill_image_2d
+cl_internal_fill_image_2d_array cl_internal_fill_image_3d)
set (BUILT_IN_NAME cl_internal_built_in_kernel)
MakeBuiltInKernelStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}")
MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}")
diff --git a/src/cl_api.c b/src/cl_api.c
index 32f91d72..90422432 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -1812,6 +1812,79 @@ error:
}
cl_int
+clEnqueueFillImage(cl_command_queue command_queue,
+ cl_mem image,
+ const void * fill_color,
+ const size_t * porigin,
+ const size_t * pregion,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event)
+{
+ cl_int err = CL_SUCCESS;
+ enqueue_data *data, no_wait_data = { 0 };
+
+ CHECK_QUEUE(command_queue);
+ CHECK_IMAGE(image, src_image);
+ FIXUP_IMAGE_REGION(src_image, pregion, region);
+ FIXUP_IMAGE_ORIGIN(src_image, porigin, origin);
+
+ if (command_queue->ctx != image->ctx) {
+ err = CL_INVALID_CONTEXT;
+ goto error;
+ }
+
+ if (fill_color == NULL) {
+ err = CL_INVALID_VALUE;
+ goto error;
+ }
+
+ if (!origin || !region || origin[0] + region[0] > src_image->w || origin[1] + region[1] > src_image->h || origin[2] + region[2] > src_image->depth) {
+ err = CL_INVALID_VALUE;
+ goto error;
+ }
+
+ if (src_image->image_type == CL_MEM_OBJECT_IMAGE2D && (origin[2] != 0 || region[2] != 1)){
+ err = CL_INVALID_VALUE;
+ goto error;
+ }
+
+ if (src_image->image_type == CL_MEM_OBJECT_IMAGE1D && (origin[2] != 0 ||origin[1] != 0 || region[2] != 1 || region[1] != 1)){
+ err = CL_INVALID_VALUE;
+ goto error;
+ }
+
+ err = cl_image_fill(command_queue, fill_color, src_image, origin, region);
+ if (err) {
+ goto error;
+ }
+
+ TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, image->ctx);
+
+ data = &no_wait_data;
+ data->type = EnqueueFillImage;
+ data->queue = command_queue;
+
+ if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
+ event, data, CL_COMMAND_FILL_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) {
+ if (event && (*event)->type != CL_COMMAND_USER
+ && (*event)->queue->props & CL_QUEUE_PROFILING_ENABLE) {
+ cl_event_get_timestamp(*event, CL_PROFILING_COMMAND_SUBMIT);
+ }
+
+ err = cl_command_queue_flush(command_queue);
+ }
+
+ if(b_output_kernel_perf)
+ time_end(command_queue->ctx, "beignet internal kernel : cl_fill_image", "", command_queue);
+
+ return 0;
+
+ error:
+ return err;
+}
+
+cl_int
clEnqueueFillBuffer(cl_command_queue command_queue,
cl_mem buffer,
const void * pattern,
@@ -2637,9 +2710,12 @@ clEnqueueMapImage(cl_command_queue command_queue,
goto error;
}
- *image_row_pitch = image->row_pitch;
if (image_slice_pitch)
*image_slice_pitch = image->slice_pitch;
+ if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
+ *image_row_pitch = image->slice_pitch;
+ else
+ *image_row_pitch = image->row_pitch;
if ((map_flags & CL_MAP_READ &&
mem->flags & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) ||
diff --git a/src/cl_context.c b/src/cl_context.c
index 8f42a585..152faf32 100644
--- a/src/cl_context.c
+++ b/src/cl_context.c
@@ -319,7 +319,7 @@ cl_context_get_static_kernel(cl_context ctx, cl_int index, const char * str_kern
}
cl_kernel
-cl_context_get_static_kernel_form_bin(cl_context ctx, cl_int index,
+cl_context_get_static_kernel_from_bin(cl_context ctx, cl_int index,
const char * str_kernel, size_t size, const char * str_option)
{
cl_int ret;
diff --git a/src/cl_context.h b/src/cl_context.h
index cba0a0aa..0e4db734 100644
--- a/src/cl_context.h
+++ b/src/cl_context.h
@@ -63,6 +63,11 @@ enum _cl_internal_ker_type {
CL_ENQUEUE_FILL_BUFFER_ALIGN8_32, //fill buffer with 16 aligne pattern, pattern size=32
CL_ENQUEUE_FILL_BUFFER_ALIGN8_64, //fill buffer with 16 aligne pattern, pattern size=64
CL_ENQUEUE_FILL_BUFFER_ALIGN128, //fill buffer with 128 aligne pattern, pattern size=128
+ CL_ENQUEUE_FILL_IMAGE_1D, //fill image 1d
+ CL_ENQUEUE_FILL_IMAGE_1D_ARRAY, //fill image 1d array
+ CL_ENQUEUE_FILL_IMAGE_2D, //fill image 2d
+ CL_ENQUEUE_FILL_IMAGE_2D_ARRAY, //fill image 2d array
+ CL_ENQUEUE_FILL_IMAGE_3D, //fill image 3d
CL_INTERNAL_KERNEL_MAX
};
@@ -153,7 +158,7 @@ extern cl_buffer_mgr cl_context_get_bufmgr(cl_context ctx);
extern cl_kernel cl_context_get_static_kernel(cl_context ctx, cl_int index, const char *str_kernel, const char * str_option);
/* Get the internal used kernel from binary*/
-extern cl_kernel cl_context_get_static_kernel_form_bin(cl_context ctx, cl_int index,
+extern cl_kernel cl_context_get_static_kernel_from_bin(cl_context ctx, cl_int index,
const char * str_kernel, size_t size, const char * str_option);
#endif /* __CL_CONTEXT_H__ */
diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
index bc0ca2c6..52c824d3 100644
--- a/src/cl_enqueue.c
+++ b/src/cl_enqueue.c
@@ -432,6 +432,7 @@ cl_int cl_enqueue_handle(cl_event event, enqueue_data* data)
case EnqueueCopyImageToBuffer:
case EnqueueNDRangeKernel:
case EnqueueFillBuffer:
+ case EnqueueFillImage:
cl_gpgpu_event_resume((cl_gpgpu_event)data->ptr);
return CL_SUCCESS;
case EnqueueNativeKernel:
diff --git a/src/cl_enqueue.h b/src/cl_enqueue.h
index 65276027..a9b36017 100644
--- a/src/cl_enqueue.h
+++ b/src/cl_enqueue.h
@@ -43,6 +43,7 @@ typedef enum {
EnqueueMarker,
EnqueueBarrier,
EnqueueFillBuffer,
+ EnqueueFillImage,
EnqueueMigrateMemObj,
EnqueueInvalid
} enqueue_type;
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index ba7d66cd..d7855cdc 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -102,7 +102,12 @@ DECL_INFO_STRING(built_in_kernels, "__cl_copy_region_align4;"
"__cl_fill_region_align8_4;"
"__cl_fill_region_align8_8;"
"__cl_fill_region_align8_16;"
- "__cl_fill_region_align128;")
+ "__cl_fill_region_align128;"
+ "__cl_fill_image_1d;"
+ "__cl_fill_image_1d_array;"
+ "__cl_fill_image_2d;"
+ "__cl_fill_image_2d_array;"
+ "__cl_fill_image_3d;")
DECL_INFO_STRING(driver_version, LIBCL_DRIVER_VERSION_STRING)
#undef DECL_INFO_STRING
diff --git a/src/cl_khr_icd.c b/src/cl_khr_icd.c
index b23c29d2..6d49db03 100644
--- a/src/cl_khr_icd.c
+++ b/src/cl_khr_icd.c
@@ -150,7 +150,7 @@ struct _cl_icd_dispatch const cl_khr_icd_dispatch = {
clUnloadPlatformCompiler,
clGetKernelArgInfo,
clEnqueueFillBuffer,
- CL_1_2_NOTYET(clEnqueueFillImage),
+ clEnqueueFillImage,
clEnqueueMigrateMemObjects,
clEnqueueMarkerWithWaitList,
clEnqueueBarrierWithWaitList,
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 46d9af1a..f860b385 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -1051,7 +1051,7 @@ cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
extern char cl_internal_copy_buf_align16_str[];
extern int cl_internal_copy_buf_align16_str_size;
- ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_ALIGN16,
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_ALIGN16,
cl_internal_copy_buf_align16_str, (size_t)cl_internal_copy_buf_align16_str_size, NULL);
cb = cb/16;
aligned = 1;
@@ -1059,7 +1059,7 @@ cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
extern char cl_internal_copy_buf_align4_str[];
extern int cl_internal_copy_buf_align4_str_size;
- ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_ALIGN4,
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_ALIGN4,
cl_internal_copy_buf_align4_str, (size_t)cl_internal_copy_buf_align4_str_size, NULL);
cb = cb/4;
aligned = 1;
@@ -1106,7 +1106,7 @@ cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
extern char cl_internal_copy_buf_unalign_same_offset_str[];
extern int cl_internal_copy_buf_unalign_same_offset_str_size;
- ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_UNALIGN_SAME_OFFSET,
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_UNALIGN_SAME_OFFSET,
cl_internal_copy_buf_unalign_same_offset_str,
(size_t)cl_internal_copy_buf_unalign_same_offset_str_size, NULL);
@@ -1133,7 +1133,7 @@ cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
unsigned int dw_mask = masks[align_diff];
int shift = align_diff * 8;
- ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_UNALIGN_DST_OFFSET,
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_UNALIGN_DST_OFFSET,
cl_internal_copy_buf_unalign_dst_offset_str,
(size_t)cl_internal_copy_buf_unalign_dst_offset_str_size, NULL);
@@ -1163,7 +1163,7 @@ cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
int shift = align_diff * 8;
int src_less = !(src_offset % 4) && !((src_offset + cb) % 4);
- ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_UNALIGN_SRC_OFFSET,
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_UNALIGN_SRC_OFFSET,
cl_internal_copy_buf_unalign_src_offset_str,
(size_t)cl_internal_copy_buf_unalign_src_offset_str_size, NULL);
@@ -1188,6 +1188,72 @@ cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
}
LOCAL cl_int
+cl_image_fill(cl_command_queue queue, const void * pattern, struct _cl_mem_image* src_image,
+ const size_t * origin, const size_t * region)
+{
+ cl_int ret = CL_SUCCESS;
+ cl_kernel ker = NULL;
+ size_t global_off[] = {0,0,0};
+ size_t global_sz[] = {1,1,1};
+ size_t local_sz[] = {LOCAL_SZ_0,LOCAL_SZ_1,LOCAL_SZ_2};
+
+ if(region[1] == 1) local_sz[1] = 1;
+ if(region[2] == 1) local_sz[2] = 1;
+ global_sz[0] = ((region[0] + local_sz[0] - 1) / local_sz[0]) * local_sz[0];
+ global_sz[1] = ((region[1] + local_sz[1] - 1) / local_sz[1]) * local_sz[1];
+ global_sz[2] = ((region[2] + local_sz[2] - 1) / local_sz[2]) * local_sz[2];
+
+ if(src_image->image_type == CL_MEM_OBJECT_IMAGE1D) {
+ extern char cl_internal_fill_image_1d_str[];
+ extern int cl_internal_fill_image_1d_str_size;
+
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_IMAGE_1D,
+ cl_internal_fill_image_1d_str, (size_t)cl_internal_fill_image_1d_str_size, NULL);
+ }else if(src_image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
+ extern char cl_internal_fill_image_1d_array_str[];
+ extern int cl_internal_fill_image_1d_array_str_size;
+
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_IMAGE_1D_ARRAY,
+ cl_internal_fill_image_1d_array_str, (size_t)cl_internal_fill_image_1d_array_str_size, NULL);
+ }else if(src_image->image_type == CL_MEM_OBJECT_IMAGE2D) {
+ extern char cl_internal_fill_image_2d_str[];
+ extern int cl_internal_fill_image_2d_str_size;
+
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_IMAGE_2D,
+ cl_internal_fill_image_2d_str, (size_t)cl_internal_fill_image_2d_str_size, NULL);
+ }else if(src_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
+ extern char cl_internal_fill_image_2d_array_str[];
+ extern int cl_internal_fill_image_2d_array_str_size;
+
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_IMAGE_2D_ARRAY,
+ cl_internal_fill_image_2d_array_str, (size_t)cl_internal_fill_image_2d_array_str_size, NULL);
+ }else if(src_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
+ extern char cl_internal_fill_image_3d_str[];
+ extern int cl_internal_fill_image_3d_str_size;
+
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_IMAGE_3D,
+ cl_internal_fill_image_3d_str, (size_t)cl_internal_fill_image_3d_str_size, NULL);
+ }else{
+ return CL_IMAGE_FORMAT_NOT_SUPPORTED;
+ }
+
+ if (!ker)
+ return CL_OUT_OF_RESOURCES;
+
+ cl_kernel_set_arg(ker, 0, sizeof(cl_mem), &src_image);
+ cl_kernel_set_arg(ker, 1, sizeof(float)*4, pattern);
+ cl_kernel_set_arg(ker, 2, sizeof(cl_int), &region[0]);
+ cl_kernel_set_arg(ker, 3, sizeof(cl_int), &region[1]);
+ cl_kernel_set_arg(ker, 4, sizeof(cl_int), &region[2]);
+ cl_kernel_set_arg(ker, 5, sizeof(cl_int), &origin[0]);
+ cl_kernel_set_arg(ker, 6, sizeof(cl_int), &origin[1]);
+ cl_kernel_set_arg(ker, 7, sizeof(cl_int), &origin[2]);
+
+ ret = cl_command_queue_ND_range(queue, ker, 3, global_off, global_sz, local_sz);
+ return ret;
+}
+
+LOCAL cl_int
cl_mem_fill(cl_command_queue queue, const void * pattern, size_t pattern_size,
cl_mem buffer, size_t offset, size_t size)
{
@@ -1212,7 +1278,7 @@ cl_mem_fill(cl_command_queue queue, const void * pattern, size_t pattern_size,
extern char cl_internal_fill_buf_align128_str[];
extern int cl_internal_fill_buf_align128_str_size;
- ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN128,
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN128,
cl_internal_fill_buf_align128_str, (size_t)cl_internal_fill_buf_align128_str_size, NULL);
is_128 = 1;
pattern_size = pattern_size / 2;
@@ -1223,13 +1289,13 @@ cl_mem_fill(cl_command_queue queue, const void * pattern, size_t pattern_size,
extern int cl_internal_fill_buf_align8_str_size;
int order = ffs(pattern_size / 8) - 1;
- ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN8_8 + order,
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN8_8 + order,
cl_internal_fill_buf_align8_str, (size_t)cl_internal_fill_buf_align8_str_size, NULL);
} else if (pattern_size == 4) {
extern char cl_internal_fill_buf_align4_str[];
extern int cl_internal_fill_buf_align4_str_size;
- ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN4,
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN4,
cl_internal_fill_buf_align4_str, (size_t)cl_internal_fill_buf_align4_str_size, NULL);
} else if (size >= 4 && size % 4 == 0 && offset % 4 == 0) {
/* The unaligned case. But if copy size and offset are aligned to 4, we can fake
@@ -1246,7 +1312,7 @@ cl_mem_fill(cl_command_queue queue, const void * pattern, size_t pattern_size,
= pattern_comb[3] = *(char *)pattern;
}
- ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN4,
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN4,
cl_internal_fill_buf_align4_str, (size_t)cl_internal_fill_buf_align4_str_size, NULL);
pattern_size = 4;
pattern = pattern_comb;
@@ -1256,12 +1322,12 @@ cl_mem_fill(cl_command_queue queue, const void * pattern, size_t pattern_size,
else if (pattern_size == 2) {
extern char cl_internal_fill_buf_align2_str[];
extern int cl_internal_fill_buf_align2_str_size;
- ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN2,
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_ALIGN2,
cl_internal_fill_buf_align2_str, (size_t)cl_internal_fill_buf_align2_str_size, NULL);
} else if (pattern_size == 1) {
extern char cl_internal_fill_buf_unalign_str[];
extern int cl_internal_fill_buf_unalign_str_size;
- ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_UNALIGN,
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_FILL_BUFFER_UNALIGN,
cl_internal_fill_buf_unalign_str, (size_t)cl_internal_fill_buf_unalign_str_size, NULL);
} else
assert(0);
@@ -1314,7 +1380,7 @@ cl_mem_copy_buffer_rect(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf,
extern char cl_internal_copy_buf_rect_str[];
extern int cl_internal_copy_buf_rect_str_size;
- ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_RECT,
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_RECT,
cl_internal_copy_buf_rect_str, (size_t)cl_internal_copy_buf_rect_str_size, NULL);
if (!ker)
@@ -1386,13 +1452,13 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image
extern char cl_internal_copy_image_2d_to_2d_str[];
extern int cl_internal_copy_image_2d_to_2d_str_size;
- ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_2D,
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_2D,
cl_internal_copy_image_2d_to_2d_str, (size_t)cl_internal_copy_image_2d_to_2d_str_size, NULL);
}else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
extern char cl_internal_copy_image_2d_to_3d_str[];
extern int cl_internal_copy_image_2d_to_3d_str_size;
- ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_3D,
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_3D,
cl_internal_copy_image_2d_to_3d_str, (size_t)cl_internal_copy_image_2d_to_3d_str_size, NULL);
}
}else if(src_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
@@ -1400,13 +1466,13 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image
extern char cl_internal_copy_image_3d_to_2d_str[];
extern int cl_internal_copy_image_3d_to_2d_str_size;
- ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_2D,
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_2D,
cl_internal_copy_image_3d_to_2d_str, (size_t)cl_internal_copy_image_3d_to_2d_str_size, NULL);
}else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) {
extern char cl_internal_copy_image_3d_to_3d_str[];
extern int cl_internal_copy_image_3d_to_3d_str_size;
- ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_3D,
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_3D,
cl_internal_copy_image_3d_to_3d_str, (size_t)cl_internal_copy_image_3d_to_3d_str_size, NULL);
}
}
@@ -1475,13 +1541,13 @@ cl_mem_copy_image_to_buffer(cl_command_queue queue, struct _cl_mem_image* image,
extern char cl_internal_copy_image_2d_to_buffer_str[];
extern int cl_internal_copy_image_2d_to_buffer_str_size;
- ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER,
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER,
cl_internal_copy_image_2d_to_buffer_str, (size_t)cl_internal_copy_image_2d_to_buffer_str_size, NULL);
}else if(image->image_type == CL_MEM_OBJECT_IMAGE3D) {
extern char cl_internal_copy_image_3d_to_buffer_str[];
extern int cl_internal_copy_image_3d_to_buffer_str_size;
- ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER,
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER,
cl_internal_copy_image_3d_to_buffer_str, (size_t)cl_internal_copy_image_3d_to_buffer_str_size, NULL);
}
@@ -1549,13 +1615,13 @@ cl_mem_copy_buffer_to_image(cl_command_queue queue, cl_mem buffer, struct _cl_me
extern char cl_internal_copy_buffer_to_image_2d_str[];
extern int cl_internal_copy_buffer_to_image_2d_str_size;
- ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D,
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D,
cl_internal_copy_buffer_to_image_2d_str, (size_t)cl_internal_copy_buffer_to_image_2d_str_size, NULL);
}else if(image->image_type == CL_MEM_OBJECT_IMAGE3D) {
extern char cl_internal_copy_buffer_to_image_3d_str[];
extern int cl_internal_copy_buffer_to_image_3d_str_size;
- ker = cl_context_get_static_kernel_form_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D,
+ ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D,
cl_internal_copy_buffer_to_image_3d_str, (size_t)cl_internal_copy_buffer_to_image_3d_str_size, NULL);
}
if (!ker)
diff --git a/src/cl_mem.h b/src/cl_mem.h
index d5890930..8ed8e2d5 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -205,6 +205,9 @@ extern cl_int cl_mem_copy(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf
extern cl_int cl_mem_fill(cl_command_queue queue, const void * pattern, size_t pattern_size,
cl_mem buffer, size_t offset, size_t size);
+extern cl_int cl_image_fill(cl_command_queue queue, const void * pattern, struct _cl_mem_image*,
+ const size_t *, const size_t *);
+
/* api clEnqueueCopyBufferRect help function */
extern cl_int cl_mem_copy_buffer_rect(cl_command_queue, cl_mem, cl_mem,
const size_t *, const size_t *, const size_t *,
diff --git a/src/kernels/cl_internal_fill_image_1d.cl b/src/kernels/cl_internal_fill_image_1d.cl
new file mode 100644
index 00000000..b3b0cbf3
--- /dev/null
+++ b/src/kernels/cl_internal_fill_image_1d.cl
@@ -0,0 +1,14 @@
+kernel void __cl_fill_image_1d( __write_only image1d_t image, float4 pattern,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int origin0, unsigned int origin1, unsigned int origin2)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ int coord;
+ if((i >= region0) || (j>= region1) || (k>=region2))
+ return;
+ coord = origin0 + i;
+ write_imagef(image, coord, pattern);
+
+}
diff --git a/src/kernels/cl_internal_fill_image_1d_array.cl b/src/kernels/cl_internal_fill_image_1d_array.cl
new file mode 100644
index 00000000..f1eb2412
--- /dev/null
+++ b/src/kernels/cl_internal_fill_image_1d_array.cl
@@ -0,0 +1,15 @@
+kernel void __cl_fill_image_1d_array( __write_only image1d_array_t image, float4 pattern,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int origin0, unsigned int origin1, unsigned int origin2)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ int2 coord;
+ if((i >= region0) || (j>= region1) || (k>=region2))
+ return;
+ coord.x = origin0 + i;
+ coord.y = origin2 + k;
+ write_imagef(image, coord, pattern);
+
+}
diff --git a/src/kernels/cl_internal_fill_image_2d.cl b/src/kernels/cl_internal_fill_image_2d.cl
new file mode 100644
index 00000000..0e29f3e1
--- /dev/null
+++ b/src/kernels/cl_internal_fill_image_2d.cl
@@ -0,0 +1,15 @@
+kernel void __cl_fill_image_2d( __write_only image2d_t image, float4 pattern,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int origin0, unsigned int origin1, unsigned int origin2)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ int2 coord;
+ if((i >= region0) || (j>= region1) || (k>=region2))
+ return;
+ coord.x = origin0 + i;
+ coord.y = origin1 + j;
+ write_imagef(image, coord, pattern);
+
+}
diff --git a/src/kernels/cl_internal_fill_image_2d_array.cl b/src/kernels/cl_internal_fill_image_2d_array.cl
new file mode 100644
index 00000000..f29c9e76
--- /dev/null
+++ b/src/kernels/cl_internal_fill_image_2d_array.cl
@@ -0,0 +1,16 @@
+kernel void __cl_fill_image_2d_array( __write_only image2d_array_t image, float4 pattern,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int origin0, unsigned int origin1, unsigned int origin2)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ int4 coord;
+ if((i >= region0) || (j>= region1) || (k>=region2))
+ return;
+ coord.x = origin0 + i;
+ coord.y = origin1 + j;
+ coord.z = origin2 + k;
+ write_imagef(image, coord, pattern);
+
+}
diff --git a/src/kernels/cl_internal_fill_image_3d.cl b/src/kernels/cl_internal_fill_image_3d.cl
new file mode 100644
index 00000000..042b8ab2
--- /dev/null
+++ b/src/kernels/cl_internal_fill_image_3d.cl
@@ -0,0 +1,16 @@
+kernel void __cl_fill_image_3d( __write_only image3d_t image, float4 pattern,
+ unsigned int region0, unsigned int region1, unsigned int region2,
+ unsigned int origin0, unsigned int origin1, unsigned int origin2)
+{
+ int i = get_global_id(0);
+ int j = get_global_id(1);
+ int k = get_global_id(2);
+ int4 coord;
+ if((i >= region0) || (j>= region1) || (k>=region2))
+ return;
+ coord.x = origin0 + i;
+ coord.y = origin1 + j;
+ coord.z = origin2 + k;
+ write_imagef(image, coord, pattern);
+
+}