summaryrefslogtreecommitdiff
path: root/src/cl_context.c
diff options
context:
space:
mode:
authorJunyan He <junyan.he@linux.intel.com>2014-06-13 13:30:42 +0800
committerZhigang Gong <zhigang.gong@intel.com>2014-06-13 13:50:39 +0800
commitc35ccb35e8642afb4bbc351421633b53e6538e3f (patch)
tree3adae42f6114333c59bad69e286dfb8a77b5028a /src/cl_context.c
parentff0d08b798608fbf6539fbaea016e7a90ecfe782 (diff)
downloadbeignet-c35ccb35e8642afb4bbc351421633b53e6538e3f.tar.gz
Implement the clEnqueueFillBuffer API.
We use floatn assignments to do the copy. The pattern size of 128 corresponds to double16, and because of the double problem on our platform, we use float16 to handle this. Unaligned cases are not optimized for now; they just use char assignments. Signed-off-by: Junyan He <junyan.he@linux.intel.com> Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
Diffstat (limited to 'src/cl_context.c')
-rw-r--r--src/cl_context.c79
1 files changed, 73 insertions, 6 deletions
diff --git a/src/cl_context.c b/src/cl_context.c
index 1911bf21..a0da7b0a 100644
--- a/src/cl_context.c
+++ b/src/cl_context.c
@@ -188,6 +188,7 @@ error:
LOCAL void
cl_context_delete(cl_context ctx)
{
+ int i = 0;
if (UNLIKELY(ctx == NULL))
return;
@@ -195,6 +196,18 @@ cl_context_delete(cl_context ctx)
if (atomic_dec(&ctx->ref_n) > 1)
return;
+ /* delete the internal programs. */
+ for (i = CL_INTERNAL_KERNEL_MIN; i < CL_INTERNAL_KERNEL_MAX; i++) {
+ if (ctx->internel_kernels[i]) {
+ cl_kernel_delete(ctx->internel_kernels[i]);
+ ctx->internel_kernels[i] = NULL;
+
+ assert(ctx->internal_prgs[i]);
+ cl_program_delete(ctx->internal_prgs[i]);
+ ctx->internal_prgs[i] = NULL;
+ }
+ }
+
/* All object lists should have been freed. Otherwise, the reference counter
* of the context cannot be 0
*/
@@ -250,8 +263,7 @@ cl_kernel
cl_context_get_static_kernel(cl_context ctx, cl_int index, const char * str_kernel, const char * str_option)
{
cl_int ret;
- if (!ctx->internal_prgs[index])
- {
+ if (!ctx->internal_prgs[index]) {
size_t length = strlen(str_kernel) + 1;
ctx->internal_prgs[index] = cl_program_create_from_source(ctx, 1, &str_kernel, &length, NULL);
@@ -264,7 +276,35 @@ cl_context_get_static_kernel(cl_context ctx, cl_int index, const char * str_kern
ctx->internal_prgs[index]->is_built = 1;
- ctx->internel_kernels[index] = cl_kernel_dup(ctx->internal_prgs[index]->ker[0]);
+ /* All CL_ENQUEUE_FILL_BUFFER_ALIGN8_xxx use the same program, but different kernels. */
+ if (index >= CL_ENQUEUE_FILL_BUFFER_ALIGN8_8 && index <= CL_ENQUEUE_FILL_BUFFER_ALIGN8_64) {
+ int i = CL_ENQUEUE_FILL_BUFFER_ALIGN8_8;
+ for (; i <= CL_ENQUEUE_FILL_BUFFER_ALIGN8_64; i++) {
+ if (index != i) {
+ assert(ctx->internal_prgs[i] == NULL);
+ assert(ctx->internel_kernels[i] == NULL);
+ cl_program_add_ref(ctx->internal_prgs[index]);
+ ctx->internal_prgs[i] = ctx->internal_prgs[index];
+ }
+
+ if (i == CL_ENQUEUE_FILL_BUFFER_ALIGN8_8) {
+ ctx->internel_kernels[i] = cl_program_create_kernel(ctx->internal_prgs[index],
+ "__cl_fill_region_align8_2", NULL);
+ } else if (i == CL_ENQUEUE_FILL_BUFFER_ALIGN8_16) {
+ ctx->internel_kernels[i] = cl_program_create_kernel(ctx->internal_prgs[index],
+ "__cl_fill_region_align8_4", NULL);
+ } else if (i == CL_ENQUEUE_FILL_BUFFER_ALIGN8_32) {
+ ctx->internel_kernels[i] = cl_program_create_kernel(ctx->internal_prgs[index],
+ "__cl_fill_region_align8_8", NULL);
+ } else if (i == CL_ENQUEUE_FILL_BUFFER_ALIGN8_64) {
+ ctx->internel_kernels[i] = cl_program_create_kernel(ctx->internal_prgs[index],
+ "__cl_fill_region_align8_16", NULL);
+ } else
+ assert(0);
+ }
+ } else {
+ ctx->internel_kernels[index] = cl_kernel_dup(ctx->internal_prgs[index]->ker[0]);
+ }
}
return ctx->internel_kernels[index];
@@ -276,8 +316,7 @@ cl_context_get_static_kernel_form_bin(cl_context ctx, cl_int index,
{
cl_int ret;
cl_int binary_status = CL_SUCCESS;
- if (!ctx->internal_prgs[index])
- {
+ if (!ctx->internal_prgs[index]) {
ctx->internal_prgs[index] = cl_program_create_from_binary(ctx, 1, &ctx->device,
&size, (const unsigned char **)&str_kernel, &binary_status, &ret);
@@ -290,7 +329,35 @@ cl_context_get_static_kernel_form_bin(cl_context ctx, cl_int index,
ctx->internal_prgs[index]->is_built = 1;
- ctx->internel_kernels[index] = cl_kernel_dup(ctx->internal_prgs[index]->ker[0]);
+ /* All CL_ENQUEUE_FILL_BUFFER_ALIGN8_xxx use the same program, but different kernels. */
+ if (index >= CL_ENQUEUE_FILL_BUFFER_ALIGN8_8 && index <= CL_ENQUEUE_FILL_BUFFER_ALIGN8_64) {
+ int i = CL_ENQUEUE_FILL_BUFFER_ALIGN8_8;
+ for (; i <= CL_ENQUEUE_FILL_BUFFER_ALIGN8_64; i++) {
+ if (index != i) {
+ assert(ctx->internal_prgs[i] == NULL);
+ assert(ctx->internel_kernels[i] == NULL);
+ cl_program_add_ref(ctx->internal_prgs[index]);
+ ctx->internal_prgs[i] = ctx->internal_prgs[index];
+ }
+
+ if (i == CL_ENQUEUE_FILL_BUFFER_ALIGN8_8) {
+ ctx->internel_kernels[i] = cl_program_create_kernel(ctx->internal_prgs[index],
+ "__cl_fill_region_align8_2", NULL);
+ } else if (i == CL_ENQUEUE_FILL_BUFFER_ALIGN8_16) {
+ ctx->internel_kernels[i] = cl_program_create_kernel(ctx->internal_prgs[index],
+ "__cl_fill_region_align8_4", NULL);
+ } else if (i == CL_ENQUEUE_FILL_BUFFER_ALIGN8_32) {
+ ctx->internel_kernels[i] = cl_program_create_kernel(ctx->internal_prgs[index],
+ "__cl_fill_region_align8_8", NULL);
+ } else if (i == CL_ENQUEUE_FILL_BUFFER_ALIGN8_64) {
+ ctx->internel_kernels[i] = cl_program_create_kernel(ctx->internal_prgs[index],
+ "__cl_fill_region_align8_16", NULL);
+ } else
+ assert(0);
+ }
+ } else {
+ ctx->internel_kernels[index] = cl_kernel_dup(ctx->internal_prgs[index]->ker[0]);
+ }
}
return ctx->internel_kernels[index];