diff options
author | Junyan He <junyan.he@linux.intel.com> | 2014-06-13 13:30:42 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@intel.com> | 2014-06-13 13:50:39 +0800 |
commit | c35ccb35e8642afb4bbc351421633b53e6538e3f (patch) | |
tree | 3adae42f6114333c59bad69e286dfb8a77b5028a /src/cl_context.c | |
parent | ff0d08b798608fbf6539fbaea016e7a90ecfe782 (diff) | |
download | beignet-c35ccb35e8642afb4bbc351421633b53e6538e3f.tar.gz |
Implement the clEnqueueFillBuffer API.
We use floatn assignment to do the copy.
The 128-byte pattern size corresponds to double16, and because
of the problem with double on our platform, we use float16
to handle this.
Unaligned cases are not optimized for now; they just use char
assignment.
Signed-off-by: Junyan He <junyan.he@linux.intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
Diffstat (limited to 'src/cl_context.c')
-rw-r--r-- | src/cl_context.c | 79 |
1 files changed, 73 insertions, 6 deletions
diff --git a/src/cl_context.c b/src/cl_context.c index 1911bf21..a0da7b0a 100644 --- a/src/cl_context.c +++ b/src/cl_context.c @@ -188,6 +188,7 @@ error: LOCAL void cl_context_delete(cl_context ctx) { + int i = 0; if (UNLIKELY(ctx == NULL)) return; @@ -195,6 +196,18 @@ cl_context_delete(cl_context ctx) if (atomic_dec(&ctx->ref_n) > 1) return; + /* delete the internal programs. */ + for (i = CL_INTERNAL_KERNEL_MIN; i < CL_INTERNAL_KERNEL_MAX; i++) { + if (ctx->internel_kernels[i]) { + cl_kernel_delete(ctx->internel_kernels[i]); + ctx->internel_kernels[i] = NULL; + + assert(ctx->internal_prgs[i]); + cl_program_delete(ctx->internal_prgs[i]); + ctx->internal_prgs[i] = NULL; + } + } + /* All object lists should have been freed. Otherwise, the reference counter * of the context cannot be 0 */ @@ -250,8 +263,7 @@ cl_kernel cl_context_get_static_kernel(cl_context ctx, cl_int index, const char * str_kernel, const char * str_option) { cl_int ret; - if (!ctx->internal_prgs[index]) - { + if (!ctx->internal_prgs[index]) { size_t length = strlen(str_kernel) + 1; ctx->internal_prgs[index] = cl_program_create_from_source(ctx, 1, &str_kernel, &length, NULL); @@ -264,7 +276,35 @@ cl_context_get_static_kernel(cl_context ctx, cl_int index, const char * str_kern ctx->internal_prgs[index]->is_built = 1; - ctx->internel_kernels[index] = cl_kernel_dup(ctx->internal_prgs[index]->ker[0]); + /* All CL_ENQUEUE_FILL_BUFFER_ALIGN16_xxx use the same program, different kernel. 
*/ + if (index >= CL_ENQUEUE_FILL_BUFFER_ALIGN8_8 && index <= CL_ENQUEUE_FILL_BUFFER_ALIGN8_64) { + int i = CL_ENQUEUE_FILL_BUFFER_ALIGN8_8; + for (; i <= CL_ENQUEUE_FILL_BUFFER_ALIGN8_64; i++) { + if (index != i) { + assert(ctx->internal_prgs[i] == NULL); + assert(ctx->internel_kernels[i] == NULL); + cl_program_add_ref(ctx->internal_prgs[index]); + ctx->internal_prgs[i] = ctx->internal_prgs[index]; + } + + if (i == CL_ENQUEUE_FILL_BUFFER_ALIGN8_8) { + ctx->internel_kernels[i] = cl_program_create_kernel(ctx->internal_prgs[index], + "__cl_fill_region_align8_2", NULL); + } else if (i == CL_ENQUEUE_FILL_BUFFER_ALIGN8_16) { + ctx->internel_kernels[i] = cl_program_create_kernel(ctx->internal_prgs[index], + "__cl_fill_region_align8_4", NULL); + } else if (i == CL_ENQUEUE_FILL_BUFFER_ALIGN8_32) { + ctx->internel_kernels[i] = cl_program_create_kernel(ctx->internal_prgs[index], + "__cl_fill_region_align8_8", NULL); + } else if (i == CL_ENQUEUE_FILL_BUFFER_ALIGN8_64) { + ctx->internel_kernels[i] = cl_program_create_kernel(ctx->internal_prgs[index], + "__cl_fill_region_align8_16", NULL); + } else + assert(0); + } + } else { + ctx->internel_kernels[index] = cl_kernel_dup(ctx->internal_prgs[index]->ker[0]); + } } return ctx->internel_kernels[index]; @@ -276,8 +316,7 @@ cl_context_get_static_kernel_form_bin(cl_context ctx, cl_int index, { cl_int ret; cl_int binary_status = CL_SUCCESS; - if (!ctx->internal_prgs[index]) - { + if (!ctx->internal_prgs[index]) { ctx->internal_prgs[index] = cl_program_create_from_binary(ctx, 1, &ctx->device, &size, (const unsigned char **)&str_kernel, &binary_status, &ret); @@ -290,7 +329,35 @@ cl_context_get_static_kernel_form_bin(cl_context ctx, cl_int index, ctx->internal_prgs[index]->is_built = 1; - ctx->internel_kernels[index] = cl_kernel_dup(ctx->internal_prgs[index]->ker[0]); + /* All CL_ENQUEUE_FILL_BUFFER_ALIGN16_xxx use the same program, different kernel. 
*/ + if (index >= CL_ENQUEUE_FILL_BUFFER_ALIGN8_8 && index <= CL_ENQUEUE_FILL_BUFFER_ALIGN8_64) { + int i = CL_ENQUEUE_FILL_BUFFER_ALIGN8_8; + for (; i <= CL_ENQUEUE_FILL_BUFFER_ALIGN8_64; i++) { + if (index != i) { + assert(ctx->internal_prgs[i] == NULL); + assert(ctx->internel_kernels[i] == NULL); + cl_program_add_ref(ctx->internal_prgs[index]); + ctx->internal_prgs[i] = ctx->internal_prgs[index]; + } + + if (i == CL_ENQUEUE_FILL_BUFFER_ALIGN8_8) { + ctx->internel_kernels[i] = cl_program_create_kernel(ctx->internal_prgs[index], + "__cl_fill_region_align8_2", NULL); + } else if (i == CL_ENQUEUE_FILL_BUFFER_ALIGN8_16) { + ctx->internel_kernels[i] = cl_program_create_kernel(ctx->internal_prgs[index], + "__cl_fill_region_align8_4", NULL); + } else if (i == CL_ENQUEUE_FILL_BUFFER_ALIGN8_32) { + ctx->internel_kernels[i] = cl_program_create_kernel(ctx->internal_prgs[index], + "__cl_fill_region_align8_8", NULL); + } else if (i == CL_ENQUEUE_FILL_BUFFER_ALIGN8_64) { + ctx->internel_kernels[i] = cl_program_create_kernel(ctx->internal_prgs[index], + "__cl_fill_region_align8_16", NULL); + } else + assert(0); + } + } else { + ctx->internel_kernels[index] = cl_kernel_dup(ctx->internal_prgs[index]->ker[0]); + } } return ctx->internel_kernels[index]; |