diff options
author | Luo Xionghu <xionghu.luo@intel.com> | 2017-03-10 01:30:33 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2017-03-13 16:56:34 +0800 |
commit | 20fd72f6b2d178bdabf76159a84f7514e0fd3f75 (patch) | |
tree | fbd6b742bfc8d0d76be3daa17167c745690efdfa /src | |
parent | c777c714d1149c1b35b2cb044748377425f424c5 (diff) | |
download | beignet-20fd72f6b2d178bdabf76159a84f7514e0fd3f75.tar.gz |
add extension intel_planar_yuv.
Create a single bo of size w * (3/2 * h) for the whole CL_NV12_INTEL format
surface. The Y surface (format CL_R) shares the first w * h part of that bo,
and the UV surface (format CL_RG) shares the remaining w * (h / 2) part; set
the correct bo offset for the UV surface on each platform.
v2: add the extension definition in libocl; fix error checking.
Signed-off-by: Luo Xionghu <xionghu.luo@intel.com>
Reviewed-by: Yang Rong <rong.r.yang@intel.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/cl_api.c | 9 | ||||
-rw-r--r-- | src/cl_device_id.c | 2 | ||||
-rw-r--r-- | src/cl_extensions.h | 5 | ||||
-rw-r--r-- | src/cl_image.c | 7 | ||||
-rw-r--r-- | src/cl_mem.c | 158 | ||||
-rw-r--r-- | src/cl_mem.h | 2 | ||||
-rw-r--r-- | src/intel/intel_defines.h | 1 | ||||
-rw-r--r-- | src/intel/intel_gpgpu.c | 38 | ||||
-rw-r--r-- | src/intel/intel_structs.h | 19 |
9 files changed, 217 insertions, 24 deletions
diff --git a/src/cl_api.c b/src/cl_api.c index 24b8b3d8..036ae172 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -134,7 +134,7 @@ clCreateImage(cl_context context, goto error; } if (image_format->image_channel_order < CL_R || - image_format->image_channel_order > CL_sBGRA) { + (image_format->image_channel_order > CL_sBGRA && image_format->image_channel_order != CL_NV12_INTEL)) { err = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; goto error; } @@ -166,6 +166,13 @@ clCreateImage(cl_context context, goto error; } + if (image_format->image_channel_order == CL_NV12_INTEL && + (image_format->image_channel_data_type != CL_UNORM_INT8 || + image_desc->image_width % 4 || image_desc->image_height % 4)) { + err = CL_INVALID_IMAGE_DESCRIPTOR; + goto error; + } + /* Other details check for image_desc will leave to image create. */ mem = cl_mem_new_image(context, flags, diff --git a/src/cl_device_id.c b/src/cl_device_id.c index d4f4208e..50ed0d99 100644 --- a/src/cl_device_id.c +++ b/src/cl_device_id.c @@ -1075,10 +1075,12 @@ cl_get_device_info(cl_device_id device, src_size = sizeof(device->image_max_array_size); break; case CL_DEVICE_IMAGE2D_MAX_WIDTH: + case CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL: src_ptr = &device->image2d_max_width; src_size = sizeof(device->image2d_max_width); break; case CL_DEVICE_IMAGE2D_MAX_HEIGHT: + case CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL: src_ptr = &device->image2d_max_height; src_size = sizeof(device->image2d_max_height); break; diff --git a/src/cl_extensions.h b/src/cl_extensions.h index 52a49531..55747a7b 100644 --- a/src/cl_extensions.h +++ b/src/cl_extensions.h @@ -29,7 +29,8 @@ DECL_EXT(intel_accelerator) \ DECL_EXT(intel_motion_estimation) \ DECL_EXT(intel_subgroups) \ - DECL_EXT(intel_subgroups_short) + DECL_EXT(intel_subgroups_short) \ + DECL_EXT(intel_planar_yuv) #define DECL_GL_EXTENSIONS \ DECL_EXT(khr_gl_sharing)\ @@ -64,7 +65,7 @@ cl_khr_extension_id_max #define OPT1_EXT_START_ID EXT_ID(khr_int64_base_atomics) #define OPT1_EXT_END_ID 
EXT_ID(khr_icd) #define INTEL_EXT_START_ID EXT_ID(intel_accelerator) -#define INTEL_EXT_END_ID EXT_ID(intel_subgroups_short) +#define INTEL_EXT_END_ID EXT_ID(intel_planar_yuv) #define GL_EXT_START_ID EXT_ID(khr_gl_sharing) #define GL_EXT_END_ID EXT_ID(khr_gl_msaa_sharing) diff --git a/src/cl_image.c b/src/cl_image.c index 5ff459a0..fbdc17b2 100644 --- a/src/cl_image.c +++ b/src/cl_image.c @@ -17,6 +17,7 @@ * Author: Benjamin Segovia <benjamin.segovia@intel.com> */ +#include "CL/cl_ext.h" #include "cl_image.h" #include "cl_utils.h" #include "intel/intel_defines.h" @@ -97,6 +98,7 @@ cl_image_byte_per_pixel(const cl_image_format *fmt, uint32_t *bpp) return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; *bpp *= 4; break; + case CL_NV12_INTEL: break; default: return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; }; @@ -208,6 +210,11 @@ cl_image_get_intel_format(const cl_image_format *fmt) case CL_UNORM_INT8: return I965_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB; default: return INTEL_UNSUPPORTED_FORMAT; }; + case CL_NV12_INTEL: + switch (type) { + case CL_UNORM_INT8: return I965_SURFACEFORMAT_PLANAR_420_8; + default: return INTEL_UNSUPPORTED_FORMAT; + }; default: return INTEL_UNSUPPORTED_FORMAT; }; } diff --git a/src/cl_mem.c b/src/cl_mem.c index 0278b7fc..4a7bec82 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -152,6 +152,7 @@ cl_mem_allocate(enum cl_mem_type type, mem->cmrt_mem = NULL; if (mem->type == CL_MEM_IMAGE_TYPE) { cl_mem_image(mem)->is_image_from_buffer = 0; + cl_mem_image(mem)->is_image_from_nv12_image = 0; } if (sz != 0) { @@ -230,7 +231,11 @@ cl_mem_allocate(enum cl_mem_type type, } // if the image if created from buffer, should use the bo directly to share same bo. 
mem->bo = buffer->bo; - cl_mem_image(mem)->is_image_from_buffer = 1; + if (IS_IMAGE(buffer) && cl_mem_image(buffer)->fmt.image_channel_order == CL_NV12_INTEL) { + cl_mem_image(mem)->is_image_from_nv12_image = 1; + } else { + cl_mem_image(mem)->is_image_from_buffer = 1; + } bufCreated = 1; } @@ -827,7 +832,7 @@ _cl_mem_new_image(cl_context ctx, h = (w + ctx->devices[0]->image2d_max_width - 1) / ctx->devices[0]->image2d_max_width; w = w > ctx->devices[0]->image2d_max_width ? ctx->devices[0]->image2d_max_width : w; tiling = CL_NO_TILE; - } else if(image_type == CL_MEM_OBJECT_IMAGE2D && buffer != NULL) { + } else if(image_type == CL_MEM_OBJECT_IMAGE2D && buffer != NULL && !IS_IMAGE(buffer)) { tiling = CL_NO_TILE; } else if (cl_driver_get_ver(ctx->drv) != 6) { /* Pick up tiling mode (we do only linear on SNB) */ @@ -873,6 +878,9 @@ _cl_mem_new_image(cl_context ctx, assert(0); #undef DO_IMAGE_ERROR + if (fmt->image_channel_order == CL_NV12_INTEL) { + h += h/2; + } uint8_t enableUserptr = 0; if (enable_true_hostptr && ctx->devices[0]->host_unified_memory && data != NULL && (flags & CL_MEM_USE_HOST_PTR)) { @@ -894,7 +902,7 @@ _cl_mem_new_image(cl_context ctx, aligned_pitch = pitch; //no need align the height if 2d image from buffer. //the pitch should be same with buffer's pitch as they share same bo. - if (image_type == CL_MEM_OBJECT_IMAGE2D && buffer != NULL) { + if (image_type == CL_MEM_OBJECT_IMAGE2D && buffer != NULL && !IS_IMAGE(buffer)) { if(aligned_pitch < pitch) { aligned_pitch = pitch; } @@ -911,7 +919,7 @@ _cl_mem_new_image(cl_context ctx, } sz = aligned_pitch * aligned_h * depth; - if (image_type == CL_MEM_OBJECT_IMAGE2D && buffer != NULL) { + if (image_type == CL_MEM_OBJECT_IMAGE2D && buffer != NULL && !IS_IMAGE(buffer)) { //image 2d created from buffer: per spec, the buffer sz maybe larger than the image 2d. 
if (buffer->size >= sz) sz = buffer->size; @@ -979,6 +987,11 @@ _cl_mem_new_image(cl_context ctx, cl_mem_copy_image(cl_mem_image(mem), pitch, slice_pitch, data); } + /* copy yuv data if required */ + if(fmt->image_channel_order == CL_NV12_INTEL && data) { + cl_mem_copy_image(cl_mem_image(mem), pitch, slice_pitch, data); + } + exit: if (errcode_ret) *errcode_ret = err; @@ -990,6 +1003,121 @@ error: } static cl_mem +_cl_mem_new_image_from_nv12_image(cl_context ctx, + cl_mem_flags flags, + const cl_image_format* image_format, + const cl_image_desc *image_desc, + cl_int *errcode_ret) +{ + cl_mem image = NULL; + cl_mem imageIn = image_desc->mem_object; + cl_int err = CL_SUCCESS; + *errcode_ret = err; + uint32_t bpp; + uint32_t intel_fmt = INTEL_UNSUPPORTED_FORMAT; + size_t width = 0; + size_t height = 0; + size_t depth = 0; + + /* Get the size of each pixel */ + if (UNLIKELY((err = cl_image_byte_per_pixel(image_format, &bpp)) != CL_SUCCESS)) + goto error; + + /* Only a sub-set of the formats are supported */ + intel_fmt = cl_image_get_intel_format(image_format); + if (UNLIKELY(intel_fmt == INTEL_UNSUPPORTED_FORMAT)) { + err = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR; + goto error; + } + + if(imageIn == NULL) { + err = CL_INVALID_IMAGE_DESCRIPTOR; + goto error; + } + + if (cl_mem_image(imageIn)->fmt.image_channel_order != CL_NV12_INTEL || + (image_format->image_channel_order != CL_R && + image_format->image_channel_order != CL_RG) || + image_format->image_channel_data_type != CL_UNORM_INT8) { + err = CL_INVALID_IMAGE_DESCRIPTOR; + goto error; + } + + width = cl_mem_image(imageIn)->w; + if (image_desc->image_depth == 0) { + height = cl_mem_image(imageIn)->h * 2 / 3; + } else if (image_desc->image_depth == 1) { + width = cl_mem_image(imageIn)->w / 2; + height = cl_mem_image(imageIn)->h / 3; + } else { + err = CL_INVALID_IMAGE_DESCRIPTOR; + goto error; + } + + //flags check here. 
+ if ((flags & CL_MEM_USE_HOST_PTR) || (flags & CL_MEM_ALLOC_HOST_PTR) || + (flags & CL_MEM_COPY_HOST_PTR)) { + err = CL_INVALID_VALUE; + goto error; + } + + if (!(imageIn->flags & CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)) { + if (((flags & CL_MEM_READ_WRITE) || (flags & CL_MEM_READ_ONLY)) && + (imageIn->flags & CL_MEM_WRITE_ONLY)) { + err = CL_INVALID_VALUE; + goto error; + } + if (((flags & CL_MEM_READ_WRITE) || (flags & CL_MEM_WRITE_ONLY)) && + (imageIn->flags | CL_MEM_READ_ONLY)) { + err = CL_INVALID_VALUE; + goto error; + } + if (((flags & CL_MEM_READ_WRITE) || (flags & CL_MEM_WRITE_ONLY) ||(flags & CL_MEM_READ_ONLY)) && + (imageIn->flags & CL_MEM_NO_ACCESS_INTEL)) { + err = CL_INVALID_VALUE; + goto error; + } + if ((flags & CL_MEM_HOST_READ_ONLY) && + (imageIn->flags & CL_MEM_HOST_WRITE_ONLY)) { + err = CL_INVALID_VALUE; + goto error; + } + if ((flags & CL_MEM_HOST_WRITE_ONLY) && + (imageIn->flags & CL_MEM_HOST_READ_ONLY)) { + err = CL_INVALID_VALUE; + goto error; + } + if (((flags & CL_MEM_HOST_READ_ONLY) || (flags & CL_MEM_HOST_WRITE_ONLY)) && + (imageIn->flags & CL_MEM_HOST_NO_ACCESS)) { + err = CL_INVALID_VALUE; + goto error; + } + } + + image = _cl_mem_new_image(ctx, flags, image_format, image_desc->image_type, + width, height, depth, cl_mem_image(imageIn)->row_pitch, + 0, NULL, + imageIn, errcode_ret); + if (image == NULL) + return NULL; + + + if (image_desc->image_depth == 1) { + cl_mem_image(image)->offset = cl_mem_image(imageIn)->row_pitch * height * 2; + } + cl_mem_image(image)->nv12_image = imageIn; + cl_mem_add_ref(imageIn); + return image; + +error: + if (image) + cl_mem_delete(image); + image = NULL; + *errcode_ret = err; + return image; +} + +static cl_mem _cl_mem_new_image_from_buffer(cl_context ctx, cl_mem_flags flags, const cl_image_format* image_format, @@ -1034,7 +1162,7 @@ _cl_mem_new_image_from_buffer(cl_context ctx, goto error; } if ((buffer->flags & CL_MEM_READ_ONLY) && - (flags & (CL_MEM_READ_WRITE|CL_MEM_WRITE_ONLY))) { + (flags & 
(CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY))) { err = CL_INVALID_VALUE; goto error; } @@ -1169,9 +1297,14 @@ cl_mem_new_image(cl_context context, image_desc->image_row_pitch, image_desc->image_slice_pitch, host_ptr, NULL, errcode_ret); case CL_MEM_OBJECT_IMAGE2D: - if(image_desc->buffer) - return _cl_mem_new_image_from_buffer(context, flags, image_format, - image_desc, errcode_ret); + if (image_desc->buffer) { + if (IS_IMAGE(image_desc->buffer)) { + return _cl_mem_new_image_from_nv12_image(context, flags, image_format, + image_desc, errcode_ret); + } else + return _cl_mem_new_image_from_buffer(context, flags, image_format, + image_desc, errcode_ret); + } else return _cl_mem_new_image(context, flags, image_format, image_desc->image_type, image_desc->image_width, image_desc->image_height, image_desc->image_depth, @@ -1247,6 +1380,15 @@ cl_mem_delete(cl_mem mem) mem->bo = NULL; } } + if (cl_mem_image(mem)->nv12_image) { + assert(cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE2D); + cl_mem_delete(cl_mem_image(mem)->nv12_image); + if(cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE2D && cl_mem_image(mem)->is_image_from_nv12_image == 1) + { + cl_mem_image(mem)->nv12_image = NULL; + mem->bo = NULL; + } + } } /* Someone still mapped, unmap */ diff --git a/src/cl_mem.h b/src/cl_mem.h index 4764401d..edfd0436 100644 --- a/src/cl_mem.h +++ b/src/cl_mem.h @@ -143,6 +143,8 @@ struct _cl_mem_image { size_t offset; /* offset for dri_bo, used when it's reloc. 
*/ cl_mem buffer_1d; /* if the image is created from buffer, it point to the buffer.*/ uint8_t is_image_from_buffer; /* IMAGE from Buffer*/ + cl_mem nv12_image; /* if the image is created from nv12 Image, it point to the image.*/ + uint8_t is_image_from_nv12_image; /* IMAGE from NV12 Image*/ }; struct _cl_mem_gl_image { diff --git a/src/intel/intel_defines.h b/src/intel/intel_defines.h index 6ada30cd..36be4fb8 100644 --- a/src/intel/intel_defines.h +++ b/src/intel/intel_defines.h @@ -267,6 +267,7 @@ #define I965_SURFACEFORMAT_R16G16B16_SNORM 0x19D #define I965_SURFACEFORMAT_R16G16B16_SSCALED 0x19E #define I965_SURFACEFORMAT_R16G16B16_USCALED 0x19F +#define I965_SURFACEFORMAT_PLANAR_420_8 0x1A5 #define I965_SURFACEFORMAT_RAW 0x1FF #define I965_MAPFILTER_NEAREST 0x0 diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index 283b07a4..041938fe 100644 --- a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -1258,6 +1258,14 @@ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu, ss->ss0.surface_array = 1; ss->ss0.surface_array_spacing = 1; } + + if (obj_bo_offset && tiling != GPGPU_NO_TILE) { + uint32_t unaligned = obj_bo_offset; + obj_bo_offset = (obj_bo_offset / 0x1000) * 0x1000; + uint32_t h_ = (unaligned - obj_bo_offset )/ pitch; + ss->ss5.y_offset = h_ / 2; + } + ss->ss0.surface_format = format; ss->ss1.base_addr = obj_bo->offset + obj_bo_offset; ss->ss2.width = w - 1; @@ -1354,6 +1362,14 @@ intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu, ss->ss0.surface_array = 1; ss->ss0.surface_array_spacing = 1; } + + if (obj_bo_offset && tiling != GPGPU_NO_TILE) { + uint32_t unaligned = obj_bo_offset; + obj_bo_offset = (obj_bo_offset / 0x1000) * 0x1000; + uint32_t h_ = (unaligned - obj_bo_offset )/ pitch; + ss->ss5.y_offset = h_ / 2; + } + ss->ss0.surface_format = format; ss->ss1.base_addr = obj_bo->offset + obj_bo_offset; ss->ss2.width = w - 1; @@ -1419,6 +1435,13 @@ intel_gpgpu_bind_image_gen8(intel_gpgpu_t *gpgpu, ss->ss2.height = h - 1; ss->ss3.depth = 
depth - 1; + if(obj_bo_offset && tiling != GPGPU_NO_TILE) { + uint32_t unaligned = obj_bo_offset; + obj_bo_offset = (obj_bo_offset / 0x1000) * 0x1000; + uint32_t h_ = (unaligned - obj_bo_offset) / pitch; + ss->ss5.y_offset = h_ / 4; + } + ss->ss8.surface_base_addr_lo = (obj_bo->offset64 + obj_bo_offset) & 0xffffffff; ss->ss9.surface_base_addr_hi = ((obj_bo->offset64 + obj_bo_offset) >> 32) & 0xffffffff; @@ -1427,6 +1450,10 @@ intel_gpgpu_bind_image_gen8(intel_gpgpu_t *gpgpu, ss->ss3.surface_pitch = pitch - 1; ss->ss1.mem_obj_ctrl_state = cl_gpgpu_get_cache_ctrl(); + //NV12 surface. the height is 3/2 * h, so need set proper offset here. + if (format == I965_SURFACEFORMAT_PLANAR_420_8) + ss->ss6.uv_plane_y_offset = h * 2 / 3; + ss->ss7.shader_channel_select_red = I965_SURCHAN_SELECT_RED; ss->ss7.shader_channel_select_green = I965_SURCHAN_SELECT_GREEN; ss->ss7.shader_channel_select_blue = I965_SURCHAN_SELECT_BLUE; @@ -1495,6 +1522,13 @@ intel_gpgpu_bind_image_gen9(intel_gpgpu_t *gpgpu, ss->ss2.height = h - 1; ss->ss3.depth = depth - 1; + if (obj_bo_offset && tiling != GPGPU_NO_TILE) { + uint32_t unaligned = obj_bo_offset; + obj_bo_offset = (obj_bo_offset / 0x1000) * 0x1000; + uint32_t h_ = (unaligned - obj_bo_offset )/ pitch; + ss->ss5.y_offset = h_ / 4; + } + ss->ss8.surface_base_addr_lo = (obj_bo->offset64 + obj_bo_offset) & 0xffffffff; ss->ss9.surface_base_addr_hi = ((obj_bo->offset64 + obj_bo_offset) >> 32) & 0xffffffff; @@ -1502,6 +1536,10 @@ intel_gpgpu_bind_image_gen9(intel_gpgpu_t *gpgpu, ss->ss4.min_array_elt = 0; ss->ss3.surface_pitch = pitch - 1; + //NV12 surface. the height is 3/2 * h, so need set proper offset here. 
+ if (format == I965_SURFACEFORMAT_PLANAR_420_8) + ss->ss6.uv_plane_y_offset = h * 2 / 3; + ss->ss1.mem_obj_ctrl_state = cl_gpgpu_get_cache_ctrl(); ss->ss7.shader_channel_select_red = I965_SURCHAN_SELECT_RED; ss->ss7.shader_channel_select_green = I965_SURCHAN_SELECT_GREEN; diff --git a/src/intel/intel_structs.h b/src/intel/intel_structs.h index c112a160..b38cc423 100644 --- a/src/intel/intel_structs.h +++ b/src/intel/intel_structs.h @@ -310,29 +310,22 @@ typedef struct gen8_surface_state } ss5; struct { - union { union { struct { uint32_t aux_surface_mode:3; uint32_t aux_surface_pitch:9; uint32_t pad3:4; + uint32_t aux_sruface_qpitch:15; + uint32_t pad2:1; }; - struct { - uint32_t uv_plane_y_offset:14; - uint32_t pad2:2; - }; - }; struct { - uint32_t uv_plane_x_offset:14; - uint32_t pad1:1; - uint32_t seperate_uv_plane_enable:1; + uint32_t uv_plane_y_offset : 14; + uint32_t pad1 : 2; + uint32_t uv_plane_x_offset : 14; + uint32_t pad0 : 2; }; - struct { - uint32_t aux_sruface_qpitch:15; - uint32_t pad0:1; }; - }; } ss6; struct { |