summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorLuo Xionghu <xionghu.luo@intel.com>2017-03-10 01:30:33 +0800
committerYang Rong <rong.r.yang@intel.com>2017-03-13 16:56:34 +0800
commit20fd72f6b2d178bdabf76159a84f7514e0fd3f75 (patch)
treefbd6b742bfc8d0d76be3daa17167c745690efdfa /src
parentc777c714d1149c1b35b2cb044748377425f424c5 (diff)
downloadbeignet-20fd72f6b2d178bdabf76159a84f7514e0fd3f75.tar.gz
add extension intel_planar_yuv.
Create a w * (3/2 * h) sized bo for the whole CL_NV12_INTEL format surface; the Y surface (format CL_R) shares the first w * h part, and the UV surface (format CL_RG) shares the remaining w * (1/2 * h) part. Set the correct bo offset for the UV surface on each platform. v2: add extension define in libocl; fix error check. Signed-off-by: Luo Xionghu <xionghu.luo@intel.com> Reviewed-by: Yang Rong <rong.r.yang@intel.com>
Diffstat (limited to 'src')
-rw-r--r--src/cl_api.c9
-rw-r--r--src/cl_device_id.c2
-rw-r--r--src/cl_extensions.h5
-rw-r--r--src/cl_image.c7
-rw-r--r--src/cl_mem.c158
-rw-r--r--src/cl_mem.h2
-rw-r--r--src/intel/intel_defines.h1
-rw-r--r--src/intel/intel_gpgpu.c38
-rw-r--r--src/intel/intel_structs.h19
9 files changed, 217 insertions, 24 deletions
diff --git a/src/cl_api.c b/src/cl_api.c
index 24b8b3d8..036ae172 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -134,7 +134,7 @@ clCreateImage(cl_context context,
goto error;
}
if (image_format->image_channel_order < CL_R ||
- image_format->image_channel_order > CL_sBGRA) {
+ (image_format->image_channel_order > CL_sBGRA && image_format->image_channel_order != CL_NV12_INTEL)) {
err = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
goto error;
}
@@ -166,6 +166,13 @@ clCreateImage(cl_context context,
goto error;
}
+ if (image_format->image_channel_order == CL_NV12_INTEL &&
+ (image_format->image_channel_data_type != CL_UNORM_INT8 ||
+ image_desc->image_width % 4 || image_desc->image_height % 4)) {
+ err = CL_INVALID_IMAGE_DESCRIPTOR;
+ goto error;
+ }
+
/* Other details check for image_desc will leave to image create. */
mem = cl_mem_new_image(context,
flags,
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index d4f4208e..50ed0d99 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -1075,10 +1075,12 @@ cl_get_device_info(cl_device_id device,
src_size = sizeof(device->image_max_array_size);
break;
case CL_DEVICE_IMAGE2D_MAX_WIDTH:
+ case CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL:
src_ptr = &device->image2d_max_width;
src_size = sizeof(device->image2d_max_width);
break;
case CL_DEVICE_IMAGE2D_MAX_HEIGHT:
+ case CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL:
src_ptr = &device->image2d_max_height;
src_size = sizeof(device->image2d_max_height);
break;
diff --git a/src/cl_extensions.h b/src/cl_extensions.h
index 52a49531..55747a7b 100644
--- a/src/cl_extensions.h
+++ b/src/cl_extensions.h
@@ -29,7 +29,8 @@
DECL_EXT(intel_accelerator) \
DECL_EXT(intel_motion_estimation) \
DECL_EXT(intel_subgroups) \
- DECL_EXT(intel_subgroups_short)
+ DECL_EXT(intel_subgroups_short) \
+ DECL_EXT(intel_planar_yuv)
#define DECL_GL_EXTENSIONS \
DECL_EXT(khr_gl_sharing)\
@@ -64,7 +65,7 @@ cl_khr_extension_id_max
#define OPT1_EXT_START_ID EXT_ID(khr_int64_base_atomics)
#define OPT1_EXT_END_ID EXT_ID(khr_icd)
#define INTEL_EXT_START_ID EXT_ID(intel_accelerator)
-#define INTEL_EXT_END_ID EXT_ID(intel_subgroups_short)
+#define INTEL_EXT_END_ID EXT_ID(intel_planar_yuv)
#define GL_EXT_START_ID EXT_ID(khr_gl_sharing)
#define GL_EXT_END_ID EXT_ID(khr_gl_msaa_sharing)
diff --git a/src/cl_image.c b/src/cl_image.c
index 5ff459a0..fbdc17b2 100644
--- a/src/cl_image.c
+++ b/src/cl_image.c
@@ -17,6 +17,7 @@
* Author: Benjamin Segovia <benjamin.segovia@intel.com>
*/
+#include "CL/cl_ext.h"
#include "cl_image.h"
#include "cl_utils.h"
#include "intel/intel_defines.h"
@@ -97,6 +98,7 @@ cl_image_byte_per_pixel(const cl_image_format *fmt, uint32_t *bpp)
return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
*bpp *= 4;
break;
+ case CL_NV12_INTEL: break;
default: return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
};
@@ -208,6 +210,11 @@ cl_image_get_intel_format(const cl_image_format *fmt)
case CL_UNORM_INT8: return I965_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB;
default: return INTEL_UNSUPPORTED_FORMAT;
};
+ case CL_NV12_INTEL:
+ switch (type) {
+ case CL_UNORM_INT8: return I965_SURFACEFORMAT_PLANAR_420_8;
+ default: return INTEL_UNSUPPORTED_FORMAT;
+ };
default: return INTEL_UNSUPPORTED_FORMAT;
};
}
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 0278b7fc..4a7bec82 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -152,6 +152,7 @@ cl_mem_allocate(enum cl_mem_type type,
mem->cmrt_mem = NULL;
if (mem->type == CL_MEM_IMAGE_TYPE) {
cl_mem_image(mem)->is_image_from_buffer = 0;
+ cl_mem_image(mem)->is_image_from_nv12_image = 0;
}
if (sz != 0) {
@@ -230,7 +231,11 @@ cl_mem_allocate(enum cl_mem_type type,
}
// if the image if created from buffer, should use the bo directly to share same bo.
mem->bo = buffer->bo;
- cl_mem_image(mem)->is_image_from_buffer = 1;
+ if (IS_IMAGE(buffer) && cl_mem_image(buffer)->fmt.image_channel_order == CL_NV12_INTEL) {
+ cl_mem_image(mem)->is_image_from_nv12_image = 1;
+ } else {
+ cl_mem_image(mem)->is_image_from_buffer = 1;
+ }
bufCreated = 1;
}
@@ -827,7 +832,7 @@ _cl_mem_new_image(cl_context ctx,
h = (w + ctx->devices[0]->image2d_max_width - 1) / ctx->devices[0]->image2d_max_width;
w = w > ctx->devices[0]->image2d_max_width ? ctx->devices[0]->image2d_max_width : w;
tiling = CL_NO_TILE;
- } else if(image_type == CL_MEM_OBJECT_IMAGE2D && buffer != NULL) {
+ } else if(image_type == CL_MEM_OBJECT_IMAGE2D && buffer != NULL && !IS_IMAGE(buffer)) {
tiling = CL_NO_TILE;
} else if (cl_driver_get_ver(ctx->drv) != 6) {
/* Pick up tiling mode (we do only linear on SNB) */
@@ -873,6 +878,9 @@ _cl_mem_new_image(cl_context ctx,
assert(0);
#undef DO_IMAGE_ERROR
+ if (fmt->image_channel_order == CL_NV12_INTEL) {
+ h += h/2;
+ }
uint8_t enableUserptr = 0;
if (enable_true_hostptr && ctx->devices[0]->host_unified_memory && data != NULL && (flags & CL_MEM_USE_HOST_PTR)) {
@@ -894,7 +902,7 @@ _cl_mem_new_image(cl_context ctx,
aligned_pitch = pitch;
//no need align the height if 2d image from buffer.
//the pitch should be same with buffer's pitch as they share same bo.
- if (image_type == CL_MEM_OBJECT_IMAGE2D && buffer != NULL) {
+ if (image_type == CL_MEM_OBJECT_IMAGE2D && buffer != NULL && !IS_IMAGE(buffer)) {
if(aligned_pitch < pitch) {
aligned_pitch = pitch;
}
@@ -911,7 +919,7 @@ _cl_mem_new_image(cl_context ctx,
}
sz = aligned_pitch * aligned_h * depth;
- if (image_type == CL_MEM_OBJECT_IMAGE2D && buffer != NULL) {
+ if (image_type == CL_MEM_OBJECT_IMAGE2D && buffer != NULL && !IS_IMAGE(buffer)) {
//image 2d created from buffer: per spec, the buffer sz maybe larger than the image 2d.
if (buffer->size >= sz)
sz = buffer->size;
@@ -979,6 +987,11 @@ _cl_mem_new_image(cl_context ctx,
cl_mem_copy_image(cl_mem_image(mem), pitch, slice_pitch, data);
}
+ /* copy yuv data if required */
+ if(fmt->image_channel_order == CL_NV12_INTEL && data) {
+ cl_mem_copy_image(cl_mem_image(mem), pitch, slice_pitch, data);
+ }
+
exit:
if (errcode_ret)
*errcode_ret = err;
@@ -990,6 +1003,121 @@ error:
}
static cl_mem
+_cl_mem_new_image_from_nv12_image(cl_context ctx,
+ cl_mem_flags flags,
+ const cl_image_format* image_format,
+ const cl_image_desc *image_desc,
+ cl_int *errcode_ret)
+{
+ cl_mem image = NULL;
+ cl_mem imageIn = image_desc->mem_object;
+ cl_int err = CL_SUCCESS;
+ *errcode_ret = err;
+ uint32_t bpp;
+ uint32_t intel_fmt = INTEL_UNSUPPORTED_FORMAT;
+ size_t width = 0;
+ size_t height = 0;
+ size_t depth = 0;
+
+ /* Get the size of each pixel */
+ if (UNLIKELY((err = cl_image_byte_per_pixel(image_format, &bpp)) != CL_SUCCESS))
+ goto error;
+
+ /* Only a sub-set of the formats are supported */
+ intel_fmt = cl_image_get_intel_format(image_format);
+ if (UNLIKELY(intel_fmt == INTEL_UNSUPPORTED_FORMAT)) {
+ err = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+ goto error;
+ }
+
+ if(imageIn == NULL) {
+ err = CL_INVALID_IMAGE_DESCRIPTOR;
+ goto error;
+ }
+
+ if (cl_mem_image(imageIn)->fmt.image_channel_order != CL_NV12_INTEL ||
+ (image_format->image_channel_order != CL_R &&
+ image_format->image_channel_order != CL_RG) ||
+ image_format->image_channel_data_type != CL_UNORM_INT8) {
+ err = CL_INVALID_IMAGE_DESCRIPTOR;
+ goto error;
+ }
+
+ width = cl_mem_image(imageIn)->w;
+ if (image_desc->image_depth == 0) {
+ height = cl_mem_image(imageIn)->h * 2 / 3;
+ } else if (image_desc->image_depth == 1) {
+ width = cl_mem_image(imageIn)->w / 2;
+ height = cl_mem_image(imageIn)->h / 3;
+ } else {
+ err = CL_INVALID_IMAGE_DESCRIPTOR;
+ goto error;
+ }
+
+ //flags check here.
+ if ((flags & CL_MEM_USE_HOST_PTR) || (flags & CL_MEM_ALLOC_HOST_PTR) ||
+ (flags & CL_MEM_COPY_HOST_PTR)) {
+ err = CL_INVALID_VALUE;
+ goto error;
+ }
+
+ if (!(imageIn->flags & CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)) {
+ if (((flags & CL_MEM_READ_WRITE) || (flags & CL_MEM_READ_ONLY)) &&
+ (imageIn->flags & CL_MEM_WRITE_ONLY)) {
+ err = CL_INVALID_VALUE;
+ goto error;
+ }
+ if (((flags & CL_MEM_READ_WRITE) || (flags & CL_MEM_WRITE_ONLY)) &&
+ (imageIn->flags | CL_MEM_READ_ONLY)) {
+ err = CL_INVALID_VALUE;
+ goto error;
+ }
+ if (((flags & CL_MEM_READ_WRITE) || (flags & CL_MEM_WRITE_ONLY) ||(flags & CL_MEM_READ_ONLY)) &&
+ (imageIn->flags & CL_MEM_NO_ACCESS_INTEL)) {
+ err = CL_INVALID_VALUE;
+ goto error;
+ }
+ if ((flags & CL_MEM_HOST_READ_ONLY) &&
+ (imageIn->flags & CL_MEM_HOST_WRITE_ONLY)) {
+ err = CL_INVALID_VALUE;
+ goto error;
+ }
+ if ((flags & CL_MEM_HOST_WRITE_ONLY) &&
+ (imageIn->flags & CL_MEM_HOST_READ_ONLY)) {
+ err = CL_INVALID_VALUE;
+ goto error;
+ }
+ if (((flags & CL_MEM_HOST_READ_ONLY) || (flags & CL_MEM_HOST_WRITE_ONLY)) &&
+ (imageIn->flags & CL_MEM_HOST_NO_ACCESS)) {
+ err = CL_INVALID_VALUE;
+ goto error;
+ }
+ }
+
+ image = _cl_mem_new_image(ctx, flags, image_format, image_desc->image_type,
+ width, height, depth, cl_mem_image(imageIn)->row_pitch,
+ 0, NULL,
+ imageIn, errcode_ret);
+ if (image == NULL)
+ return NULL;
+
+
+ if (image_desc->image_depth == 1) {
+ cl_mem_image(image)->offset = cl_mem_image(imageIn)->row_pitch * height * 2;
+ }
+ cl_mem_image(image)->nv12_image = imageIn;
+ cl_mem_add_ref(imageIn);
+ return image;
+
+error:
+ if (image)
+ cl_mem_delete(image);
+ image = NULL;
+ *errcode_ret = err;
+ return image;
+}
+
+static cl_mem
_cl_mem_new_image_from_buffer(cl_context ctx,
cl_mem_flags flags,
const cl_image_format* image_format,
@@ -1034,7 +1162,7 @@ _cl_mem_new_image_from_buffer(cl_context ctx,
goto error;
}
if ((buffer->flags & CL_MEM_READ_ONLY) &&
- (flags & (CL_MEM_READ_WRITE|CL_MEM_WRITE_ONLY))) {
+ (flags & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY))) {
err = CL_INVALID_VALUE;
goto error;
}
@@ -1169,9 +1297,14 @@ cl_mem_new_image(cl_context context,
image_desc->image_row_pitch, image_desc->image_slice_pitch,
host_ptr, NULL, errcode_ret);
case CL_MEM_OBJECT_IMAGE2D:
- if(image_desc->buffer)
- return _cl_mem_new_image_from_buffer(context, flags, image_format,
- image_desc, errcode_ret);
+ if (image_desc->buffer) {
+ if (IS_IMAGE(image_desc->buffer)) {
+ return _cl_mem_new_image_from_nv12_image(context, flags, image_format,
+ image_desc, errcode_ret);
+ } else
+ return _cl_mem_new_image_from_buffer(context, flags, image_format,
+ image_desc, errcode_ret);
+ }
else
return _cl_mem_new_image(context, flags, image_format, image_desc->image_type,
image_desc->image_width, image_desc->image_height, image_desc->image_depth,
@@ -1247,6 +1380,15 @@ cl_mem_delete(cl_mem mem)
mem->bo = NULL;
}
}
+ if (cl_mem_image(mem)->nv12_image) {
+ assert(cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE2D);
+ cl_mem_delete(cl_mem_image(mem)->nv12_image);
+ if(cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE2D && cl_mem_image(mem)->is_image_from_nv12_image == 1)
+ {
+ cl_mem_image(mem)->nv12_image = NULL;
+ mem->bo = NULL;
+ }
+ }
}
/* Someone still mapped, unmap */
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 4764401d..edfd0436 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -143,6 +143,8 @@ struct _cl_mem_image {
size_t offset; /* offset for dri_bo, used when it's reloc. */
cl_mem buffer_1d; /* if the image is created from buffer, it point to the buffer.*/
uint8_t is_image_from_buffer; /* IMAGE from Buffer*/
+ cl_mem nv12_image; /* if the image is created from nv12 Image, it point to the image.*/
+ uint8_t is_image_from_nv12_image; /* IMAGE from NV12 Image*/
};
struct _cl_mem_gl_image {
diff --git a/src/intel/intel_defines.h b/src/intel/intel_defines.h
index 6ada30cd..36be4fb8 100644
--- a/src/intel/intel_defines.h
+++ b/src/intel/intel_defines.h
@@ -267,6 +267,7 @@
#define I965_SURFACEFORMAT_R16G16B16_SNORM 0x19D
#define I965_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
#define I965_SURFACEFORMAT_R16G16B16_USCALED 0x19F
+#define I965_SURFACEFORMAT_PLANAR_420_8 0x1A5
#define I965_SURFACEFORMAT_RAW 0x1FF
#define I965_MAPFILTER_NEAREST 0x0
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 283b07a4..041938fe 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -1258,6 +1258,14 @@ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu,
ss->ss0.surface_array = 1;
ss->ss0.surface_array_spacing = 1;
}
+
+ if (obj_bo_offset && tiling != GPGPU_NO_TILE) {
+ uint32_t unaligned = obj_bo_offset;
+ obj_bo_offset = (obj_bo_offset / 0x1000) * 0x1000;
+ uint32_t h_ = (unaligned - obj_bo_offset )/ pitch;
+ ss->ss5.y_offset = h_ / 2;
+ }
+
ss->ss0.surface_format = format;
ss->ss1.base_addr = obj_bo->offset + obj_bo_offset;
ss->ss2.width = w - 1;
@@ -1354,6 +1362,14 @@ intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu,
ss->ss0.surface_array = 1;
ss->ss0.surface_array_spacing = 1;
}
+
+ if (obj_bo_offset && tiling != GPGPU_NO_TILE) {
+ uint32_t unaligned = obj_bo_offset;
+ obj_bo_offset = (obj_bo_offset / 0x1000) * 0x1000;
+ uint32_t h_ = (unaligned - obj_bo_offset )/ pitch;
+ ss->ss5.y_offset = h_ / 2;
+ }
+
ss->ss0.surface_format = format;
ss->ss1.base_addr = obj_bo->offset + obj_bo_offset;
ss->ss2.width = w - 1;
@@ -1419,6 +1435,13 @@ intel_gpgpu_bind_image_gen8(intel_gpgpu_t *gpgpu,
ss->ss2.height = h - 1;
ss->ss3.depth = depth - 1;
+ if(obj_bo_offset && tiling != GPGPU_NO_TILE) {
+ uint32_t unaligned = obj_bo_offset;
+ obj_bo_offset = (obj_bo_offset / 0x1000) * 0x1000;
+ uint32_t h_ = (unaligned - obj_bo_offset) / pitch;
+ ss->ss5.y_offset = h_ / 4;
+ }
+
ss->ss8.surface_base_addr_lo = (obj_bo->offset64 + obj_bo_offset) & 0xffffffff;
ss->ss9.surface_base_addr_hi = ((obj_bo->offset64 + obj_bo_offset) >> 32) & 0xffffffff;
@@ -1427,6 +1450,10 @@ intel_gpgpu_bind_image_gen8(intel_gpgpu_t *gpgpu,
ss->ss3.surface_pitch = pitch - 1;
ss->ss1.mem_obj_ctrl_state = cl_gpgpu_get_cache_ctrl();
+ //NV12 surface. the height is 3/2 * h, so need set proper offset here.
+ if (format == I965_SURFACEFORMAT_PLANAR_420_8)
+ ss->ss6.uv_plane_y_offset = h * 2 / 3;
+
ss->ss7.shader_channel_select_red = I965_SURCHAN_SELECT_RED;
ss->ss7.shader_channel_select_green = I965_SURCHAN_SELECT_GREEN;
ss->ss7.shader_channel_select_blue = I965_SURCHAN_SELECT_BLUE;
@@ -1495,6 +1522,13 @@ intel_gpgpu_bind_image_gen9(intel_gpgpu_t *gpgpu,
ss->ss2.height = h - 1;
ss->ss3.depth = depth - 1;
+ if (obj_bo_offset && tiling != GPGPU_NO_TILE) {
+ uint32_t unaligned = obj_bo_offset;
+ obj_bo_offset = (obj_bo_offset / 0x1000) * 0x1000;
+ uint32_t h_ = (unaligned - obj_bo_offset )/ pitch;
+ ss->ss5.y_offset = h_ / 4;
+ }
+
ss->ss8.surface_base_addr_lo = (obj_bo->offset64 + obj_bo_offset) & 0xffffffff;
ss->ss9.surface_base_addr_hi = ((obj_bo->offset64 + obj_bo_offset) >> 32) & 0xffffffff;
@@ -1502,6 +1536,10 @@ intel_gpgpu_bind_image_gen9(intel_gpgpu_t *gpgpu,
ss->ss4.min_array_elt = 0;
ss->ss3.surface_pitch = pitch - 1;
+ //NV12 surface. the height is 3/2 * h, so need set proper offset here.
+ if (format == I965_SURFACEFORMAT_PLANAR_420_8)
+ ss->ss6.uv_plane_y_offset = h * 2 / 3;
+
ss->ss1.mem_obj_ctrl_state = cl_gpgpu_get_cache_ctrl();
ss->ss7.shader_channel_select_red = I965_SURCHAN_SELECT_RED;
ss->ss7.shader_channel_select_green = I965_SURCHAN_SELECT_GREEN;
diff --git a/src/intel/intel_structs.h b/src/intel/intel_structs.h
index c112a160..b38cc423 100644
--- a/src/intel/intel_structs.h
+++ b/src/intel/intel_structs.h
@@ -310,29 +310,22 @@ typedef struct gen8_surface_state
} ss5;
struct {
- union {
union {
struct {
uint32_t aux_surface_mode:3;
uint32_t aux_surface_pitch:9;
uint32_t pad3:4;
+ uint32_t aux_sruface_qpitch:15;
+ uint32_t pad2:1;
};
- struct {
- uint32_t uv_plane_y_offset:14;
- uint32_t pad2:2;
- };
- };
struct {
- uint32_t uv_plane_x_offset:14;
- uint32_t pad1:1;
- uint32_t seperate_uv_plane_enable:1;
+ uint32_t uv_plane_y_offset : 14;
+ uint32_t pad1 : 2;
+ uint32_t uv_plane_x_offset : 14;
+ uint32_t pad0 : 2;
};
- struct {
- uint32_t aux_sruface_qpitch:15;
- uint32_t pad0:1;
};
- };
} ss6;
struct {