summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorChuanbo Weng <chuanbo.weng@intel.com>2017-06-14 00:54:13 +0800
committerYang Rong <rong.r.yang@intel.com>2017-07-12 18:29:19 +0800
commit9cb7ff4c285d892616595e5a43793f4d1408eca4 (patch)
tree335679b4a0e2fb166ae5bb0517a871cde6071529 /src
parent4933bf9212c9721ca2b0e615097ed2b53fec51c3 (diff)
downloadbeignet-9cb7ff4c285d892616595e5a43793f4d1408eca4.tar.gz
Implement extension cl_intel_device_side_avc_motion_estimation.
This patch mainly contains: 1. The built-in function __gen_ocl_ime implementation. 2. Implementations of many built-in functions of cl_intel_device_side_avc_motion_estimation. 3. This extension is required to run in simd16 mode. v2: move the utests to separate patches one by one; as all the utests have an extension function check, there is no need to put them in a standalone utest; uncomment the self test; fix an extension check logic issue, it should be && instead of ||. Signed-off-by: Chuanbo Weng <chuanbo.weng@intel.com> Signed-off-by: Xionghu Luo <xionghu.luo@intel.com> Reviewed-by: Yang Rong <rong.r.yang@intel.com>
Diffstat (limited to 'src')
-rw-r--r--src/cl_command_queue.c7
-rw-r--r--src/cl_device_id.c4
-rw-r--r--src/cl_extensions.c2
-rw-r--r--src/cl_extensions.h5
-rw-r--r--src/intel/intel_gpgpu.c70
-rw-r--r--src/intel/intel_structs.h63
6 files changed, 148 insertions, 3 deletions
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 55b1a230..43ff8fed 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -154,6 +154,13 @@ cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k, cl_gpgpu gpgpu,
image->intel_fmt, image->image_type, image->bpp,
image->w, image->h, image->depth,
image->row_pitch, image->slice_pitch, (cl_gpgpu_tiling)image->tiling);
+ //We always setup media surface state, so this surface can be used for vme
+ else if( (image->fmt.image_channel_order == CL_R) && (image->fmt.image_channel_data_type == CL_UNORM_INT8) )
+ cl_gpgpu_bind_image_for_vme(gpgpu, k->images[i].idx + BTI_WORKAROUND_IMAGE_OFFSET, image->base.bo,
+ image->offset + k->args[id].mem->offset,
+ image->intel_fmt, image->image_type, image->bpp,
+ image->w, image->h, image->depth,
+ image->row_pitch, image->slice_pitch, (cl_gpgpu_tiling)image->tiling);
}
return CL_SUCCESS;
}
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 1960463e..5e284193 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -576,6 +576,7 @@ skl_gt1_break:
#endif
cl_intel_platform_get_default_extension(ret);
cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
+ cl_intel_platform_enable_extension(ret, cl_intel_device_side_avc_motion_estimation_ext_id);
break;
case PCI_CHIP_SKYLAKE_ULT_GT2:
@@ -601,6 +602,7 @@ skl_gt2_break:
#endif
cl_intel_platform_get_default_extension(ret);
cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
+ cl_intel_platform_enable_extension(ret, cl_intel_device_side_avc_motion_estimation_ext_id);
break;
case PCI_CHIP_SKYLAKE_ULT_GT3:
@@ -624,6 +626,7 @@ skl_gt3_break:
cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id);
#endif
cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
+ cl_intel_platform_enable_extension(ret, cl_intel_device_side_avc_motion_estimation_ext_id);
break;
case PCI_CHIP_SKYLAKE_DT_GT4:
@@ -643,6 +646,7 @@ skl_gt4_break:
#endif
cl_intel_platform_get_default_extension(ret);
cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
+ cl_intel_platform_enable_extension(ret, cl_intel_device_side_avc_motion_estimation_ext_id);
break;
case PCI_CHIP_BROXTON_0:
diff --git a/src/cl_extensions.c b/src/cl_extensions.c
index 56099ad0..4987bee2 100644
--- a/src/cl_extensions.c
+++ b/src/cl_extensions.c
@@ -70,7 +70,7 @@ check_intel_extension(cl_extensions_t *extensions)
int id;
for(id = INTEL_EXT_START_ID; id <= INTEL_EXT_END_ID; id++)
{
- if(id != EXT_ID(intel_motion_estimation))
+ if(id != EXT_ID(intel_motion_estimation) && id != EXT_ID(intel_device_side_avc_motion_estimation))
extensions->extensions[id].base.ext_enabled = 1;
if(id == EXT_ID(intel_required_subgroup_size))
#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR > 40
diff --git a/src/cl_extensions.h b/src/cl_extensions.h
index bb61c0bc..b32b2362 100644
--- a/src/cl_extensions.h
+++ b/src/cl_extensions.h
@@ -32,7 +32,8 @@
DECL_EXT(intel_subgroups_short) \
DECL_EXT(intel_required_subgroup_size) \
DECL_EXT(intel_media_block_io) \
- DECL_EXT(intel_planar_yuv)
+ DECL_EXT(intel_planar_yuv) \
+ DECL_EXT(intel_device_side_avc_motion_estimation)
#define DECL_GL_EXTENSIONS \
DECL_EXT(khr_gl_sharing)\
@@ -67,7 +68,7 @@ cl_khr_extension_id_max
#define OPT1_EXT_START_ID EXT_ID(khr_int64_base_atomics)
#define OPT1_EXT_END_ID EXT_ID(khr_icd)
#define INTEL_EXT_START_ID EXT_ID(intel_accelerator)
-#define INTEL_EXT_END_ID EXT_ID(intel_planar_yuv)
+#define INTEL_EXT_END_ID EXT_ID(intel_device_side_avc_motion_estimation)
#define GL_EXT_START_ID EXT_ID(khr_gl_sharing)
#define GL_EXT_END_ID EXT_ID(khr_gl_msaa_sharing)
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 2b778e5a..b0d6bd94 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -1337,6 +1337,75 @@ intel_gpgpu_bind_image_for_vme_gen7(intel_gpgpu_t *gpgpu,
assert(index < GEN_MAX_SURFACES);
}
+/* Bind an image as a Gen9 media surface state in binding-table slot `index`,
+ * so the surface can be used for VME. The surface format is always set to
+ * Y8_UNORM; format, type, bpp, depth and slice_pitch are not used here. */
+static void
+intel_gpgpu_bind_image_for_vme_gen9(intel_gpgpu_t *gpgpu,
+                                    uint32_t index,
+                                    dri_bo* obj_bo,
+                                    uint32_t obj_bo_offset,
+                                    uint32_t format,
+                                    cl_mem_object_type type,
+                                    uint32_t bpp,
+                                    int32_t w,
+                                    int32_t h,
+                                    int32_t depth,
+                                    int32_t pitch,
+                                    int32_t slice_pitch,
+                                    int32_t tiling)
+{
+  surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset;
+  gen9_media_surface_state_t *ss;
+
+  /* Check the slot index before the heap and binding table are written. */
+  assert(index < GEN_MAX_SURFACES);
+  ss = (gen9_media_surface_state_t *) &heap->surface[index * sizeof(gen8_surface_state_t)];
+
+  memset(ss, 0, sizeof(gen8_surface_state_t)); /* clears a full gen8-sized slot */
+  ss->ss0.rotation = 0; //++
+  ss->ss1.uv_offset_v_direction = 0;
+  ss->ss1.pic_struct = 0;
+  ss->ss1.width = w - 1;   /* width, height and pitch are stored minus one */
+  ss->ss1.height = h - 1;
+  if (tiling == GPGPU_NO_TILE) {
+    ss->ss2.tile_mode = 0;
+  } else if (tiling == GPGPU_TILE_X) {
+    ss->ss2.tile_mode = 2;
+  } else if (tiling == GPGPU_TILE_Y) {
+    ss->ss2.tile_mode = 3;
+  } /* any other tiling value keeps the zero written by memset */
+  ss->ss2.half_pitch_for_chroma = 0;
+  ss->ss2.surface_pitch = pitch - 1;
+  ss->ss2.address_control = 1; //++ CLAMP: 0; MIRROR:1;
+  ss->ss2.mem_compress_enable = 0; //++
+  ss->ss2.mem_compress_mode = 0; //++
+  ss->ss2.uv_offset_v_direction_msb = 0; //++
+  ss->ss2.uv_offset_u_direction = 0; //++
+  ss->ss2.interleave_chroma = 0;
+  ss->ss2.surface_format = 12; //Y8_UNORM
+  ss->ss3.y_offset_for_u = 0;
+  ss->ss3.x_offset_for_u = 0;
+  ss->ss4.y_offset_for_v = 0;
+  ss->ss4.x_offset_for_v = 0;
+  ss->ss5.surface_object_control_state = cl_gpgpu_get_cache_ctrl();
+  ss->ss5.tiled_res_mode = 0; //++ TRMODE_NONE: 0; TRMODE_TILEYF: 1; TRMODE_TILEYS:2
+  ss->ss5.vert_line_stride_offset = 0; //++
+  ss->ss5.vert_line_stride = 0; //++
+  ss->ss6.base_addr = (obj_bo->offset64 + obj_bo_offset) & 0xffffffff; /* low 32 bits */
+  ss->ss7.base_addr_high = ((obj_bo->offset64 + obj_bo_offset) >> 32) & 0xffffffff;
+
+  heap->binding_table[index] = offsetof(surface_heap_t, surface) +
+                               index * surface_state_sz;
+  /* Emit a relocation for the base address stored at ss6 of this slot. */
+  dri_bo_emit_reloc(gpgpu->aux_buf.bo,
+                    I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                    obj_bo_offset,
+                    gpgpu->aux_offset.surface_heap_offset + heap->binding_table[index] +
+                    offsetof(gen9_media_surface_state_t, ss6),
+                    obj_bo);
+}
+
static void
intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu,
@@ -2562,6 +2631,7 @@ intel_set_gpgpu_callbacks(int device_id)
}
if (IS_GEN9(device_id)) {
cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen9;
+ cl_gpgpu_bind_image_for_vme = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_for_vme_gen9;
intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen8;
cl_gpgpu_get_cache_ctrl = (cl_gpgpu_get_cache_ctrl_cb *)intel_gpgpu_get_cache_ctrl_gen9;
intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen8;
diff --git a/src/intel/intel_structs.h b/src/intel/intel_structs.h
index b38cc423..282929d7 100644
--- a/src/intel/intel_structs.h
+++ b/src/intel/intel_structs.h
@@ -425,6 +425,69 @@ typedef struct gen7_media_surface_state
} ss7;
} gen7_media_surface_state_t;
+/* Gen9 media surface state: eight 32-bit dwords, ss0..ss7. Each group of
+ * bitfields must add up to exactly 32 bits to keep the fields in place. */
+typedef struct gen9_media_surface_state
+{
+  struct {
+    uint32_t pad3:12;
+    uint32_t pad2:4;
+    uint32_t pad1:14; //ExistsIf [Surface Format] is not one of Planar Formats
+    uint32_t rotation:2; // pads above total 30 bits, so this ends the dword
+  } ss0;
+
+  struct {
+    uint32_t uv_offset_v_direction:2;
+    uint32_t pic_struct:2;
+    uint32_t width:14;
+    uint32_t height:14;
+  } ss1;
+
+  struct {
+    uint32_t tile_mode:2;
+    uint32_t half_pitch_for_chroma:1;
+    uint32_t surface_pitch:18;
+    uint32_t address_control:1;
+    uint32_t mem_compress_enable:1;
+    uint32_t mem_compress_mode:1;
+    uint32_t uv_offset_v_direction_msb:1;
+    uint32_t uv_offset_u_direction:1;
+    uint32_t interleave_chroma:1;
+    uint32_t surface_format:5;
+  } ss2;
+
+  struct {
+    uint32_t y_offset_for_u:14;
+    uint32_t pad1:2;
+    uint32_t x_offset_for_u:14;
+    uint32_t pad0:2;
+  } ss3;
+
+  struct {
+    uint32_t y_offset_for_v:15;
+    uint32_t pad1:1;
+    uint32_t x_offset_for_v:14;
+    uint32_t pad0:2;
+  } ss4;
+
+  struct {
+    uint32_t surface_object_control_state:7;
+    uint32_t pad2:11;
+    uint32_t tiled_res_mode:2;
+    uint32_t pad1:4;
+    uint32_t pad0:6;
+    uint32_t vert_line_stride_offset:1;
+    uint32_t vert_line_stride:1;
+  } ss5;
+
+  struct { uint32_t base_addr; } ss6; // low 32 bits of the surface address
+
+  struct {
+    uint32_t base_addr_high:16;
+    uint32_t pad0:16;
+  } ss7;
+} gen9_media_surface_state_t;
+
typedef union gen_surface_state
{
gen7_surface_state_t gen7_surface_state;