diff options
author | Chuanbo Weng <chuanbo.weng@intel.com> | 2017-06-14 00:54:13 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2017-07-12 18:29:19 +0800 |
commit | 9cb7ff4c285d892616595e5a43793f4d1408eca4 (patch) | |
tree | 335679b4a0e2fb166ae5bb0517a871cde6071529 /src | |
parent | 4933bf9212c9721ca2b0e615097ed2b53fec51c3 (diff) | |
download | beignet-9cb7ff4c285d892616595e5a43793f4d1408eca4.tar.gz |
Implement extension cl_intel_device_side_avc_motion_estimation.
This patch mainly contains:
1. built-in function __gen_ocl_ime implementation.
2. Lots of built-in functions of cl_intel_device_side_avc_motion_estimation
are implemented.
3. This extension is required to run in simd16 mode.
v2: move the utests to separate patches one by one;
as all the utests have an extension function check, no need to put them
in a standalone utest;
uncomment the self test;
fix extension check logic issue, should be && instead of ||.
Signed-off-by: Chuanbo Weng <chuanbo.weng@intel.com>
Signed-off-by: Xionghu Luo <xionghu.luo@intel.com>
Reviewed-by: Yang Rong <rong.r.yang@intel.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/cl_command_queue.c | 7 | ||||
-rw-r--r-- | src/cl_device_id.c | 4 | ||||
-rw-r--r-- | src/cl_extensions.c | 2 | ||||
-rw-r--r-- | src/cl_extensions.h | 5 | ||||
-rw-r--r-- | src/intel/intel_gpgpu.c | 70 | ||||
-rw-r--r-- | src/intel/intel_structs.h | 63 |
6 files changed, 148 insertions, 3 deletions
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index 55b1a230..43ff8fed 100644 --- a/src/cl_command_queue.c +++ b/src/cl_command_queue.c @@ -154,6 +154,13 @@ cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k, cl_gpgpu gpgpu, image->intel_fmt, image->image_type, image->bpp, image->w, image->h, image->depth, image->row_pitch, image->slice_pitch, (cl_gpgpu_tiling)image->tiling); + //We always setup media surface state, so this surface can be used for vme + else if( (image->fmt.image_channel_order == CL_R) && (image->fmt.image_channel_data_type == CL_UNORM_INT8) ) + cl_gpgpu_bind_image_for_vme(gpgpu, k->images[i].idx + BTI_WORKAROUND_IMAGE_OFFSET, image->base.bo, + image->offset + k->args[id].mem->offset, + image->intel_fmt, image->image_type, image->bpp, + image->w, image->h, image->depth, + image->row_pitch, image->slice_pitch, (cl_gpgpu_tiling)image->tiling); } return CL_SUCCESS; } diff --git a/src/cl_device_id.c b/src/cl_device_id.c index 1960463e..5e284193 100644 --- a/src/cl_device_id.c +++ b/src/cl_device_id.c @@ -576,6 +576,7 @@ skl_gt1_break: #endif cl_intel_platform_get_default_extension(ret); cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); + cl_intel_platform_enable_extension(ret, cl_intel_device_side_avc_motion_estimation_ext_id); break; case PCI_CHIP_SKYLAKE_ULT_GT2: @@ -601,6 +602,7 @@ skl_gt2_break: #endif cl_intel_platform_get_default_extension(ret); cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); + cl_intel_platform_enable_extension(ret, cl_intel_device_side_avc_motion_estimation_ext_id); break; case PCI_CHIP_SKYLAKE_ULT_GT3: @@ -624,6 +626,7 @@ skl_gt3_break: cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id); #endif cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); + cl_intel_platform_enable_extension(ret, cl_intel_device_side_avc_motion_estimation_ext_id); break; case PCI_CHIP_SKYLAKE_DT_GT4: @@ -643,6 +646,7 @@ skl_gt4_break: #endif 
cl_intel_platform_get_default_extension(ret); cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); + cl_intel_platform_enable_extension(ret, cl_intel_device_side_avc_motion_estimation_ext_id); break; case PCI_CHIP_BROXTON_0: diff --git a/src/cl_extensions.c b/src/cl_extensions.c index 56099ad0..4987bee2 100644 --- a/src/cl_extensions.c +++ b/src/cl_extensions.c @@ -70,7 +70,7 @@ check_intel_extension(cl_extensions_t *extensions) int id; for(id = INTEL_EXT_START_ID; id <= INTEL_EXT_END_ID; id++) { - if(id != EXT_ID(intel_motion_estimation)) + if(id != EXT_ID(intel_motion_estimation) && id != EXT_ID(intel_device_side_avc_motion_estimation)) extensions->extensions[id].base.ext_enabled = 1; if(id == EXT_ID(intel_required_subgroup_size)) #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR > 40 diff --git a/src/cl_extensions.h b/src/cl_extensions.h index bb61c0bc..b32b2362 100644 --- a/src/cl_extensions.h +++ b/src/cl_extensions.h @@ -32,7 +32,8 @@ DECL_EXT(intel_subgroups_short) \ DECL_EXT(intel_required_subgroup_size) \ DECL_EXT(intel_media_block_io) \ - DECL_EXT(intel_planar_yuv) + DECL_EXT(intel_planar_yuv) \ + DECL_EXT(intel_device_side_avc_motion_estimation) #define DECL_GL_EXTENSIONS \ DECL_EXT(khr_gl_sharing)\ @@ -67,7 +68,7 @@ cl_khr_extension_id_max #define OPT1_EXT_START_ID EXT_ID(khr_int64_base_atomics) #define OPT1_EXT_END_ID EXT_ID(khr_icd) #define INTEL_EXT_START_ID EXT_ID(intel_accelerator) -#define INTEL_EXT_END_ID EXT_ID(intel_planar_yuv) +#define INTEL_EXT_END_ID EXT_ID(intel_device_side_avc_motion_estimation) #define GL_EXT_START_ID EXT_ID(khr_gl_sharing) #define GL_EXT_END_ID EXT_ID(khr_gl_msaa_sharing) diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index 2b778e5a..b0d6bd94 100644 --- a/src/intel/intel_gpgpu.c +++ b/src/intel/intel_gpgpu.c @@ -1337,6 +1337,75 @@ intel_gpgpu_bind_image_for_vme_gen7(intel_gpgpu_t *gpgpu, assert(index < GEN_MAX_SURFACES); } +static void +intel_gpgpu_bind_image_for_vme_gen9(intel_gpgpu_t *gpgpu, + 
uint32_t index, + dri_bo* obj_bo, + uint32_t obj_bo_offset, + uint32_t format, + cl_mem_object_type type, + uint32_t bpp, + int32_t w, + int32_t h, + int32_t depth, + int32_t pitch, + int32_t slice_pitch, + int32_t tiling) +{ + surface_heap_t *heap = gpgpu->aux_buf.bo->virtual + gpgpu->aux_offset.surface_heap_offset; + gen9_media_surface_state_t *ss = (gen9_media_surface_state_t *) &heap->surface[index * sizeof(gen8_surface_state_t)]; + + memset(ss, 0, sizeof(gen8_surface_state_t)); + ss->ss0.rotation = 0; //++ + ss->ss1.uv_offset_v_direction = 0; + ss->ss1.pic_struct = 0; + ss->ss1.width = w - 1; + ss->ss1.height = h - 1; + if (tiling == GPGPU_NO_TILE) { + ss->ss2.tile_mode = 0; + } + else if (tiling == GPGPU_TILE_X){ + ss->ss2.tile_mode = 2; + } + else if (tiling == GPGPU_TILE_Y){ + ss->ss2.tile_mode = 3; + } + ss->ss2.half_pitch_for_chroma = 0; + ss->ss2.surface_pitch = pitch - 1; + ss->ss2.address_control = 1; //++ CLAMP: 0; MIRROR:1; + ss->ss2.mem_compress_enable = 0; //++ + ss->ss2.mem_compress_mode = 0; //++ + ss->ss2.uv_offset_v_direction_msb = 0; //++ + ss->ss2.uv_offset_u_direction = 0; //++ + ss->ss2.interleave_chroma = 0; + ss->ss2.surface_format = 12; //Y8_UNORM + //ss->ss2.surface_format = 4; //PLANAR_420_8 + ss->ss3.y_offset_for_u = 0; + ss->ss3.x_offset_for_u = 0; + ss->ss4.y_offset_for_v = 0; + ss->ss4.x_offset_for_v = 0; + ss->ss5.surface_object_control_state = cl_gpgpu_get_cache_ctrl(); + ss->ss5.tiled_res_mode = 0; //++ TRMODE_NONE: 0; TRMODE_TILEYF: 1; TRMODE_TILEYS:2 + ss->ss5.vert_line_stride_offset = 0; //++ + ss->ss5.vert_line_stride = 0; //++ + ss->ss6.base_addr = (obj_bo->offset64 + obj_bo_offset) & 0xffffffff; // + ss->ss7.base_addr_high = ((obj_bo->offset64 + obj_bo_offset) >> 32) & 0xffffffff; // + + + heap->binding_table[index] = offsetof(surface_heap_t, surface) + + index * surface_state_sz; + dri_bo_emit_reloc(gpgpu->aux_buf.bo, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, + obj_bo_offset, + 
gpgpu->aux_offset.surface_heap_offset + + heap->binding_table[index] + + offsetof(gen9_media_surface_state_t, ss6), + obj_bo); + + assert(index < GEN_MAX_SURFACES); +} + static void intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu, @@ -2562,6 +2631,7 @@ intel_set_gpgpu_callbacks(int device_id) } if (IS_GEN9(device_id)) { cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen9; + cl_gpgpu_bind_image_for_vme = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_for_vme_gen9; intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen8; cl_gpgpu_get_cache_ctrl = (cl_gpgpu_get_cache_ctrl_cb *)intel_gpgpu_get_cache_ctrl_gen9; intel_gpgpu_get_scratch_index = intel_gpgpu_get_scratch_index_gen8; diff --git a/src/intel/intel_structs.h b/src/intel/intel_structs.h index b38cc423..282929d7 100644 --- a/src/intel/intel_structs.h +++ b/src/intel/intel_structs.h @@ -425,6 +425,69 @@ typedef struct gen7_media_surface_state } ss7; } gen7_media_surface_state_t; +typedef struct gen9_media_surface_state +{ + struct { + uint32_t pad3:12; + uint32_t pad2:4; + uint32_t pad1:11; //ExistsIf [Surface Format] is not one of Planar Formats + uint32_t rotation:2; + } ss0; + + struct { + uint32_t uv_offset_v_direction:2; + uint32_t pic_struct:2; + uint32_t width:14; + uint32_t height:14; + } ss1; + + struct { + uint32_t tile_mode:2; + uint32_t half_pitch_for_chroma:1; + uint32_t surface_pitch:18; + uint32_t address_control:1; + uint32_t mem_compress_enable:1; + uint32_t mem_compress_mode:1; + uint32_t uv_offset_v_direction_msb:1; + uint32_t uv_offset_u_direction:1; + uint32_t interleave_chroma:1; + uint32_t surface_format:5; + } ss2; + + struct { + uint32_t y_offset_for_u:14; + uint32_t pad1:2; + uint32_t x_offset_for_u:14; + uint32_t pad0:2; + } ss3; + + struct { + uint32_t y_offset_for_v:15; + uint32_t pad1:1; + uint32_t x_offset_for_v:14; + uint32_t pad0:2; + } ss4; + + struct { + uint32_t surface_object_control_state:7; + uint32_t pad2:11; + uint32_t tiled_res_mode:2; + uint32_t pad1:4; 
+ uint32_t pad0:6; + uint32_t vert_line_stride_offset:1; + uint32_t vert_line_stride:1; + } ss5; + + struct { + uint32_t base_addr; + } ss6; + + struct { + uint32_t base_addr_high:16; + uint32_t pad0:16; + } ss7; +} gen9_media_surface_state_t; + typedef union gen_surface_state { gen7_surface_state_t gen7_surface_state; |