diff options
author | Luo Xionghu <xionghu.luo@intel.com> | 2017-06-14 00:54:14 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2017-07-12 18:29:40 +0800 |
commit | 5a288032ab2361030e7fa24bcbc3c5e8f5b07d3a (patch) | |
tree | 2ede1a75773a4960e83673e292538c2f5a1a4163 /kernels/compiler_intra_prediction.cl | |
parent | 9cb7ff4c285d892616595e5a43793f4d1408eca4 (diff) | |
download | beignet-5a288032ab2361030e7fa24bcbc3c5e8f5b07d3a.tar.gz |
add utest compiler_intra_prediction for extenstion cl_intel_device_side_avc_motion_estimation.
fix build warnings.
Signed-off-by: Chuanbo Weng <chuanbo.weng@intel.com>
Signed-off-by: Xionghu Luo <xionghu.luo@intel.com>
Reviewed-by: Yang Rong <rong.r.yang@intel.com>
Diffstat (limited to 'kernels/compiler_intra_prediction.cl')
-rw-r--r-- | kernels/compiler_intra_prediction.cl | 91 |
1 files changed, 91 insertions, 0 deletions
diff --git a/kernels/compiler_intra_prediction.cl b/kernels/compiler_intra_prediction.cl new file mode 100644 index 00000000..28e81e52 --- /dev/null +++ b/kernels/compiler_intra_prediction.cl @@ -0,0 +1,91 @@ + +__kernel __attribute__((intel_reqd_sub_group_size(16))) +void compiler_intra_prediction( + __read_only image2d_t srcImg, + __global uchar *luma_mode, + __global ushort *luma_distortion, + __global uchar *luma_shape, + __global uint* dwo_buffer, + __global uint* pld_buffer){ + + int gr_id0 = get_group_id(0); + int gr_id1 = get_group_id(1); + + ushort2 src_coord; + /*src_coord.x = gr_id0 * 16; + src_coord.y = gr_id1 * 16;*/ + src_coord.x = 2 * 16; + src_coord.y = 1 * 16; + + intel_sub_group_avc_sic_payload_t payload = intel_sub_group_avc_sic_initialize(src_coord); + + uchar sad_adjustment = CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL; + uchar intra_partition_mask = CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL; +//XXX: Different from official value? +#undef CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL +#undef CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL +#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x4 +#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x8 + uint nb_avail = CLK_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL | + CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL | + CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL | + CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL; + + uint sgl_id = get_sub_group_local_id(); + int2 nb_coord; + float4 color; + + nb_coord.x = src_coord.x - 1; + nb_coord.y = src_coord.y + sgl_id; + color = read_imagef(srcImg, nb_coord); + uchar left_edge = color.s0 * 255; + + nb_coord.x = src_coord.x - 1; + nb_coord.y = src_coord.y - 1; + color = read_imagef(srcImg, nb_coord); + uchar upper_left_corner = color.s0 * 255; + + nb_coord.x = src_coord.x + sgl_id; + nb_coord.y = src_coord.y - 1; + color = read_imagef(srcImg, nb_coord); + uchar upper_edge = color.s0 * 255; + + uchar upper_right_edge = 0; + if(sgl_id < 8){ + nb_coord.x = src_coord.x + 16 + sgl_id; + nb_coord.y = src_coord.y - 1; + color = read_imagef(srcImg, nb_coord); + upper_right_edge = color.s0 * 255; + } + payload = intel_sub_group_avc_sic_configure_ipe( + intra_partition_mask, nb_avail, left_edge, upper_left_corner, upper_edge, + upper_right_edge, sad_adjustment, payload); + + uchar shape_cost_16_16 = (1 << 4) | 5; + uchar shape_cost_8_8 = (1 << 4) | 4; + uchar shape_cost_4_4 = (1 << 4) | 3; + uint intra_shape_cost = (shape_cost_4_4 << 24) | (shape_cost_8_8 << 16) | (shape_cost_16_16 << 8) | (0x0); + payload = intel_sub_group_avc_sic_set_intra_luma_shape_penalty(intra_shape_cost, payload); + + sampler_t vs = 0; + intel_sub_group_avc_sic_result_t result = + intel_sub_group_avc_sic_evaluate_ipe(srcImg, vs, payload); + + uchar shape = intel_sub_group_avc_sic_get_ipe_luma_shape(result); + ushort dist = intel_sub_group_avc_sic_get_best_ipe_luma_distortion(result); + ulong modes = intel_sub_group_avc_sic_get_packed_ipe_luma_modes(result); + + int lid_x = get_local_id(0); + int mb_idx = gr_id0 + gr_id1 * get_num_groups(0); + if (lid_x == 0) { + luma_shape[mb_idx] = shape; + luma_distortion[mb_idx] = dist; + uchar mode = modes & 0xF; + luma_mode[mb_idx] = mode; + } + + dwo_buffer[mb_idx*16*4 + lid_x + 16*0] = result.s0; + dwo_buffer[mb_idx*16*4 + lid_x + 16*1] = result.s1; + dwo_buffer[mb_idx*16*4 + lid_x + 16*2] = result.s2; + dwo_buffer[mb_idx*16*4 + lid_x + 16*3] = result.s3; +} |