summaryrefslogtreecommitdiff
path: root/kernels/compiler_intra_prediction.cl
blob: 28e81e521fe65e8d9848f0c35260bc5b67a4176d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91

__kernel __attribute__((intel_reqd_sub_group_size(16)))
void  compiler_intra_prediction(
    __read_only image2d_t   srcImg,
    __global uchar          *luma_mode,
    __global ushort         *luma_distortion,
    __global uchar          *luma_shape,
    __global uint*          dwo_buffer,
    __global uint*          pld_buffer){

  int gr_id0 = get_group_id(0);
  int gr_id1 = get_group_id(1);

  ushort2 src_coord;
  /*src_coord.x = gr_id0  * 16;
  src_coord.y = gr_id1 * 16;*/
  src_coord.x = 2 * 16;
  src_coord.y = 1 * 16;

  intel_sub_group_avc_sic_payload_t payload = intel_sub_group_avc_sic_initialize(src_coord);

  uchar sad_adjustment = CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL;
  uchar intra_partition_mask = CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL;
//XXX: Different from official value?
#undef CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL
#undef CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL
#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x4
#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL  0x8
  uint nb_avail = CLK_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL |
               CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL |
               CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL |
               CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL;

  uint sgl_id = get_sub_group_local_id();
  int2 nb_coord;
  float4 color;

  nb_coord.x = src_coord.x - 1;
  nb_coord.y = src_coord.y + sgl_id;
  color = read_imagef(srcImg, nb_coord);
  uchar left_edge = color.s0 * 255;

  nb_coord.x = src_coord.x - 1;
  nb_coord.y = src_coord.y - 1;
  color = read_imagef(srcImg, nb_coord);
  uchar upper_left_corner = color.s0 * 255;

  nb_coord.x = src_coord.x + sgl_id;
  nb_coord.y = src_coord.y - 1;
  color = read_imagef(srcImg, nb_coord);
  uchar upper_edge = color.s0 * 255;

  uchar upper_right_edge = 0;
  if(sgl_id < 8){
    nb_coord.x = src_coord.x + 16 + sgl_id;
    nb_coord.y = src_coord.y - 1;
    color = read_imagef(srcImg, nb_coord);
    upper_right_edge = color.s0 * 255;
  }
  payload = intel_sub_group_avc_sic_configure_ipe(
      intra_partition_mask, nb_avail, left_edge, upper_left_corner, upper_edge,
      upper_right_edge, sad_adjustment, payload);

  uchar shape_cost_16_16 = (1 << 4) | 5;
  uchar shape_cost_8_8 = (1 << 4) | 4;
  uchar shape_cost_4_4 = (1 << 4) | 3;
  uint intra_shape_cost = (shape_cost_4_4 << 24) | (shape_cost_8_8 << 16) | (shape_cost_16_16 << 8) | (0x0);
  payload = intel_sub_group_avc_sic_set_intra_luma_shape_penalty(intra_shape_cost, payload);

  sampler_t vs = 0;
  intel_sub_group_avc_sic_result_t result =
      intel_sub_group_avc_sic_evaluate_ipe(srcImg, vs, payload);

  uchar shape = intel_sub_group_avc_sic_get_ipe_luma_shape(result);
  ushort dist = intel_sub_group_avc_sic_get_best_ipe_luma_distortion(result);
  ulong modes = intel_sub_group_avc_sic_get_packed_ipe_luma_modes(result);

  int lid_x = get_local_id(0);
  int mb_idx = gr_id0 + gr_id1 * get_num_groups(0);
  if (lid_x == 0) {
    luma_shape[mb_idx] = shape;
    luma_distortion[mb_idx] = dist;
    uchar mode = modes & 0xF;
    luma_mode[mb_idx] = mode;
  }

  dwo_buffer[mb_idx*16*4 + lid_x + 16*0] = result.s0;
  dwo_buffer[mb_idx*16*4 + lid_x + 16*1] = result.s1;
  dwo_buffer[mb_idx*16*4 + lid_x + 16*2] = result.s2;
  dwo_buffer[mb_idx*16*4 + lid_x + 16*3] = result.s3;
}