/*
 * path: root/kernels/compiler_sub_group_shuffle_up.cl
 * blob: fd287d52484543486b88de9b494265bc9584173a
 */
/*
 * Sub-group shuffle-up test kernel for int data.
 *
 * Output layout in dst:
 *   dst[0]              - get_max_sub_group_size(), written by work-item 0 only
 *   dst[1 + 4*gid + 0]  - intel_sub_group_shuffle_up(123, 456,  c)
 *   dst[1 + 4*gid + 1]  - intel_sub_group_shuffle_up(123, gid,  c)
 *   dst[1 + 4*gid + 2]  - intel_sub_group_shuffle_up(gid, -gid, max_size - lane - 1)
 *   dst[1 + 4*gid + 3]  - intel_sub_group_shuffle_up(gid, 321,  lane + 1)
 * where gid is get_global_id(0) and lane is get_sub_group_local_id().
 *
 * c is the shuffle delta supplied by the host for the first two calls.
 * NOTE(review): the lane-selection semantics of intel_sub_group_shuffle_up
 * come from the cl_intel_subgroups extension and are not restated here.
 */
__kernel void compiler_sub_group_shuffle_up_int(global int *dst, int c)
{
  const int gid = get_global_id(0);

  /* Only the first work-item fills in the header slot. */
  if (gid == 0) {
    dst[0] = get_max_sub_group_size();
  }

  /* Skip the header slot; each work-item owns four consecutive ints. */
  global int *out = dst + 1 + gid * 4;

  /* Per-lane deltas, computed in unsigned arithmetic and then stored as int. */
  const uint lane = get_sub_group_local_id();
  const int delta_lane_plus_one = lane + 1;
  const int delta_remaining = get_max_sub_group_size() - lane - 1;

  const int value = gid;

  const int r_const    = intel_sub_group_shuffle_up(123, 456, c);
  const int r_current  = intel_sub_group_shuffle_up(123, value, c);
  const int r_negated  = intel_sub_group_shuffle_up(value, -value, delta_remaining);
  const int r_previous = intel_sub_group_shuffle_up(value, 321, delta_lane_plus_one);

  out[0] = r_const;
  out[1] = r_current;
  out[2] = r_negated;
  out[3] = r_previous;
}
#ifdef SHORT
/*
 * Sub-group shuffle-up test kernel for short data.
 *
 * Output layout in dst:
 *   dst[0]            - get_max_sub_group_size(), written by work-item 0 only
 *   dst[1 + 4*i + 0]  - intel_sub_group_shuffle_up((short)123, (short)456, c)
 *   dst[1 + 4*i + 1]  - intel_sub_group_shuffle_up((short)123, from, c)
 *   dst[1 + 4*i + 2]  - intel_sub_group_shuffle_up(from, (short)-from, k)
 *   dst[1 + 4*i + 3]  - intel_sub_group_shuffle_up(from, (short)321, j)
 * where i is get_global_id(0), from is i narrowed to short,
 * j = lane + 1 and k = max_sub_group_size - lane - 1.
 *
 * c is the shuffle delta supplied by the host for the first two calls.
 * NOTE(review): the lane-selection semantics of intel_sub_group_shuffle_up
 * come from the cl_intel_subgroups / cl_intel_subgroups_short extensions and
 * are not restated here.
 */
__kernel void compiler_sub_group_shuffle_up_short(global short *dst, int c)
{
  /*
   * Keep the index as int (as the int kernel does). Holding the global id in
   * a short would wrap for ids above 32767 and turn dst[i*4] into a negative,
   * out-of-bounds index.
   */
  int i = get_global_id(0);
  if (i == 0)
    dst[0] = get_max_sub_group_size();
  dst++; /* skip the header slot */

  /* Only the shuffled data value is narrowed to the element type. */
  short from = (short)i;
  int j = get_sub_group_local_id() + 1;
  int k = get_max_sub_group_size() - get_sub_group_local_id() - 1;
  short o0 = intel_sub_group_shuffle_up((short)123, (short)456, c);
  short o1 = intel_sub_group_shuffle_up((short)123, from, c);
  /* -from is promoted to int by the unary minus; cast back to short. */
  short o2 = intel_sub_group_shuffle_up(from, (short)-from, k);
  short o3 = intel_sub_group_shuffle_up(from, (short)321, j);
  dst[i*4] = o0;
  dst[i*4+1] = o1;
  dst[i*4+2] = o2;
  dst[i*4+3] = o3;
}
#endif