summaryrefslogtreecommitdiff
path: root/kernels/bench_copy_buffer.cl
blob: 8d8afd8b2c927ee8e35bd83a817b49362fa10fac (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
/*
 * Scale factor applied to the accumulated sum in the bench_filter_buffer_*
 * kernels. Those kernels add nine samples and multiply by this value, so it
 * serves as an approximation of 1/9 (six significant digits).
 */
const constant float filter_flag = 0.111111f;
/*
 * Copy benchmark, uchar4 variant.
 *
 * Each work-item of a 2D NDRange copies exactly one uchar4 element from
 * src to dst. The element index is row * width + column, where width is
 * the global size of dimension 0.
 *
 * src: input buffer, read at the work-item's linear index.
 * dst: output buffer, written at the same linear index.
 */
__kernel void
bench_copy_buffer_uchar(__global uchar4* src, __global uchar4* dst)
{
  const int col   = (int)get_global_id(0);
  const int row   = (int)get_global_id(1);
  const int width = (int)get_global_size(0);

  /* Linear offset of this work-item's element. */
  const int idx = row * width + col;

  dst[idx] = src[idx];
}

/*
 * Copy benchmark, ushort4 variant.
 *
 * Each work-item of a 2D NDRange copies exactly one ushort4 element from
 * src to dst. The element index is row * width + column, where width is
 * the global size of dimension 0.
 *
 * src: input buffer, read at the work-item's linear index.
 * dst: output buffer, written at the same linear index.
 */
__kernel void
bench_copy_buffer_ushort(__global ushort4* src, __global ushort4* dst)
{
  const int col   = (int)get_global_id(0);
  const int row   = (int)get_global_id(1);
  const int width = (int)get_global_size(0);

  /* Linear offset of this work-item's element. */
  const int idx = row * width + col;

  dst[idx] = src[idx];
}

/*
 * Copy benchmark, uint4 variant.
 *
 * Each work-item of a 2D NDRange copies exactly one uint4 element from
 * src to dst. The element index is row * width + column, where width is
 * the global size of dimension 0.
 *
 * src: input buffer, read at the work-item's linear index.
 * dst: output buffer, written at the same linear index.
 */
__kernel void
bench_copy_buffer_uint(__global uint4* src, __global uint4* dst)
{
  const int col   = (int)get_global_id(0);
  const int row   = (int)get_global_id(1);
  const int width = (int)get_global_size(0);

  /* Linear offset of this work-item's element. */
  const int idx = row * width + col;

  dst[idx] = src[idx];
}

/*
 * 3x3 box-filter benchmark, uchar4 variant.
 *
 * Each work-item of a 2D NDRange sums the nine uchar4 elements of the 3x3
 * neighbourhood centred on its own element (as float4), scales the sum by
 * filter_flag, converts the result back to uchar4 and stores it in dst.
 * The buffer is addressed as row * width + column, with width and height
 * taken from the global sizes of dimensions 0 and 1.
 *
 * Border handling: a neighbour that would fall outside the NDRange is
 * replaced by the work-item's own row/column. The previous test
 * "(x - 1) > 0" also rejected the valid neighbour at index 0, so column 1
 * and row 1 sampled themselves instead of column 0 / row 0; the clamp now
 * triggers only on the actual edge.
 *
 * src: input buffer, read at up to nine positions around the work-item.
 * dst: output buffer, written at the work-item's own position.
 */
__kernel void
bench_filter_buffer_uchar(__global uchar4* src, __global uchar4* dst)
{
  float4 result;
  int x = (int)get_global_id(0);
  int y = (int)get_global_id(1);
  int x_sz = (int)get_global_size(0);
  int y_sz = (int)get_global_size(1);

  /* Neighbour coordinates, falling back to the centre on each edge. */
  int x_left   = (x > 0)        ? x - 1 : x;
  int x_right  = (x < x_sz - 1) ? x + 1 : x;
  int y_top    = (y > 0)        ? y - 1 : y;
  int y_bottom = (y < y_sz - 1) ? y + 1 : y;

  /* Accumulate row by row, left to right (same order as before). */
  result = convert_float4(src[y_top * x_sz + x_left]) + convert_float4(src[y_top * x_sz + x]) + convert_float4(src[y_top * x_sz + x_right])
         + convert_float4(src[y * x_sz + x_left]) + convert_float4(src[y * x_sz + x]) + convert_float4(src[y * x_sz + x_right])
         + convert_float4(src[y_bottom * x_sz + x_left]) + convert_float4(src[y_bottom * x_sz + x]) + convert_float4(src[y_bottom * x_sz + x_right]);

  dst[y * x_sz + x] = convert_uchar4(result * filter_flag);
}

/*
 * 3x3 box-filter benchmark, ushort4 variant.
 *
 * Each work-item of a 2D NDRange sums the nine ushort4 elements of the 3x3
 * neighbourhood centred on its own element (as float4), scales the sum by
 * filter_flag, converts the result back to ushort4 and stores it in dst.
 * The buffer is addressed as row * width + column, with width and height
 * taken from the global sizes of dimensions 0 and 1.
 *
 * Border handling: a neighbour that would fall outside the NDRange is
 * replaced by the work-item's own row/column. The previous test
 * "(x - 1) > 0" also rejected the valid neighbour at index 0, so column 1
 * and row 1 sampled themselves instead of column 0 / row 0; the clamp now
 * triggers only on the actual edge.
 *
 * src: input buffer, read at up to nine positions around the work-item.
 * dst: output buffer, written at the work-item's own position.
 */
__kernel void
bench_filter_buffer_ushort(__global ushort4* src, __global ushort4* dst)
{
  float4 result;
  int x = (int)get_global_id(0);
  int y = (int)get_global_id(1);
  int x_sz = (int)get_global_size(0);
  int y_sz = (int)get_global_size(1);

  /* Neighbour coordinates, falling back to the centre on each edge. */
  int x_left   = (x > 0)        ? x - 1 : x;
  int x_right  = (x < x_sz - 1) ? x + 1 : x;
  int y_top    = (y > 0)        ? y - 1 : y;
  int y_bottom = (y < y_sz - 1) ? y + 1 : y;

  /* Accumulate row by row, left to right (same order as before). */
  result = convert_float4(src[y_top * x_sz + x_left]) + convert_float4(src[y_top * x_sz + x]) + convert_float4(src[y_top * x_sz + x_right])
         + convert_float4(src[y * x_sz + x_left]) + convert_float4(src[y * x_sz + x]) + convert_float4(src[y * x_sz + x_right])
         + convert_float4(src[y_bottom * x_sz + x_left]) + convert_float4(src[y_bottom * x_sz + x]) + convert_float4(src[y_bottom * x_sz + x_right]);

  dst[y * x_sz + x] = convert_ushort4(result * filter_flag);
}

/*
 * 3x3 box-filter benchmark, uint4 variant.
 *
 * Each work-item of a 2D NDRange sums the nine uint4 elements of the 3x3
 * neighbourhood centred on its own element (as float4), scales the sum by
 * filter_flag, converts the result back to uint4 and stores it in dst.
 * The buffer is addressed as row * width + column, with width and height
 * taken from the global sizes of dimensions 0 and 1.
 *
 * Border handling: a neighbour that would fall outside the NDRange is
 * replaced by the work-item's own row/column. The previous test
 * "(x - 1) > 0" also rejected the valid neighbour at index 0, so column 1
 * and row 1 sampled themselves instead of column 0 / row 0; the clamp now
 * triggers only on the actual edge.
 *
 * NOTE(review): accumulation is done in single-precision float, so large
 * 32-bit inputs are not represented exactly — acceptable for a benchmark,
 * confirm if the output is ever compared bit-for-bit.
 *
 * src: input buffer, read at up to nine positions around the work-item.
 * dst: output buffer, written at the work-item's own position.
 */
__kernel void
bench_filter_buffer_uint(__global uint4* src, __global uint4* dst)
{
  float4 result;
  int x = (int)get_global_id(0);
  int y = (int)get_global_id(1);
  int x_sz = (int)get_global_size(0);
  int y_sz = (int)get_global_size(1);

  /* Neighbour coordinates, falling back to the centre on each edge. */
  int x_left   = (x > 0)        ? x - 1 : x;
  int x_right  = (x < x_sz - 1) ? x + 1 : x;
  int y_top    = (y > 0)        ? y - 1 : y;
  int y_bottom = (y < y_sz - 1) ? y + 1 : y;

  /* Accumulate row by row, left to right (same order as before). */
  result = convert_float4(src[y_top * x_sz + x_left]) + convert_float4(src[y_top * x_sz + x]) + convert_float4(src[y_top * x_sz + x_right])
         + convert_float4(src[y * x_sz + x_left]) + convert_float4(src[y * x_sz + x]) + convert_float4(src[y * x_sz + x_right])
         + convert_float4(src[y_bottom * x_sz + x_left]) + convert_float4(src[y_bottom * x_sz + x]) + convert_float4(src[y_bottom * x_sz + x_right]);

  dst[y * x_sz + x] = convert_uint4(result * filter_flag);
}