/* Source path: root/kernels/vload_bench.cl
 * Blob: c906c7522d1dbbdb048137b3527dcf920e33491d
 */
/*
 * VLOAD_BENCH(T, N, M)
 *
 * Defines the kernel vload_bench_<M><T><N>(src, dst, offset).
 *   T - scalar element type of the source buffer
 *   N - vector width used for the vload/vstore/convert_uint variants
 *   M - loop iteration count (also pasted into the kernel name)
 *
 * Each work-item reads its index from get_global_id(0). It then performs M
 * vector loads of width N from (src + offset), at vector index
 * id + (iteration & 0xFFFF), converts every loaded vector to uint<N> and
 * adds it to a running sum. The sum is written with vstore<N> to dst at
 * vector index id.
 */
#define VLOAD_BENCH(T, N, M) \
__kernel void \
vload_bench_##M ##T ##N(__global T* src, __global uint* dst, uint offset) \
{ \
  const int gid = (int)get_global_id(0); \
  /* Loop-invariant start of the region being read. */ \
  __global T* base = src + offset; \
  uint ##N acc = 0; \
  for (int iter = 0; iter < M; ++iter) { \
    acc += convert_uint ##N(vload ##N(gid + (iter & 0xFFFF), base)); \
  } \
  vstore ##N(acc, gid, dst); \
}

/*
 * VLOAD_BENCH_ALL_VECTOR(T, N_ITERATIONS)
 *
 * Expands VLOAD_BENCH for element type T once per vector width
 * (2, 3, 4, 8, 16), passing the same iteration count to each.
 */
#define VLOAD_BENCH_ALL_VECTOR(T, N_ITERATIONS) \
  VLOAD_BENCH(T,  2, N_ITERATIONS) \
  VLOAD_BENCH(T,  3, N_ITERATIONS) \
  VLOAD_BENCH(T,  4, N_ITERATIONS) \
  VLOAD_BENCH(T,  8, N_ITERATIONS) \
  VLOAD_BENCH(T, 16, N_ITERATIONS)

/*
 * VLOAD_BENCH_ALL_TYPES(N_ITERATIONS)
 *
 * Expands VLOAD_BENCH_ALL_VECTOR once per element type
 * (uchar, char, ushort, short, uint, int, float), passing the same
 * iteration count to each.
 */
#define VLOAD_BENCH_ALL_TYPES(N_ITERATIONS) \
  VLOAD_BENCH_ALL_VECTOR(uchar,  N_ITERATIONS) \
  VLOAD_BENCH_ALL_VECTOR(char,   N_ITERATIONS) \
  VLOAD_BENCH_ALL_VECTOR(ushort, N_ITERATIONS) \
  VLOAD_BENCH_ALL_VECTOR(short,  N_ITERATIONS) \
  VLOAD_BENCH_ALL_VECTOR(uint,   N_ITERATIONS) \
  VLOAD_BENCH_ALL_VECTOR(int,    N_ITERATIONS) \
  VLOAD_BENCH_ALL_VECTOR(float,  N_ITERATIONS)

/*
 * Instantiate the kernels for two iteration counts: 1 and 10000.
 * Each line expands to one kernel per (type, vector width) pair, named
 * vload_bench_<iterations><type><width>, e.g. vload_bench_1uchar2 and
 * vload_bench_10000float16.
 */
VLOAD_BENCH_ALL_TYPES(1)
VLOAD_BENCH_ALL_TYPES(10000)