diff options
-rw-r--r-- | benchmark/CMakeLists.txt | 3 | ||||
-rw-r--r-- | benchmark/benchmark_copy_buffer.cpp | 55 | ||||
-rw-r--r-- | kernels/bench_copy_buffer.cl | 27 |
3 files changed, 84 insertions, 1 deletions
diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt
index 3e43a214..03a56f22 100644
--- a/benchmark/CMakeLists.txt
+++ b/benchmark/CMakeLists.txt
@@ -16,7 +16,8 @@ set (benchmark_sources
   benchmark_read_buffer.cpp
   benchmark_read_image.cpp
   benchmark_copy_buffer_to_image.cpp
-  benchmark_copy_image_to_buffer.cpp)
+  benchmark_copy_image_to_buffer.cpp
+  benchmark_copy_buffer.cpp)
 
 
 SET(CMAKE_CXX_FLAGS "-DBUILD_BENCHMARK ${CMAKE_CXX_FLAGS}")
diff --git a/benchmark/benchmark_copy_buffer.cpp b/benchmark/benchmark_copy_buffer.cpp
new file mode 100644
index 00000000..88983a79
--- /dev/null
+++ b/benchmark/benchmark_copy_buffer.cpp
@@ -0,0 +1,55 @@
+#include "utests/utest_helper.hpp"
+#include <sys/time.h>
+
+/* BENCH_COPY_BUFFER(T, K, M) defines benchmark_copy_buffer_<T>(): it launches
+ * kernel K from "bench_copy_buffer" 100 times over two buffers of 4 * 1920 *
+ * 1080 elements of type M and returns BANDWIDTH(total bytes, elapsed time). */
+#define BENCH_COPY_BUFFER(T, K, M) \
+double benchmark_copy_buffer_ ##T(void) \
+{ \
+  struct timeval start, stop; \
+  const size_t w = 1920; \
+  const size_t h = 1080; \
+  /* Sized for one 4-wide vector of M per work-item of the w x h range. */ \
+  const size_t sz = 4 * w * h; \
+  OCL_CREATE_BUFFER(buf[0], 0, sz * sizeof(M), NULL); \
+  OCL_CREATE_BUFFER(buf[1], 0, sz * sizeof(M), NULL); \
+  OCL_CREATE_KERNEL_FROM_FILE("bench_copy_buffer",K); \
+  /* Give the source buffer non-constant contents. */ \
+  OCL_MAP_BUFFER(0); \
+  for (size_t i = 0; i < sz; i ++) { \
+    ((M *)(buf_data[0]))[i] = rand(); \
+  } \
+  OCL_UNMAP_BUFFER(0); \
+ \
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); \
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); \
+  globals[0] = w; \
+  globals[1] = h; \
+  locals[0] = 16; \
+  locals[1] = 4; \
+ \
+  /* Timed region: 100 launches, OCL_FINISH, then a map/unmap of buf[1]. */ \
+  gettimeofday(&start,0); \
+  for (size_t i=0; i<100; i++) { \
+    OCL_NDRANGE(2); \
+  } \
+  OCL_FINISH(); \
+  OCL_MAP_BUFFER(1); \
+  OCL_UNMAP_BUFFER(1); \
+  gettimeofday(&stop,0); \
+ \
+  /* buf_data[0] only ever came from OCL_MAP_BUFFER, so it is cleared rather */ \
+  /* than free()d; buf[0] is nulled so no released handle is left behind. */ \
+  clReleaseMemObject(buf[0]); \
+  buf[0] = NULL; \
+  buf_data[0] = NULL; \
+  const double elapsed = time_subtract(&stop, &start, 0); \
+  return BANDWIDTH(sz * sizeof(M) * 2 * 100, elapsed); \
+} \
+ \
+MAKE_BENCHMARK_FROM_FUNCTION_KEEP_PROGRAM(benchmark_copy_buffer_ ##T,true);
+
+BENCH_COPY_BUFFER(uchar,"bench_copy_buffer_uchar",unsigned char)
+BENCH_COPY_BUFFER(ushort,"bench_copy_buffer_ushort",unsigned short)
+BENCH_COPY_BUFFER(uint,"bench_copy_buffer_uint",unsigned int)
diff --git a/kernels/bench_copy_buffer.cl b/kernels/bench_copy_buffer.cl
new file mode 100644
index 00000000..ed203528
--- /dev/null
+++ b/kernels/bench_copy_buffer.cl
@@ -0,0 +1,27 @@
+/* Work-item (x, y) copies uchar4 element y * get_global_size(0) + x. */
+__kernel void
+bench_copy_buffer_uchar(__global const uchar4* src, __global uchar4* dst)
+{
+  const int x = (int)get_global_id(0);
+  const int idx = (int)get_global_id(1) * (int)get_global_size(0) + x;
+  dst[idx] = src[idx];
+}
+
+/* Work-item (x, y) copies ushort4 element y * get_global_size(0) + x. */
+__kernel void
+bench_copy_buffer_ushort(__global const ushort4* src, __global ushort4* dst)
+{
+  const int x = (int)get_global_id(0);
+  const int idx = (int)get_global_id(1) * (int)get_global_size(0) + x;
+  dst[idx] = src[idx];
+}
+
+/* Work-item (x, y) copies uint4 element y * get_global_size(0) + x. */
+__kernel void
+bench_copy_buffer_uint(__global const uint4* src, __global uint4* dst)
+{
+  const int x = (int)get_global_id(0);
+  const int idx = (int)get_global_id(1) * (int)get_global_size(0) + x;
+  dst[idx] = src[idx];
+}
+