diff options
-rw-r--r-- | kernels/compiler_local_memory_barrier_2.cl | 7 | ||||
-rw-r--r-- | utests/CMakeLists.txt | 1 | ||||
-rw-r--r-- | utests/compiler_local_memory_barrier_2.cpp | 29 |
3 files changed, 37 insertions, 0 deletions
diff --git a/kernels/compiler_local_memory_barrier_2.cl b/kernels/compiler_local_memory_barrier_2.cl
new file mode 100644
index 00000000..f6dd59d2
--- /dev/null
+++ b/kernels/compiler_local_memory_barrier_2.cl
@@ -0,0 +1,7 @@
+__kernel void compiler_local_memory_barrier_2(__global int *dst, __local int *src) {
+  src[get_local_id(0)] = get_local_id(0);
+  src[get_local_size(0) + get_local_id(0)] = get_local_id(0);
+  barrier(CLK_LOCAL_MEM_FENCE);
+  dst[get_local_size(0) * (2 * get_group_id(0)) + get_local_id(0)] = src[get_local_size(0) - (get_local_id(0) + 1)];
+  dst[get_local_size(0) * (2 * get_group_id(0) + 1) + get_local_id(0)] = src[get_local_size(0) + get_local_size(0) - (get_local_id(0) + 1)];
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 93778ed5..108fa068 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -75,6 +75,7 @@ set (utests_sources
   compiler_local_memory_two_ptr.cpp
   compiler_local_memory_barrier.cpp
   compiler_local_memory_barrier_wg64.cpp
+# compiler_local_memory_barrier_2.cpp
   compiler_movforphi_undef.cpp
   compiler_volatile.cpp
   compiler_copy_image1.cpp
diff --git a/utests/compiler_local_memory_barrier_2.cpp b/utests/compiler_local_memory_barrier_2.cpp
new file mode 100644
index 00000000..d670654e
--- /dev/null
+++ b/utests/compiler_local_memory_barrier_2.cpp
@@ -0,0 +1,29 @@
+#include "utest_helper.hpp"
+
+static void compiler_local_memory_barrier_2(void)
+{
+  const size_t n = 16*1024;
+  // The kernel stores two dst entries per work item, so n/2 work items cover all n
+  globals[0] = n/2;
+  locals[0] = 32;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("compiler_local_memory_barrier_2");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint32_t), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  // The kernel indexes src up to 2 * local size, hence two ints per work item
+  OCL_SET_ARG(1, locals[0] * 2 * sizeof(uint32_t), NULL);
+
+  // Run the kernel
+  OCL_NDRANGE(1);
+  OCL_MAP_BUFFER(0);
+
+  // Check results: each run of locals[0] entries counts down from locals[0] - 1 to 0
+  uint32_t *dst = (uint32_t*)buf_data[0];
+  for (uint32_t i = 0; i < n; i+=locals[0])
+    for (uint32_t j = 0; j < locals[0]; ++j)
+      OCL_ASSERT(dst[i+j] == locals[0] - 1 -j);
+  OCL_UNMAP_BUFFER(0);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_local_memory_barrier_2);