diff options
-rw-r--r-- | kernels/compiler_atomic_functions_20.cl | 53 | ||||
-rw-r--r-- | utests/CMakeLists.txt | 1 | ||||
-rw-r--r-- | utests/compiler_atomic_functions_20.cpp | 106 |
3 files changed, 160 insertions, 0 deletions
diff --git a/kernels/compiler_atomic_functions_20.cl b/kernels/compiler_atomic_functions_20.cl new file mode 100644 index 00000000..cbca52e7 --- /dev/null +++ b/kernels/compiler_atomic_functions_20.cl @@ -0,0 +1,53 @@ +__kernel void compiler_atomic_functions_20(__global int *dst, __local int *tmp, __global int *src) { + int lid = get_local_id(0); + int i = lid % 12; + atomic_int* p = (atomic_int*)tmp; + if(lid == 0) { + for(int j=0; j<12; j=j+1) { + atomic_exchange(&p[j], 0); + } + atomic_exchange(&p[4], -1); + } + barrier(CLK_LOCAL_MEM_FENCE); + int compare = 0; + + switch(i) { + case 0: atomic_inc(&tmp[i]); break; + case 1: atomic_dec(&tmp[i]); break; + case 2: atomic_fetch_add(&p[i], src[lid]); break; + case 3: atomic_fetch_sub(&p[i], src[lid]); break; + case 4: atomic_fetch_and(&p[i], ~(src[lid]<<(lid / 16))); break; + case 5: atomic_fetch_or (&p[i], src[lid]<<(lid / 16)); break; + case 6: atomic_fetch_xor(&p[i], src[lid]); break; + case 7: atomic_fetch_min(&p[i], -src[lid]); break; + case 8: atomic_fetch_max(&p[i], src[lid]); break; + case 9: atomic_fetch_min((atomic_uint*)&p[i], -src[lid]); break; + case 10: atomic_fetch_max((atomic_uint*)&p[i], src[lid]); break; + case 11: atomic_compare_exchange_strong(&p[i], &compare, src[10]); break; + default: break; + } + + atomic_int* d = (atomic_int*)dst; + switch(i) { + case 0: atomic_inc(&dst[i]); break; + case 1: atomic_dec(&dst[i]); break; + case 2: atomic_fetch_add(&d[i], src[lid]); break; + case 3: atomic_fetch_sub(&d[i], src[lid]); break; + case 4: atomic_fetch_and(&d[i], ~(src[lid]<<(lid / 16))); break; + case 5: atomic_fetch_or (&d[i], src[lid]<<(lid / 16)); break; + case 6: atomic_fetch_xor(&d[i], src[lid]); break; + case 7: atomic_fetch_min(&d[i], -src[lid]); break; + case 8: atomic_fetch_max(&d[i], src[lid]); break; + case 9: atomic_fetch_min((atomic_uint*)&d[i], -src[lid]); break; + case 10: atomic_fetch_max((atomic_uint*)&d[i], src[lid]); break; + case 11: atomic_compare_exchange_strong(&d[i], &compare, src[10]); break; + default: break; + } + + barrier(CLK_GLOBAL_MEM_FENCE); + + if(get_global_id(0) == 0) { + for(i=0; i<12; i=i+1) + atomic_xchg(&dst[i+12], tmp[i]); + } +} diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt index b5acc2f6..3f94cd7c 100644 --- a/utests/CMakeLists.txt +++ b/utests/CMakeLists.txt @@ -302,6 +302,7 @@ if (ENABLE_OPENCL_20) ${utests_sources} compiler_program_global.cpp compiler_generic_atomic.cpp + compiler_atomic_functions_20.cpp compiler_generic_pointer.cpp) endif (ENABLE_OPENCL_20) diff --git a/utests/compiler_atomic_functions_20.cpp b/utests/compiler_atomic_functions_20.cpp new file mode 100644 index 00000000..ea1ace51 --- /dev/null +++ b/utests/compiler_atomic_functions_20.cpp @@ -0,0 +1,106 @@ +#include "utest_helper.hpp" +#include <cmath> +#include <algorithm> +#include <string.h> + +#define GROUP_NUM 16 +#define LOCAL_SIZE 256 +static void cpu_compiler_atomic(int *dst, int *src) +{ + dst[4] = 0xffffffff; + int tmp[16] = { 0 }; + tmp[4] = -1; + for(int j=0; j<LOCAL_SIZE; j++) { + int i = j % 12; + + switch(i) { + case 0: tmp[i] += 1; break; + case 1: tmp[i] -= 1; break; + case 2: tmp[i] += src[j]; break; + case 3: tmp[i] -= src[j]; break; + case 4: tmp[i] &= ~(src[j]<<(j>>4)); break; + case 5: tmp[i] |= src[j]<<(j>>4); break; + case 6: tmp[i] ^= src[j]; break; + case 7: tmp[i] = tmp[i] < -src[j] ? tmp[i] : -src[j]; break; + case 8: tmp[i] = tmp[i] > src[j] ? tmp[i] : src[j]; break; + case 9: tmp[i] = (unsigned int)tmp[i] < (unsigned int)(-src[j]) ? tmp[i] : -src[j]; break; + case 10: tmp[i] = (unsigned int)tmp[i] > (unsigned int)(src[j]) ? tmp[i] : src[j]; break; + case 11: tmp[i] = src[10]; break; + default: break; + } + } + + for(int k=0; k<GROUP_NUM; k++) { + for(int j=0; j<LOCAL_SIZE; j++) { + int i = j % 12; + + switch(i) { + case 0: dst[i] += 1; break; + case 1: dst[i] -= 1; break; + case 2: dst[i] += src[j]; break; + case 3: dst[i] -= src[j]; break; + case 4: dst[i] &= ~(src[j]<<(j>>4)); break; + case 5: dst[i] |= src[j]<<(j>>4); break; + case 6: dst[i] ^= src[j]; break; + case 7: dst[i] = dst[i] < -src[j] ? dst[i] : -src[j]; break; + case 8: dst[i] = dst[i] > src[j] ? dst[i] : src[j]; break; + case 9: dst[i] = (unsigned int)dst[i] < (unsigned int)(-src[j]) ? dst[i] : -src[j]; break; + case 10: dst[i] = (unsigned int)dst[i] > (unsigned int)(src[j]) ? dst[i] : src[j]; break; + case 11: dst[i] = src[10]; break; + default: break; + } + } + } + + for(int i=0; i<12; i++) + dst[i+12] = tmp[i]; +} + +static void compiler_atomic_functions(const char* kernel_name) +{ + const size_t n = GROUP_NUM * LOCAL_SIZE; + int cpu_dst[24] = {0}, cpu_src[256]; + + globals[0] = n; + locals[0] = LOCAL_SIZE; + + // Setup kernel and buffers + OCL_CREATE_KERNEL_FROM_FILE("compiler_atomic_functions_20", kernel_name); + OCL_CREATE_BUFFER(buf[0], 0, 24 * sizeof(int), NULL); + OCL_CREATE_BUFFER(buf[1], 0, locals[0] * sizeof(int), NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, 16 * sizeof(int), NULL); + OCL_SET_ARG(2, sizeof(cl_mem), &buf[1]); + + OCL_MAP_BUFFER(0); + memset(buf_data[0], 0, 24 * sizeof(int)); + ((int *)buf_data[0])[4] = -1; + OCL_UNMAP_BUFFER(0); + + OCL_MAP_BUFFER(1); + for (uint32_t i = 0; i < locals[0]; ++i) + cpu_src[i] = ((int*)buf_data[1])[i] = rand() & 0xff; + cpu_compiler_atomic(cpu_dst, cpu_src); + OCL_UNMAP_BUFFER(1); + OCL_NDRANGE(1); + + OCL_MAP_BUFFER(0); + + // Check results + for(int i=0; i<24; i++) { + //printf("The dst(%d) gpu(0x%x) cpu(0x%x)\n", i, ((uint32_t *)buf_data[0])[i], cpu_dst[i]); + OCL_ASSERT(((int *)buf_data[0])[i] == cpu_dst[i]); + } + OCL_UNMAP_BUFFER(0); +} + +#define compiler_atomic(kernel, version) \ +static void compiler_atomic_functions_##version()\ +{\ + compiler_atomic_functions(kernel); \ +} \ +MAKE_UTEST_FROM_FUNCTION(compiler_atomic_functions_##version) + +compiler_atomic("compiler_atomic_functions_20", 20) + + |