#include "utest_helper.hpp"
// NOTE(review): the three header names below were missing from the damaged
// text. They are the standard headers the visible code needs (uint32_t, rand,
// memset) -- confirm against the upstream file.
#include <cstdint>
#include <cstdlib>
#include <cstring>

#define GROUP_NUM 16
#define LOCAL_SIZE 256
/* Number of distinct operations in the reference model: the switch below has
 * cases 0..11 and the final copy moves 12 results. */
#define ATOMIC_OP_NUM 12

/*
 * Apply the operation selected by work-item index j to the accumulator array.
 *
 * acc : accumulator array with at least ATOMIC_OP_NUM elements; slot
 *       j % ATOMIC_OP_NUM is updated in place.
 * src : input values, indexed by j (slot 11 always reads src[10]).
 * j   : work-item index within one pass, 0 <= j < LOCAL_SIZE.
 *
 * Shared by the single-pass and the multi-pass accumulations so that both use
 * exactly the same per-slot operation.
 */
static void cpu_atomic_step(int *acc, const int *src, int j)
{
  // NOTE(review): the slot selection (j % 12) and cases 0-3 were lost from the
  // damaged text and are reconstructed here; cases 4-11 are as found. Verify
  // cases 0-3 against compiler_atomic_functions_20.cl.
  const int i = j % ATOMIC_OP_NUM;

  switch (i) {
    case 0: acc[i] += 1; break;
    case 1: acc[i] -= 1; break;
    case 2: acc[i] += src[j]; break;
    case 3: acc[i] -= src[j]; break;
    // Slot 4 starts as all-ones (see callers), so the AND has bits to clear.
    case 4: acc[i] &= ~(src[j] << (j >> 4)); break;
    case 5: acc[i] |= src[j] << (j >> 4); break;
    case 6: acc[i] ^= src[j]; break;
    // Signed min / max.
    case 7: acc[i] = acc[i] < -src[j] ? acc[i] : -src[j]; break;
    case 8: acc[i] = acc[i] > src[j] ? acc[i] : src[j]; break;
    // Same comparisons, but performed on the values reinterpreted as unsigned.
    case 9: acc[i] = (unsigned int)acc[i] < (unsigned int)(-src[j]) ? acc[i] : -src[j]; break;
    case 10: acc[i] = (unsigned int)acc[i] > (unsigned int)(src[j]) ? acc[i] : src[j]; break;
    case 11: acc[i] = src[10]; break;
    default: break;
  }
}

/*
 * CPU reference for the values the test expects in the result buffer.
 *
 * dst : at least 2 * ATOMIC_OP_NUM ints. dst[0..11] are accumulated in place
 *       over GROUP_NUM passes (dst[4] is first forced to all-ones);
 *       dst[12..23] receive the result of one single pass that starts from
 *       zero (slot 4 from all-ones).
 * src : LOCAL_SIZE input values.
 */
static void cpu_compiler_atomic(int *dst, int *src)
{
  dst[4] = -1;  // all bits set, same value as tmp[4] below
  int tmp[16] = { 0 };
  tmp[4] = -1;

  // NOTE(review): both loop bounds were lost from the damaged text; LOCAL_SIZE
  // and GROUP_NUM are reconstructed from the sizes used by the caller.
  for (int j = 0; j < LOCAL_SIZE; j++)
    cpu_atomic_step(tmp, src, j);

  for (int k = 0; k < GROUP_NUM; k++) {
    for (int j = 0; j < LOCAL_SIZE; j++)
      cpu_atomic_step(dst, src, j);
  }

  for (int i = 0; i < ATOMIC_OP_NUM; i++)
    dst[i + ATOMIC_OP_NUM] = tmp[i];
}

/*
 * Build and run the named kernel from compiler_atomic_functions_20.cl with
 * -cl-std=CL2.0, then compare all 24 result values with the CPU reference.
 * Returns without running anything when cl_check_ocl20(false) reports false.
 */
static void compiler_atomic_functions(const char* kernel_name)
{
  if(!cl_check_ocl20(false))
    return;

  const size_t n = GROUP_NUM * LOCAL_SIZE;
  // cpu_src is filled for i < locals[0] == LOCAL_SIZE, so size it accordingly.
  int cpu_dst[2 * ATOMIC_OP_NUM] = {0}, cpu_src[LOCAL_SIZE];

  globals[0] = n;
  locals[0] = LOCAL_SIZE;

  // Setup kernel and buffers
  OCL_CALL(cl_kernel_init, "compiler_atomic_functions_20.cl", kernel_name, SOURCE, "-cl-std=CL2.0");
  OCL_CREATE_BUFFER(buf[0], 0, 24 * sizeof(int), NULL);
  OCL_CREATE_BUFFER(buf[1], 0, locals[0] * sizeof(int), NULL);
  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
  // Size-only argument with a NULL value (presumably the kernel's local
  // scratch array -- confirm against the .cl file).
  OCL_SET_ARG(1, 16 * sizeof(int), NULL);
  OCL_SET_ARG(2, sizeof(cl_mem), &buf[1]);

  // Result buffer starts zeroed except slot 4, which starts as all-ones,
  // matching the starting state used by cpu_compiler_atomic().
  OCL_MAP_BUFFER(0);
  memset(buf_data[0], 0, 24 * sizeof(int));
  ((int *)buf_data[0])[4] = -1;
  OCL_UNMAP_BUFFER(0);

  // Same random bytes (0..255) go to the device input and the CPU reference.
  OCL_MAP_BUFFER(1);
  for (uint32_t i = 0; i < locals[0]; ++i)
    cpu_src[i] = ((int*)buf_data[1])[i] = rand() & 0xff;
  cpu_compiler_atomic(cpu_dst, cpu_src);
  OCL_UNMAP_BUFFER(1);

  OCL_NDRANGE(1);

  OCL_MAP_BUFFER(0);

  // Check results
  for(int i=0; i<24; i++) {
    //printf("The dst(%d) gpu(0x%x) cpu(0x%x)\n", i, ((uint32_t *)buf_data[0])[i], cpu_dst[i]);
    OCL_ASSERT(((int *)buf_data[0])[i] == cpu_dst[i]);
  }
  OCL_UNMAP_BUFFER(0);
}

/* Define compiler_atomic_functions_<version>() as a wrapper that runs the
 * given kernel, and register it as a unit test. */
#define compiler_atomic(kernel, version) \
static void compiler_atomic_functions_##version()\
{\
  compiler_atomic_functions(kernel); \
} \
MAKE_UTEST_FROM_FUNCTION(compiler_atomic_functions_##version)

compiler_atomic("compiler_atomic_functions_20", 20)