diff options
author | Yang Rong <rong.r.yang@intel.com> | 2013-07-02 15:22:24 +0800 |
---|---|---|
committer | Zhigang Gong <zhigang.gong@linux.intel.com> | 2013-07-03 18:19:42 +0800 |
commit | 046834c47eb579eb4b66df366ebb405c2bf97a1d (patch) | |
tree | e2c72fd2009b8f4f3a26b59674b1bec37ad0c73c /utests/compiler_atomic_functions.cpp | |
parent | 942f9f9277b5c22723734883989eb676b4ab3d0b (diff) | |
download | beignet-046834c47eb579eb4b66df366ebb405c2bf97a1d.tar.gz |
Fix atomic test failed in GT1.
Barrier only ensure one work group finish, can't guarantee all work item's atomic ops
have finished before the last atomic_add.
So use atomic_xchg to update first work group's local buffer to other global buffer position.
Signed-off-by: Yang Rong <rong.r.yang@intel.com>
Reviewed-by: Xing, Homer <homer.xing@intel.com>
Diffstat (limited to 'utests/compiler_atomic_functions.cpp')
-rw-r--r-- | utests/compiler_atomic_functions.cpp | 23 |
1 files changed, 12 insertions, 11 deletions
diff --git a/utests/compiler_atomic_functions.cpp b/utests/compiler_atomic_functions.cpp index 571e0c67..65f1c5a7 100644 --- a/utests/compiler_atomic_functions.cpp +++ b/utests/compiler_atomic_functions.cpp @@ -4,12 +4,12 @@ #include <string.h> #define GROUP_NUM 16 -#define LOCAL_SIZE 64 +#define LOCAL_SIZE 256 static void cpu_compiler_atomic(int *dst, int *src) { dst[4] = 0xffffffff; int tmp[16] = { 0 }; - + tmp[4] = -1; for(int j=0; j<LOCAL_SIZE; j++) { int i = j % 12; @@ -18,8 +18,8 @@ static void cpu_compiler_atomic(int *dst, int *src) case 1: tmp[i] -= 1; break; case 2: tmp[i] += src[j]; break; case 3: tmp[i] -= src[j]; break; - case 4: tmp[i] &= ~(src[j]<<(j>>2)); break; - case 5: tmp[i] |= src[j]<<(j>>2); break; + case 4: tmp[i] &= ~(src[j]<<(j>>4)); break; + case 5: tmp[i] |= src[j]<<(j>>4); break; case 6: tmp[i] ^= src[j]; break; case 7: tmp[i] = tmp[i] < -src[j] ? tmp[i] : -src[j]; break; case 8: tmp[i] = tmp[i] > src[j] ? tmp[i] : src[j]; break; @@ -39,8 +39,8 @@ static void cpu_compiler_atomic(int *dst, int *src) case 1: dst[i] -= 1; break; case 2: dst[i] += src[j]; break; case 3: dst[i] -= src[j]; break; - case 4: dst[i] &= ~(src[j]<<(j>>2)); break; - case 5: dst[i] |= src[j]<<(j>>2); break; + case 4: dst[i] &= ~(src[j]<<(j>>4)); break; + case 5: dst[i] |= src[j]<<(j>>4); break; case 6: dst[i] ^= src[j]; break; case 7: dst[i] = dst[i] < -src[j] ? dst[i] : -src[j]; break; case 8: dst[i] = dst[i] > src[j] ? dst[i] : src[j]; break; @@ -53,27 +53,28 @@ static void cpu_compiler_atomic(int *dst, int *src) } for(int i=0; i<12; i++) - dst[i] += tmp[i]; + dst[i+12] = tmp[i]; } static void compiler_atomic_functions(void) { const size_t n = GROUP_NUM * LOCAL_SIZE; - int cpu_dst[16] = {0}, cpu_src[256]; + int cpu_dst[24] = {0}, cpu_src[256]; globals[0] = n; locals[0] = LOCAL_SIZE; // Setup kernel and buffers OCL_CREATE_KERNEL("compiler_atomic_functions"); - OCL_CREATE_BUFFER(buf[0], 0, 16 * sizeof(int), NULL); + OCL_CREATE_BUFFER(buf[0], 0, 24 * sizeof(int), NULL); OCL_CREATE_BUFFER(buf[1], 0, locals[0] * sizeof(int), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, 16 * sizeof(int), NULL); OCL_SET_ARG(2, sizeof(cl_mem), &buf[1]); OCL_MAP_BUFFER(0); - memset(buf_data[0], 0, 16 * sizeof(int)); + memset(buf_data[0], 0, 24 * sizeof(int)); + ((int *)buf_data[0])[4] = -1; OCL_UNMAP_BUFFER(0); OCL_MAP_BUFFER(1); @@ -86,7 +87,7 @@ static void compiler_atomic_functions(void) OCL_MAP_BUFFER(0); // Check results - for(int i=0; i<12; i++) { + for(int i=0; i<24; i++) { //printf("The dst(%d) gpu(0x%x) cpu(0x%x)\n", i, ((uint32_t *)buf_data[0])[i], cpu_dst[i]); OCL_ASSERT(((int *)buf_data[0])[i] == cpu_dst[i]); } |