diff options
-rw-r--r-- | kernels/compiler_overflow.cl | 55 | ||||
-rw-r--r-- | utests/compiler_overflow.cpp | 97 |
2 files changed, 117 insertions, 35 deletions
diff --git a/kernels/compiler_overflow.cl b/kernels/compiler_overflow.cl index 75ed5ce2..af751b77 100644 --- a/kernels/compiler_overflow.cl +++ b/kernels/compiler_overflow.cl @@ -1,20 +1,45 @@ -#define COMPILER_OVERFLOW(TYPE) \ - kernel void compiler_overflow_##TYPE (global TYPE* src, global TYPE* dst) \ +#define COMPILER_OVERFLOW_ADD(TYPE, FUNC) \ + kernel void compiler_overflow_##TYPE##_##FUNC (global TYPE* src0, global TYPE* src1, global TYPE* dst) \ { \ - __global TYPE* A = &src[get_global_id(0)]; \ - TYPE B = 1; \ - *A += B; \ - TYPE carry = -convert_##TYPE((*A) < B); \ + __global TYPE* A = &src0[get_global_id(0)]; \ + __global TYPE* B = &src1[get_global_id(0)]; \ + __global TYPE* C = &dst[get_global_id(0)]; \ + *C = *A + *B; \ + TYPE carry = -convert_##TYPE(*C < *B); \ \ - (*A).y += carry.x; \ - carry.y += ((*A).y < carry.x); \ - (*A).z += carry.y; \ + (*C).y += carry.x; \ + carry.y += ((*C).y < carry.x); \ + (*C).z += carry.y; \ \ - carry.z += ((*A).z < carry.y); \ - (*A).w += carry.z; \ - dst[get_global_id(0)] = src[get_global_id(0)]; \ + carry.z += ((*C).z < carry.y); \ + (*C).w += carry.z; \ + carry.w += ((*C).w < carry.z); \ } -COMPILER_OVERFLOW(uint4) -COMPILER_OVERFLOW(ushort4) -COMPILER_OVERFLOW(uchar4) + +COMPILER_OVERFLOW_ADD(ulong4, add) +COMPILER_OVERFLOW_ADD(uint4, add) +COMPILER_OVERFLOW_ADD(ushort4, add) +COMPILER_OVERFLOW_ADD(uchar4, add) + +#define COMPILER_OVERFLOW_SUB(TYPE, FUNC) \ + kernel void compiler_overflow_##TYPE##_##FUNC (global TYPE* src0, global TYPE* src1, global TYPE* dst) \ +{ \ + __global TYPE* A = &src0[get_global_id(0)]; \ + __global TYPE* B = &src1[get_global_id(0)]; \ + __global TYPE* C = &dst[get_global_id(0)]; \ + TYPE borrow; \ + unsigned result; \ + size_t num = sizeof(*A)/sizeof((*A)[0]); \ + for (uint i = 0; i < num; i++ ) {\ + borrow[i] = __builtin_usub_overflow((*A)[i], (*B)[i], &result); \ + (*C)[i] = result; \ + }\ +\ + for (uint i = 0; i < num-1; i++ ) {\ + borrow[i+1] += (*C)[i+1] < borrow[i];(*C)[i+1] -= borrow[i]; \ + }\ +\ +} + +COMPILER_OVERFLOW_SUB(uint4, sub) diff --git a/utests/compiler_overflow.cpp b/utests/compiler_overflow.cpp index 1d3f53d1..1404cfea 100644 --- a/utests/compiler_overflow.cpp +++ b/utests/compiler_overflow.cpp @@ -3,6 +3,13 @@ namespace { typedef struct { + unsigned long x; + unsigned long y; + unsigned long z; + unsigned long w; +}ulong4; + +typedef struct { uint32_t x; uint32_t y; uint32_t z; @@ -23,8 +30,18 @@ typedef struct { uint8_t w; } uchar4; -template<typename T> -void test(const char *kernel_name, int s_type) +template <typename U> +U get_max() +{ + int shift_bit = sizeof(U)*8; + U u_max = 0; + for (int i = 0; i < shift_bit; i++) + u_max |= 1<<(shift_bit-i-1); + return u_max; +} + +template<typename T, typename U> +void test(const char *kernel_name, int func_type) { const size_t n = 16; @@ -32,41 +49,81 @@ void test(const char *kernel_name, int s_type) OCL_CREATE_KERNEL_FROM_FILE("compiler_overflow", kernel_name); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(T), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(T), NULL); + OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(T), NULL); OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); + + U max = get_max<U>(); OCL_MAP_BUFFER(0); for (uint32_t i = 0; i < n; ++i) { - ((T*)buf_data[0])[i].x = s_type?CL_INT_MAX:CL_UINT_MAX; - ((T*)buf_data[0])[i].y = s_type?CL_INT_MAX:CL_UINT_MAX; - ((T*)buf_data[0])[i].z = s_type?CL_INT_MAX:CL_UINT_MAX; - ((T*)buf_data[0])[i].w = i; + if(func_type == 0) { + ((T*)buf_data[0])[i].x = max; + ((T*)buf_data[0])[i].y = max; + ((T*)buf_data[0])[i].z = max; + ((T*)buf_data[0])[i].w = i; + }else if(func_type == 1) { + ((T*)buf_data[0])[i].x = 0; + ((T*)buf_data[0])[i].y = 0; + ((T*)buf_data[0])[i].z = 0; + ((T*)buf_data[0])[i].w = n+2-i; + }else + OCL_ASSERT(0); } OCL_UNMAP_BUFFER(0); + OCL_MAP_BUFFER(1); + for (uint32_t i = 0; i < n; ++i) { + ((T*)buf_data[1])[i].x = 1; + ((T*)buf_data[1])[i].y = 1; + ((T*)buf_data[1])[i].z = 1; + ((T*)buf_data[1])[i].w = 1; + } + OCL_UNMAP_BUFFER(1); globals[0] = n; locals[0] = 16; OCL_NDRANGE(1); - - OCL_MAP_BUFFER(1); + OCL_MAP_BUFFER(2); for (uint32_t i = 0; i < 16; ++i) { - OCL_ASSERT(((T*)buf_data[1])[i].x == 0); - OCL_ASSERT(((T*)buf_data[1])[i].y == 1); - OCL_ASSERT(((T*)buf_data[1])[i].z == 1); - OCL_ASSERT(((T*)buf_data[1])[i].w == i+2); + // printf("%u,%u,%u,%u\n", ((T*)buf_data[2])[i].x,((T*)buf_data[2])[i].y, ((T*)buf_data[2])[i].z, ((T*)buf_data[2])[i].w ); + if(func_type == 0) { + OCL_ASSERT(((T*)buf_data[2])[i].x == 0); + OCL_ASSERT(((T*)buf_data[2])[i].y == 1); + OCL_ASSERT(((T*)buf_data[2])[i].z == 1); + OCL_ASSERT(((T*)buf_data[2])[i].w == i+2); + }else if(func_type == 1) { + OCL_ASSERT(((T*)buf_data[2])[i].x == max); + OCL_ASSERT(((T*)buf_data[2])[i].y == max-1); + OCL_ASSERT(((T*)buf_data[2])[i].z == max-1); + OCL_ASSERT(((T*)buf_data[2])[i].w == n-i); + }else + OCL_ASSERT(0); } - OCL_UNMAP_BUFFER(1); + OCL_UNMAP_BUFFER(2); } } -#define compiler_overflow(type, kernel, s_type) \ -static void compiler_overflow_ ##type(void)\ +#define compiler_overflow_add(type, subtype, kernel, func_type) \ +static void compiler_overflow_add_ ##type(void)\ {\ - test<type>(# kernel, s_type);\ + test<type, subtype>(# kernel, func_type);\ }\ -MAKE_UTEST_FROM_FUNCTION(compiler_overflow_ ## type); +MAKE_UTEST_FROM_FUNCTION(compiler_overflow_add_ ## type); + +#define compiler_overflow_sub(type, subtype, kernel, func_type) \ +static void compiler_overflow_sub_ ##type(void)\ +{\ + test<type, subtype>(# kernel, func_type);\ +}\ +MAKE_UTEST_FROM_FUNCTION(compiler_overflow_sub_ ## type); + +compiler_overflow_add(ulong4, unsigned long, compiler_overflow_ulong4_add, 0) +compiler_overflow_add(uint4, uint32_t, compiler_overflow_uint4_add, 0) +compiler_overflow_add(ushort4, uint16_t, compiler_overflow_ushort4_add, 0) +compiler_overflow_add(uchar4, uint8_t, compiler_overflow_uchar4_add, 0) -compiler_overflow(uint4, compiler_overflow_uint4, 0) -compiler_overflow(ushort4, compiler_overflow_ushort4, 0) -compiler_overflow(uchar4, compiler_overflow_uchar4, 0) +// as llvm intrincs function doesn't support byte/short overflow, +// we just test uint overflow here. +compiler_overflow_sub(uint4, uint32_t, compiler_overflow_uint4_sub, 1) |