diff options
author | Pan Xiuli <xiuli.pan@intel.com> | 2016-08-08 11:31:27 +0800 |
---|---|---|
committer | Yang Rong <rong.r.yang@intel.com> | 2016-08-12 18:16:56 +0800 |
commit | 4889cdf76678eeda085e2bfc49c7a57e2ee0a6f9 (patch) | |
tree | 2c7b0bb802e0d8f5fcae2cca1acf26bd4fbb6e86 | |
parent | b673343987f751b8c006cc4920111545073827cb (diff) | |
download | beignet-4889cdf76678eeda085e2bfc49c7a57e2ee0a6f9.tar.gz |
Utest: Add half type mad test case
Mad now supports the half type, so add a test for it.
Signed-off-by: Pan Xiuli <xiuli.pan@intel.com>
Reviewed-by: Yang Rong <rong.r.yang@intel.com>
-rw-r--r-- | kernels/compiler_math_3op.cl | 17 | ||||
-rw-r--r-- | utests/compiler_math_3op.cpp | 58 |
2 files changed, 71 insertions, 4 deletions
diff --git a/kernels/compiler_math_3op.cl b/kernels/compiler_math_3op.cl index 9194ef01..1a43e1bc 100644 --- a/kernels/compiler_math_3op.cl +++ b/kernels/compiler_math_3op.cl @@ -1,4 +1,18 @@ -kernel void compiler_math_3op(global float *dst, global float *src1, global float *src2, global float *src3) { +#ifdef HALF +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +kernel void compiler_math_3op_half(global half *dst, global half *src1, global half *src2, global half *src3) { + int i = get_global_id(0); + const half x = src1[i], y = src2[i], z = src3[i]; + switch (i%2) { + case 0: dst[i] = mad(x, y, z); break; + case 1: dst[i] = fma(x, y, z); break; + default: dst[i] = 1.f; break; + }; + dst[0] = mad(src1[0],src2[0],src3[0]); +} +#else +kernel void compiler_math_3op_float(global float *dst, global float *src1, global float *src2, global float *src3) { + int i = get_global_id(0); const float x = src1[i], y = src2[i], z = src3[i]; switch (i%2) { @@ -8,3 +22,4 @@ kernel void compiler_math_3op(global float *dst, global float *src1, global floa }; dst[0] = mad(src1[0],src2[0],src3[0]); } +#endif diff --git a/utests/compiler_math_3op.cpp b/utests/compiler_math_3op.cpp index 523b72bd..e8713a11 100644 --- a/utests/compiler_math_3op.cpp +++ b/utests/compiler_math_3op.cpp @@ -13,13 +13,14 @@ static void cpu_compiler_math(float *dst, float *src1, float *src2, float *src3, dst[0] = (src1[0]*src2[0]+src3[0]); } -static void compiler_math_3op(void) +static void compiler_math_3op_float(void) { const size_t n = 32; float cpu_dst[32], cpu_src1[32], cpu_src2[32], cpu_src3[32]; // Setup kernel and buffers - OCL_CREATE_KERNEL("compiler_math_3op"); + OCL_CREATE_KERNEL_FROM_FILE("compiler_math_3op", + "compiler_math_3op_float"); OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL); OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(float), NULL); @@ -62,5 +63,56 @@ static void compiler_math_3op(void) OCL_UNMAP_BUFFER(0); } } 
+MAKE_UTEST_FROM_FUNCTION(compiler_math_3op_float) +static void compiler_math_3op_half(void) +{ + if (!cl_check_half()) + return; + const size_t n = 32; + float cpu_dst[32], cpu_src1[32], cpu_src2[32], cpu_src3[32]; + + // Setup kernel and buffers + OCL_CALL(cl_kernel_init, "compiler_math_3op.cl", + "compiler_math_3op_half", + SOURCE, "-DHALF"); + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(cl_half), NULL); + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(cl_half), NULL); + OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(cl_half), NULL); + OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(cl_half), NULL); + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); + OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]); + OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]); + globals[0] = 16; + locals[0] = 16; -MAKE_UTEST_FROM_FUNCTION(compiler_math_3op) + for (int j = 0; j < 1000; j ++) { + OCL_MAP_BUFFER(1); + OCL_MAP_BUFFER(2); + OCL_MAP_BUFFER(3); + for (uint32_t i = 0; i < 32; ++i) { + ((cl_half*)buf_data[1])[i] = __float_to_half(as_uint(cpu_src1[i] = 0.1f*(rand() & 63))); + ((cl_half*)buf_data[2])[i] = __float_to_half(as_uint(cpu_src2[i] = 0.02f*(rand() & 63))); + ((cl_half*)buf_data[3])[i] = __float_to_half(as_uint(cpu_src3[i] = 0.02f*(rand() & 63))); + } + OCL_UNMAP_BUFFER(1); + OCL_UNMAP_BUFFER(2); + OCL_UNMAP_BUFFER(3); + OCL_NDRANGE(1); + + for (int i = 0; i < 16; ++i) + cpu_compiler_math(cpu_dst, cpu_src1, cpu_src2, cpu_src3, i); + OCL_MAP_BUFFER(0); + for (int i = 0; i < 16; ++i) { + const float cpu = cpu_dst[i]; + bool isInf, infSign; + const float gpu = as_float(__half_to_float(((uint16_t*)buf_data[0])[i], &isInf, &infSign)); + //printf("cpu:(%f*%f+%f) = %f, gpu:%f\n", cpu_src1[i], cpu_src2[i], cpu_src3[i],cpu,gpu); + OCL_ASSERT(((fabs(cpu) < 6e-8f) && (gpu < 6e-8f)) || (fabs(cpu - gpu) <= 0.3 * fabs(cpu)) || + (isInf && ((infSign && cpu > 65504.0f) || (!infSign && cpu < -65504.0f))) || + (std::isnan(gpu) && std::isnan(cpu))); + } + OCL_UNMAP_BUFFER(0); + } +} 
+MAKE_UTEST_FROM_FUNCTION(compiler_math_3op_half) |