//#define BENCHMARK_NATIVE 1 //#define BENCHMARK_INTERNAL_FAST 2 /* benchmark pow performance */ kernel void bench_math_pow( global float *src, global float *dst, float pwr, uint loop) { float result = src[get_global_id(0)]; for(; loop > 0; loop--) { #if defined(BENCHMARK_NATIVE) result = native_powr(result, pwr); /* calls native */ #else result = pow(result, pwr); /* calls internal slow */ #endif } dst[get_global_id(0)] = result; } /* benchmark exp2 performance, exp2 is native */ kernel void bench_math_exp2( global float *src, global float *dst, float pwr, uint loop) { float result = src[get_global_id(0)]; for(; loop > 0; loop--) result = exp2(result) * 0.1f; dst[get_global_id(0)] = result; } /* benchmark exp performance */ /* calls internal fast (native) if (x > -0x1.6p1 && x < 0x1.6p1) */ kernel void bench_math_exp( global float *src, global float *dst, float pwr, uint loop) { float result = src[get_global_id(0)]; for(; loop > 0; loop--) { #if defined(BENCHMARK_NATIVE) result = native_exp((float)-0x1.6p1 - result * 0.1f); /* calls native */ #elif defined(BENCHMARK_INTERNAL_FAST) result = exp((float)-0x1.6p1 + result * 0.1f); /* calls internal fast */ #else result = exp((float)-0x1.6p1 - result * 0.1f); /* calls internal slow */ #endif } dst[get_global_id(0)] = result; } /* benchmark exp10 performance */ /* calls internal fast (native) if (x < -0x1.4p+5) || (x > +0x1.4p+5) */ kernel void bench_math_exp10( global float *src, global float *dst, float pwr, uint loop) { float result = src[get_global_id(0)]; for(; loop > 0; loop--) { #if defined(BENCHMARK_NATIVE) result = native_exp10((float)0x1.4p+5 + result * 0.1f); /* calls native */ #elif defined(BENCHMARK_INTERNAL_FAST) result = exp10((float)-0x1.4p+5 - result * 0.1f); /* calls internal fast */ #else result = exp10((float)-0x1.2p+5 - result * 0.1f); /* calls internal slow */ #endif } dst[get_global_id(0)] = result; } /* benchmark log2 performance */ /* calls internal fast (native) if (x > 0x1.1p0) */ kernel void bench_math_log2( global float *src, global float *dst, float pwr, uint loop) { float result = src[get_global_id(0)]; for(; loop > 0; loop--) { #if defined(BENCHMARK_NATIVE) result = native_log2((float)0x1.1p0 + result * 0.0001f); /* calls native */ #elif defined(BENCHMARK_INTERNAL_FAST) result = log2((float)0x1.1p0 + result * 0.0001f); /* calls internal fast */ #else result = log2((float)0x1.1p0 - result * 0.0001f); /* calls internal slow */ #endif } dst[get_global_id(0)] = result; } /* benchmark log performance */ /* calls internal fast (native) if (x > 0x1.1p0) */ kernel void bench_math_log( global float *src, global float *dst, float pwr, uint loop) { float result = src[get_global_id(0)]; for(; loop > 0; loop--) { #if defined(BENCHMARK_NATIVE) result = native_log((float)0x1.1p0 + result * 0.0001f); /* calls native */ #elif defined(BENCHMARK_INTERNAL_FAST) result = log((float)0x1.1p0 + result * 0.0001f); /* calls internal fast */ #else result = log((float)0x1.1p0 - result * 0.0001f); /* calls internal slow */ #endif } dst[get_global_id(0)] = result; } /* benchmark log10 performance */ /* calls internal fast (native) if (x > 0x1.1p0) */ kernel void bench_math_log10( global float *src, global float *dst, float pwr, uint loop) { float result = src[get_global_id(0)]; for(; loop > 0; loop--) { #if defined(BENCHMARK_NATIVE) result = native_log10((float)0x1.1p0 + result * 0.0001f); /* calls native */ #elif defined(BENCHMARK_INTERNAL_FAST) result = log10((float)0x1.1p0 + result * 0.0001f); /* calls internal fast */ #else result = log10((float)0x1.1p0 - result * 0.0001f); /* calls internal slow */ #endif } dst[get_global_id(0)] = result; } /* benchmark sqrt performance */ kernel void bench_math_sqrt( global float *src, global float *dst, float pwr, uint loop) { float result = src[get_global_id(0)]; for(; loop > 0; loop--) result = sqrt(result) + sqrt(pwr + result); dst[get_global_id(0)] = result; } /* benchmark sin performance */ kernel void bench_math_sin( global float *src, global float *dst, float pwr, uint loop) { float result = src[get_global_id(0)]; for(; loop > 0; loop--) { #if defined(BENCHMARK_NATIVE) result = native_sin(result); /* calls native */ #else result = sin(result); /* calls internal, random complexity */ //result = sin(0.1f + result); /* calls internal, (1) no reduction */ //result = sin(2.f + result); /* calls internal, (2) fast reduction */ //result = sin(4001 + result); /* calls internal, (3) slow reduction */ result *= 0x1p-16; #endif } dst[get_global_id(0)] = result; } /* benchmark cos performance */ kernel void bench_math_cos( global float *src, global float *dst, float pwr, uint loop) { float result = src[get_global_id(0)]; for(; loop > 0; loop--) { #if defined(BENCHMARK_NATIVE) result = native_cos(result); /* calls native */ #else result = cos(result); /* calls internal, random complexity */ //result = cos(0.1f + result); /* calls internal, (1) no reduction */ //result = cos(2.f + result); /* calls internal, (2) fast reduction */ //result = cos(4001.f + result); /* calls internal, (3) slow reduction */ result *= 0x1p-16; #endif } dst[get_global_id(0)] = result; } /* benchmark native tan performance */ kernel void bench_math_tan( global float *src, global float *dst, float pwr, uint loop) { float result = src[get_global_id(0)]; for(; loop > 0; loop--) { #if defined(BENCHMARK_NATIVE) result = native_tan(result); /* calls native */ #else result = tan(result); /* calls internal slow */ #endif } dst[get_global_id(0)] = result; } /* benchmark asin performance */ kernel void bench_math_asin( global float *src, global float *dst, float pwr, uint loop) { float result = src[get_global_id(0)]; for(; loop > 0; loop--) result = asin(pwr - 1); dst[get_global_id(0)] = result; } /* benchmark acos performance */ kernel void bench_math_acos( global float *src, global float *dst, float pwr, uint loop) { float result = src[get_global_id(0)]; for(; loop > 0; loop--) result = acos(pwr - 1); dst[get_global_id(0)] = result; }