summaryrefslogtreecommitdiff
path: root/backend
diff options
context:
space:
mode:
author rander.wang <rander.wang@intel.com> 2017-05-15 16:26:08 +0800
committer Yang Rong <rong.r.yang@intel.com> 2017-05-17 18:11:12 +0800
commit 448f8f7a2a2901aa8807a212760539889bc3ebb8 (patch)
tree ae64b2b7684b37608210fb62416983d7cf8304a2 /backend
parent 733e9685ff6d8655d32f496a3bfad454a94b8c92 (diff)
download beignet-448f8f7a2a2901aa8807a212760539889bc3ebb8.tar.gz
backend: refine asin function
Refine the algorithm to remove unnecessary operations.

Signed-off-by: rander.wang <rander.wang@intel.com>
Tested-by: Yang Rong <rong.r.yang@intel.com>
Diffstat (limited to 'backend')
-rw-r--r-- backend/src/libocl/tmpl/ocl_math_common.tmpl.cl 28
1 files changed, 7 insertions, 21 deletions
diff --git a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
index 166ee9cc..9d4100e3 100644
--- a/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math_common.tmpl.cl
@@ -1160,28 +1160,14 @@ INLINE float __gen_ocl_asin_util(float x) {
}
OVERLOADABLE float __gen_ocl_internal_asin(float x) {
- uint ix;
- union { uint i; float f; } u;
- u.f = x;
- ix = u.i & 0x7fffffff;
- if(ix == 0x3f800000) {
- return x * M_PI_2_F; /* asin(|1|)=+-pi/2 with inexact */
- }
- if(ix > 0x3f800000) { /* |x|>= 1 */
- return NAN; /* asin(|x|>1) is NaN */
- }
-
- if(ix < 0x32000000) { /* if |x| < 2**-27 */
- if(HUGE_VALF + x > FLT_ONE) return x; /* return x with inexact if x!=0*/
- }
+ float asinX2 =__gen_ocl_asin_util(x);
+ float absX = fabs(x);
+ float asinX1 = mad(2.0f , __gen_ocl_asin_util(native_sqrt(mad(-0.5f, absX, 0.5f))) , -M_PI_2_F);
- if(x < -0.5) {
- return 2 * __gen_ocl_asin_util(native_sqrt((1+x) / 2)) - M_PI_2_F;
- } else if(x > 0.5) {
- return M_PI_2_F - 2 * __gen_ocl_asin_util(native_sqrt((1-x) / 2));
- } else {
- return __gen_ocl_asin_util(x);
- }
+ float retVal = (x < 0.0f)?asinX1:-asinX1;
+ retVal = (absX > 0.5f)?retVal:asinX2;
+ retVal = (absX > 1.0f)?NAN:retVal;
+ return retVal;
}
OVERLOADABLE float __gen_ocl_internal_asinpi(float x) {
return __gen_ocl_internal_asin(x) / M_PI_F;