diff options
author | Wilco Dijkstra <wdijkstr@arm.com> | 2018-02-12 10:42:42 +0000 |
---|---|---|
committer | Wilco Dijkstra <wdijkstr@arm.com> | 2018-02-12 10:47:09 +0000 |
commit | c3d466cba1692708a19c6ff829d0386c83a0c6e5 (patch) | |
tree | d01ce6103dc25d3b662898c3429b8b103b8d3155 /sysdeps/x86_64 | |
parent | 7bb087bd7bfe3616c4c0974a3f7352b593353ea5 (diff) | |
download | glibc-c3d466cba1692708a19c6ff829d0386c83a0c6e5.tar.gz |
Remove slow paths from pow
Remove the slow paths from pow. Like several other double precision math
functions, pow is exactly rounded. This is not required of math functions
and causes major overheads as it requires multiple fallbacks using higher
precision arithmetic if a result is close to 0.5ULP. Ridiculous slowdowns
of up to 100000x have been reported when the highest precision path triggers.
All GLIBC math tests pass on AArch64 and x86_64 (with ULP of pow set to 1).
The worst case error is ~0.506ULP. A simple test over a few hundred million
values shows pow is 10% faster on average. This fixes BZ #13932.
[BZ #13932]
* sysdeps/ieee754/dbl-64/uexp.h (err_1): Remove.
* benchtests/pow-inputs: Update comment for slow path cases.
* manual/probes.texi (slowpow_p10): Delete removed probe.
(slowpow_p32): Likewise.
* math/Makefile: Remove halfulp.c and slowpow.c.
* sysdeps/aarch64/libm-test-ulps: Set ULP of pow to 1.
* sysdeps/generic/math_private.h (__exp1): Remove error argument.
(__halfulp): Remove.
(__slowpow): Remove.
* sysdeps/i386/fpu/halfulp.c: Delete file.
* sysdeps/i386/fpu/slowpow.c: Likewise.
* sysdeps/ia64/fpu/halfulp.c: Likewise.
* sysdeps/ia64/fpu/slowpow.c: Likewise.
* sysdeps/ieee754/dbl-64/e_exp.c (__exp1): Remove error argument,
improve comments and add error analysis.
* sysdeps/ieee754/dbl-64/e_pow.c (__ieee754_pow): Add error analysis.
(power1): Remove function.
(log1): Remove error argument, add error analysis.
(my_log2): Remove function.
* sysdeps/ieee754/dbl-64/halfulp.c: Delete file.
* sysdeps/ieee754/dbl-64/slowpow.c: Likewise.
* sysdeps/m68k/m680x0/fpu/halfulp.c: Likewise.
* sysdeps/m68k/m680x0/fpu/slowpow.c: Likewise.
* sysdeps/powerpc/power4/fpu/Makefile: Remove CPPFLAGS-slowpow.c.
* sysdeps/x86_64/fpu/libm-test-ulps: Set ULP of pow to 1.
* sysdeps/x86_64/fpu/multiarch/Makefile: Remove slowpow-fma.c,
slowpow-fma4.c, halfulp-fma.c, halfulp-fma4.c.
* sysdeps/x86_64/fpu/multiarch/e_pow-fma.c (__slowpow): Remove define.
* sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c (__slowpow): Likewise.
* sysdeps/x86_64/fpu/multiarch/halfulp-fma.c: Delete file.
* sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c: Likewise.
* sysdeps/x86_64/fpu/multiarch/slowpow-fma.c: Likewise.
* sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c: Likewise.
Diffstat (limited to 'sysdeps/x86_64')
-rw-r--r-- | sysdeps/x86_64/fpu/libm-test-ulps | 2 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/Makefile | 12 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/e_pow-fma.c | 1 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c | 1 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/halfulp-fma.c | 4 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c | 4 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/slowpow-fma.c | 11 | ||||
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c | 11 |
8 files changed, 6 insertions, 40 deletions
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps index 85552bd695..48e53f7ef2 100644 --- a/sysdeps/x86_64/fpu/libm-test-ulps +++ b/sysdeps/x86_64/fpu/libm-test-ulps @@ -2468,8 +2468,10 @@ Function: "log_vlen8_avx2": float: 2 Function: "pow": +double: 1 float: 1 float128: 2 +idouble: 1 ifloat: 1 ifloat128: 2 ildouble: 1 diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile index 9a89bfc286..9391eb5511 100644 --- a/sysdeps/x86_64/fpu/multiarch/Makefile +++ b/sysdeps/x86_64/fpu/multiarch/Makefile @@ -10,9 +10,9 @@ libm-sysdep_routines += s_ceil-sse4_1 s_ceilf-sse4_1 s_floor-sse4_1 \ libm-sysdep_routines += e_exp-fma e_log-fma e_pow-fma s_atan-fma \ e_asin-fma e_atan2-fma s_sin-fma s_tan-fma \ - mplog-fma mpa-fma slowexp-fma slowpow-fma \ + mplog-fma mpa-fma slowexp-fma \ sincos32-fma doasin-fma dosincos-fma \ - halfulp-fma mpexp-fma \ + mpexp-fma \ mpatan2-fma mpatan-fma mpsqrt-fma mptan-fma CFLAGS-doasin-fma.c = -mfma -mavx2 @@ -22,7 +22,6 @@ CFLAGS-e_atan2-fma.c = -mfma -mavx2 CFLAGS-e_exp-fma.c = -mfma -mavx2 CFLAGS-e_log-fma.c = -mfma -mavx2 CFLAGS-e_pow-fma.c = -mfma -mavx2 $(config-cflags-nofma) -CFLAGS-halfulp-fma.c = -mfma -mavx2 CFLAGS-mpa-fma.c = -mfma -mavx2 CFLAGS-mpatan-fma.c = -mfma -mavx2 CFLAGS-mpatan2-fma.c = -mfma -mavx2 @@ -33,7 +32,6 @@ CFLAGS-mptan-fma.c = -mfma -mavx2 CFLAGS-s_atan-fma.c = -mfma -mavx2 CFLAGS-sincos32-fma.c = -mfma -mavx2 CFLAGS-slowexp-fma.c = -mfma -mavx2 -CFLAGS-slowpow-fma.c = -mfma -mavx2 CFLAGS-s_sin-fma.c = -mfma -mavx2 CFLAGS-s_tan-fma.c = -mfma -mavx2 @@ -53,9 +51,9 @@ CFLAGS-s_sincosf-fma.c = -mfma -mavx2 libm-sysdep_routines += e_exp-fma4 e_log-fma4 e_pow-fma4 s_atan-fma4 \ e_asin-fma4 e_atan2-fma4 s_sin-fma4 s_tan-fma4 \ - mplog-fma4 mpa-fma4 slowexp-fma4 slowpow-fma4 \ + mplog-fma4 mpa-fma4 slowexp-fma4 \ sincos32-fma4 doasin-fma4 dosincos-fma4 \ - halfulp-fma4 mpexp-fma4 \ + mpexp-fma4 \ mpatan2-fma4 mpatan-fma4 mpsqrt-fma4 mptan-fma4 
CFLAGS-doasin-fma4.c = -mfma4 @@ -65,7 +63,6 @@ CFLAGS-e_atan2-fma4.c = -mfma4 CFLAGS-e_exp-fma4.c = -mfma4 CFLAGS-e_log-fma4.c = -mfma4 CFLAGS-e_pow-fma4.c = -mfma4 $(config-cflags-nofma) -CFLAGS-halfulp-fma4.c = -mfma4 CFLAGS-mpa-fma4.c = -mfma4 CFLAGS-mpatan-fma4.c = -mfma4 CFLAGS-mpatan2-fma4.c = -mfma4 @@ -76,7 +73,6 @@ CFLAGS-mptan-fma4.c = -mfma4 CFLAGS-s_atan-fma4.c = -mfma4 CFLAGS-sincos32-fma4.c = -mfma4 CFLAGS-slowexp-fma4.c = -mfma4 -CFLAGS-slowpow-fma4.c = -mfma4 CFLAGS-s_sin-fma4.c = -mfma4 CFLAGS-s_tan-fma4.c = -mfma4 diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow-fma.c b/sysdeps/x86_64/fpu/multiarch/e_pow-fma.c index 6fd408342e..73c1e7fb89 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_pow-fma.c +++ b/sysdeps/x86_64/fpu/multiarch/e_pow-fma.c @@ -1,6 +1,5 @@ #define __ieee754_pow __ieee754_pow_fma #define __exp1 __exp1_fma -#define __slowpow __slowpow_fma #define SECTION __attribute__ ((section (".text.fma"))) #include <sysdeps/ieee754/dbl-64/e_pow.c> diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c index 5b3ea8e103..8971b655ca 100644 --- a/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c +++ b/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c @@ -1,6 +1,5 @@ #define __ieee754_pow __ieee754_pow_fma4 #define __exp1 __exp1_fma4 -#define __slowpow __slowpow_fma4 #define SECTION __attribute__ ((section (".text.fma4"))) #include <sysdeps/ieee754/dbl-64/e_pow.c> diff --git a/sysdeps/x86_64/fpu/multiarch/halfulp-fma.c b/sysdeps/x86_64/fpu/multiarch/halfulp-fma.c deleted file mode 100644 index 6ca70462ca..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/halfulp-fma.c +++ /dev/null @@ -1,4 +0,0 @@ -#define __halfulp __halfulp_fma -#define SECTION __attribute__ ((section (".text.fma"))) - -#include <sysdeps/ieee754/dbl-64/halfulp.c> diff --git a/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c b/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c deleted file mode 100644 index a00c17c016..0000000000 --- 
a/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c +++ /dev/null @@ -1,4 +0,0 @@ -#define __halfulp __halfulp_fma4 -#define SECTION __attribute__ ((section (".text.fma4"))) - -#include <sysdeps/ieee754/dbl-64/halfulp.c> diff --git a/sysdeps/x86_64/fpu/multiarch/slowpow-fma.c b/sysdeps/x86_64/fpu/multiarch/slowpow-fma.c deleted file mode 100644 index 160ed683ab..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/slowpow-fma.c +++ /dev/null @@ -1,11 +0,0 @@ -#define __slowpow __slowpow_fma -#define __add __add_fma -#define __dbl_mp __dbl_mp_fma -#define __mpexp __mpexp_fma -#define __mplog __mplog_fma -#define __mul __mul_fma -#define __sub __sub_fma -#define __halfulp __halfulp_fma -#define SECTION __attribute__ ((section (".text.fma"))) - -#include <sysdeps/ieee754/dbl-64/slowpow.c> diff --git a/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c b/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c deleted file mode 100644 index 69d69823bb..0000000000 --- a/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c +++ /dev/null @@ -1,11 +0,0 @@ -#define __slowpow __slowpow_fma4 -#define __add __add_fma4 -#define __dbl_mp __dbl_mp_fma4 -#define __mpexp __mpexp_fma4 -#define __mplog __mplog_fma4 -#define __mul __mul_fma4 -#define __sub __sub_fma4 -#define __halfulp __halfulp_fma4 -#define SECTION __attribute__ ((section (".text.fma4"))) - -#include <sysdeps/ieee754/dbl-64/slowpow.c> |