summaryrefslogtreecommitdiff
path: root/sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c
diff options
context:
space:
mode:
author: Wilco Dijkstra <wdijkstr@arm.com>, 2018-08-14 10:45:59 +0100
committer: Wilco Dijkstra <wdijkstr@arm.com>, 2018-08-14 10:45:59 +0100
commit: 599cf3976679e1b345307d9c02057f02aa95528f (patch)
tree: ede9ff73c0eb51bce1a9c540b6daf5c0675b5afd /sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c
parent: e95c6f61920a0f9237cfb292fa44ad500e1df09b (diff)
download: glibc-599cf3976679e1b345307d9c02057f02aa95528f.tar.gz
Improve performance of sinf and cosf
The second patch improves performance of sinf and cosf using the same
algorithms and polynomials. The returned values are identical to sincosf
for the same input. ULP definitions for AArch64 and x64 are updated.

sinf/cosf throughput gains on Cortex-A72:
* |x| < 0x1p-12 : 1.2x
* |x| < M_PI_4 : 1.8x
* |x| < 2 * M_PI: 1.7x
* |x| < 120.0 : 2.3x
* |x| < Inf : 3.0x

* NEWS: Mention sinf, cosf, sincosf.
* sysdeps/aarch64/libm-test-ulps: Update ULP for sinf, cosf, sincosf.
* sysdeps/x86_64/fpu/libm-test-ulps: Update ULP for sinf and cosf.
* sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c: Add definitions of
  constants rather than including generic sincosf.h.
* sysdeps/x86_64/fpu/s_sincosf_data.c: Remove.
* sysdeps/ieee754/flt-32/s_cosf.c (cosf): Rewrite.
* sysdeps/ieee754/flt-32/s_sincosf.h (reduced_sin): Remove.
  (reduced_cos): Remove.
  (sinf_poly): New function.
* sysdeps/ieee754/flt-32/s_sinf.c (sinf): Rewrite.
Diffstat (limited to 'sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c')
-rw-r--r-- sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c | 33
1 files changed, 32 insertions, 1 deletions
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c b/sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c
index 64abe7abca..0b80c4fe0d 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c
@@ -21,7 +21,6 @@
#include <math_private.h>
#include <x86intrin.h>
#include <libm-alias-float.h>
-#include "s_sincosf.h"
#define SINCOSF __sincosf_fma
@@ -31,6 +30,38 @@
# define SINCOSF_FUNC SINCOSF
#endif
+/* PI/2 with 98 bits of accuracy. */
+static const double PI_2_hi = 0x1.921fb544p+0;
+static const double PI_2_lo = 0x1.0b4611a626332p-34;
+
+static const double SMALL = 0x1p-50; /* 2^-50. */
+static const double inv_PI_4 = 0x1.45f306dc9c883p+0; /* 4/PI. */
+
+#define FLOAT_EXPONENT_SHIFT 23
+#define FLOAT_EXPONENT_BIAS 127
+
+static const double pio2_table[] = {
+ 0 * M_PI_2,
+ 1 * M_PI_2,
+ 2 * M_PI_2,
+ 3 * M_PI_2,
+ 4 * M_PI_2,
+ 5 * M_PI_2
+};
+
+static const double invpio4_table[] = {
+ 0x0p+0,
+ 0x1.45f306cp+0,
+ 0x1.c9c882ap-28,
+ 0x1.4fe13a8p-58,
+ 0x1.f47d4dp-85,
+ 0x1.bb81b6cp-112,
+ 0x1.4acc9ep-142,
+ 0x1.0e4107cp-169
+};
+
+static const double ones[] = { 1.0, -1.0 };
+
/* Chebyshev constants for sin and cos, range -PI/4 - PI/4. */
static const __v2df V0 = { -0x1.5555555551cd9p-3, -0x1.ffffffffe98aep-2};
static const __v2df V1 = { 0x1.1111110c2688bp-7, 0x1.55555545c50c7p-5 };