summaryrefslogtreecommitdiff
path: root/sysdeps/i386/i686/fpu
diff options
context:
space:
mode:
authorH.J. Lu <hjl.tools@gmail.com>2017-10-22 08:11:15 -0700
committerH.J. Lu <hjl.tools@gmail.com>2017-10-22 08:12:41 -0700
commit5313581cb52fd5d3d2cf222ddb6f8f86f090974f (patch)
treee2ec5d944c1089cec1de29c5c72c3fb600a8c3fb /sysdeps/i386/i686/fpu
parent6089a3ee24cede17e9443aef0aa72fa1a0ba1548 (diff)
downloadglibc-5313581cb52fd5d3d2cf222ddb6f8f86f090974f.tar.gz
i386: Replace assembly versions of e_powf with generic e_powf.c
This patch replaces i386 assembly versions of e_powf with generic e_powf.c. For workload-spec2017.wrf, on Nehalem, it improves performance by: Before After Improvement reciprocal-throughput 230.855 78.3358 194% latency 231.685 94.1259 146% On Skylake, it improves performance by: Before After Improvement reciprocal-throughput 239.858 47.4713 405% latency 247.57 93.8798 163% On IvyBridge with --disable-multi-arch, it improves performance by: Before After Improvement reciprocal-throughput 269.078 63.3758 324% latency 271.473 102.091 165% * sysdeps/i386/fpu/e_powf.S: Removed. * sysdeps/i386/fpu/e_powf_log2_data.c: Likewise. * sysdeps/i386/fpu/w_powf.c: Likewise. * sysdeps/i386/fpu/libm-test-ulps: Updated for generic e_powf.c. * sysdeps/i386/i686/fpu/multiarch/libm-test-ulps: Likewise. * sysdeps/i386/i686/fpu/multiarch/Makefile (libm-sysdep_routines): Add e_powf-sse2. (CFLAGS-e_powf-sse2.c): New. * sysdeps/i386/i686/fpu/multiarch/e_powf-sse2.c: New file. * sysdeps/i386/i686/fpu/multiarch/e_powf.c: Likewise.
Diffstat (limited to 'sysdeps/i386/i686/fpu')
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/Makefile3
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/e_powf-sse2.c3
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/e_powf.c43
-rw-r--r--sysdeps/i386/i686/fpu/multiarch/libm-test-ulps18
4 files changed, 60 insertions, 7 deletions
diff --git a/sysdeps/i386/i686/fpu/multiarch/Makefile b/sysdeps/i386/i686/fpu/multiarch/Makefile
index eee3b8b1fd..c0fa9761d3 100644
--- a/sysdeps/i386/i686/fpu/multiarch/Makefile
+++ b/sysdeps/i386/i686/fpu/multiarch/Makefile
@@ -1,9 +1,10 @@
ifeq ($(subdir),math)
libm-sysdep_routines += e_exp2f-sse2 e_expf-sse2 e_logf-sse2 e_log2f-sse2 \
- s_sinf-sse2 s_cosf-sse2 s_sincosf-sse2
+ e_powf-sse2 s_sinf-sse2 s_cosf-sse2 s_sincosf-sse2
CFLAGS-e_exp2f-sse2.c = -msse2 -mfpmath=sse
CFLAGS-e_expf-sse2.c = -msse2 -mfpmath=sse
CFLAGS-e_log2f-sse2.c = -msse2 -mfpmath=sse
CFLAGS-e_logf-sse2.c = -msse2 -mfpmath=sse
+CFLAGS-e_powf-sse2.c = -msse2 -mfpmath=sse
endif
diff --git a/sysdeps/i386/i686/fpu/multiarch/e_powf-sse2.c b/sysdeps/i386/i686/fpu/multiarch/e_powf-sse2.c
new file mode 100644
index 0000000000..c56f6ee89f
--- /dev/null
+++ b/sysdeps/i386/i686/fpu/multiarch/e_powf-sse2.c
@@ -0,0 +1,3 @@
+#define __powf __powf_sse2
+
+#include <sysdeps/ieee754/flt-32/e_powf.c>
diff --git a/sysdeps/i386/i686/fpu/multiarch/e_powf.c b/sysdeps/i386/i686/fpu/multiarch/e_powf.c
new file mode 100644
index 0000000000..4dc4c87326
--- /dev/null
+++ b/sysdeps/i386/i686/fpu/multiarch/e_powf.c
@@ -0,0 +1,43 @@
+/* Multiple versions of powf.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define powf __redirect_powf
+#define __DECL_SIMD___redirect_powf
+#include <math.h>
+#undef powf
+
+#define SYMBOL_NAME powf
+#include "ifunc-sse2.h"
+
+libc_ifunc_redirected (__redirect_powf, __powf, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (__powf_ia32, __GI___powf, __redirect_powf)
+ __attribute__ ((visibility ("hidden")));
+
+# include <shlib-compat.h>
+versioned_symbol (libm, __powf, powf, GLIBC_2_27);
+#else
+weak_alias (__powf, powf)
+#endif
+
+strong_alias (__powf, __ieee754_powf)
+strong_alias (__powf, __powf_finite)
+
+#define __powf __powf_ia32
+#include <sysdeps/ieee754/flt-32/e_powf.c>
diff --git a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
index b5d74df580..26d90ec636 100644
--- a/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
+++ b/sysdeps/i386/i686/fpu/multiarch/libm-test-ulps
@@ -2370,24 +2370,30 @@ ldouble: 1
Function: "pow_downward":
double: 1
+float: 1
float128: 2
idouble: 1
+ifloat: 1
ifloat128: 2
ildouble: 4
ldouble: 4
Function: "pow_towardzero":
double: 1
+float: 1
float128: 2
idouble: 1
+ifloat: 1
ifloat128: 2
ildouble: 4
ldouble: 4
Function: "pow_upward":
double: 1
+float: 1
float128: 2
idouble: 1
+ifloat: 1
ifloat128: 2
ildouble: 4
ldouble: 4
@@ -2577,30 +2583,30 @@ ldouble: 5
Function: "tgamma_downward":
double: 3
-float: 4
+float: 5
float128: 5
idouble: 3
-ifloat: 4
+ifloat: 5
ifloat128: 5
ildouble: 5
ldouble: 5
Function: "tgamma_towardzero":
double: 4
-float: 4
+float: 5
float128: 5
idouble: 4
-ifloat: 4
+ifloat: 5
ifloat128: 5
ildouble: 5
ldouble: 5
Function: "tgamma_upward":
double: 4
-float: 4
+float: 6
float128: 4
idouble: 4
-ifloat: 4
+ifloat: 6
ifloat128: 4
ildouble: 5
ldouble: 5