summaryrefslogtreecommitdiff
path: root/sysdeps/i386
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@gmail.com>2011-10-14 23:41:47 -0400
committerUlrich Drepper <drepper@gmail.com>2011-10-14 23:41:47 -0400
commit38ad40ceca8ba35761e79cfce4aaef0d0f7583e6 (patch)
tree336de2d8f4b91a770418446fe2c322662d321bcd /sysdeps/i386
parent396a21b1d016c52e011f8921e0728aa62a1e9df0 (diff)
downloadglibc-38ad40ceca8ba35761e79cfce4aaef0d0f7583e6.tar.gz
Optimize x86-32 log
Diffstat (limited to 'sysdeps/i386')
-rw-r--r--sysdeps/i386/fpu/e_log.S20
-rw-r--r--sysdeps/i386/fpu/e_logf.S20
-rw-r--r--sysdeps/i386/fpu/e_logl.S20
-rw-r--r--sysdeps/i386/i686/fpu/e_log.S80
-rw-r--r--sysdeps/i386/i686/fpu/e_logf.S81
-rw-r--r--sysdeps/i386/i686/fpu/e_logl.S81
6 files changed, 299 insertions, 3 deletions
diff --git a/sysdeps/i386/fpu/e_log.S b/sysdeps/i386/fpu/e_log.S
index 8110a84535..a2e4d89a40 100644
--- a/sysdeps/i386/fpu/e_log.S
+++ b/sysdeps/i386/fpu/e_log.S
@@ -62,4 +62,22 @@ ENTRY(__ieee754_log)
fstp %st(1)
ret
END (__ieee754_log)
-strong_alias (__ieee754_log, __log_finite)
+
+ENTRY(__log_finite)
+ fldln2 // log(2)
+ fldl 4(%esp) // x : log(2)
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ fld %st // x : x : log(2)
+ fsubl MO(one) // x-1 : x : log(2)
+ fld %st // x-1 : x-1 : x : log(2)
+ fabs // |x-1| : x-1 : x : log(2)
+ fcompl MO(limit) // x-1 : x : log(2)
+ fnstsw // x-1 : x : log(2)
+ andb $0x45, %ah
+ jz 2b
+ fstp %st(1) // x-1 : log(2)
+ fyl2xp1 // log(x)
+ ret
+END(__log_finite)
diff --git a/sysdeps/i386/fpu/e_logf.S b/sysdeps/i386/fpu/e_logf.S
index b683e13853..1992cc2f82 100644
--- a/sysdeps/i386/fpu/e_logf.S
+++ b/sysdeps/i386/fpu/e_logf.S
@@ -63,4 +63,22 @@ ENTRY(__ieee754_logf)
fstp %st(1)
ret
END (__ieee754_logf)
-strong_alias (__ieee754_logf, __logf_finite)
+
+ENTRY(__logf_finite)
+ fldln2 // log(2)
+ flds 4(%esp) // x : log(2)
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ fld %st // x : x : log(2)
+ fsubl MO(one) // x-1 : x : log(2)
+ fld %st // x-1 : x-1 : x : log(2)
+ fabs // |x-1| : x-1 : x : log(2)
+ fcompl MO(limit) // x-1 : x : log(2)
+ fnstsw // x-1 : x : log(2)
+ andb $0x45, %ah
+ jz 2b
+ fstp %st(1) // x-1 : log(2)
+ fyl2xp1 // log(x)
+ ret
+END(__logf_finite)
diff --git a/sysdeps/i386/fpu/e_logl.S b/sysdeps/i386/fpu/e_logl.S
index ee1fb16bc4..bfb72a30e9 100644
--- a/sysdeps/i386/fpu/e_logl.S
+++ b/sysdeps/i386/fpu/e_logl.S
@@ -63,4 +63,22 @@ ENTRY(__ieee754_logl)
fstp %st(1)
ret
END (__ieee754_logl)
-strong_alias (__ieee754_logl, __logl_finite)
+
+ENTRY(__logl_finite)
+ fldln2 // log(2)
+ fldt 4(%esp) // x : log(2)
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ fld %st // x : x : log(2)
+ fsubl MO(one) // x-1 : x : log(2)
+ fld %st // x-1 : x-1 : x : log(2)
+ fabs // |x-1| : x-1 : x : log(2)
+ fcompl MO(limit) // x-1 : x : log(2)
+ fnstsw // x-1 : x : log(2)
+ andb $0x45, %ah
+ jz 2b
+ fstp %st(1) // x-1 : log(2)
+ fyl2xp1 // log(x)
+ ret
+END(__logl_finite)
diff --git a/sysdeps/i386/i686/fpu/e_log.S b/sysdeps/i386/i686/fpu/e_log.S
new file mode 100644
index 0000000000..c6524b1854
--- /dev/null
+++ b/sysdeps/i386/i686/fpu/e_log.S
@@ -0,0 +1,80 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ *
+ * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>.
+ * Adapted for i686 instructions.
+ */
+
+#include <machine/asm.h>
+
+#ifdef __ELF__
+ .section .rodata.cst8,"aM",@progbits,8
+#else
+ .text
+#endif
+ .p2align 3
+ ASM_TYPE_DIRECTIVE(one,@object)
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ /* It is not important that this constant is precise. It is only
+ a value which is known to be on the safe side for using the
+ fyl2xp1 instruction. */
+ ASM_TYPE_DIRECTIVE(limit,@object)
+limit: .double 0.29
+ ASM_SIZE_DIRECTIVE(limit)
+
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%edx)
+#else
+# define MO(op) op
+#endif
+
+ .text
+ENTRY(__ieee754_log)
+ fldln2 // log(2)
+ fldl 4(%esp) // x : log(2)
+ fucomi %st
+ jp 3f
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ fld %st // x : x : log(2)
+ fsubl MO(one) // x-1 : x : log(2)
+ fld %st // x-1 : x-1 : x : log(2)
+ fabs // |x-1| : x-1 : x : log(2)
+ fld MO(limit) // 0.29 : |x-1| : x-1 : x : log(2)
+ fcomip %st(1) // |x-1| : x-1 : x : log(2)
+ fstp %st(0) // x-1 : x : log(2)
+ jc 2f
+ fstp %st(1) // x-1 : log(2)
+ fyl2xp1 // log(x)
+ ret
+
+2: fstp %st(0) // x : log(2)
+ fyl2x // log(x)
+ ret
+
+3: fstp %st(1)
+ ret
+END (__ieee754_log)
+
+ENTRY(__log_finite)
+ fldln2 // log(2)
+ fldl 4(%esp) // x : log(2)
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ fld %st // x : x : log(2)
+ fsubl MO(one) // x-1 : x : log(2)
+ fld %st // x-1 : x-1 : x : log(2)
+ fabs // |x-1| : x-1 : x : log(2)
+ fld MO(limit) // 0.29 : |x-1| : x-1 : x : log(2)
+ fcomip %st(1) // |x-1| : x-1 : x : log(2)
+ fstp %st(0) // x-1 : x : log(2)
+ jc 2b
+ fstp %st(1) // x-1 : log(2)
+ fyl2xp1 // log(x)
+ ret
+END(__log_finite)
diff --git a/sysdeps/i386/i686/fpu/e_logf.S b/sysdeps/i386/i686/fpu/e_logf.S
new file mode 100644
index 0000000000..64f8807f35
--- /dev/null
+++ b/sysdeps/i386/i686/fpu/e_logf.S
@@ -0,0 +1,81 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ * Adapted for float by Ulrich Drepper <drepper@cygnus.com>.
+ *
+ * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>.
+ * Adapted for i686 instructions.
+ */
+
+#include <machine/asm.h>
+
+#ifdef __ELF__
+ .section .rodata.cst8,"aM",@progbits,8
+#else
+ .text
+#endif
+ .p2align 3
+ ASM_TYPE_DIRECTIVE(one,@object)
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ /* It is not important that this constant is precise. It is only
+ a value which is known to be on the safe side for using the
+ fyl2xp1 instruction. */
+ ASM_TYPE_DIRECTIVE(limit,@object)
+limit: .double 0.29
+ ASM_SIZE_DIRECTIVE(limit)
+
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%edx)
+#else
+# define MO(op) op
+#endif
+
+ .text
+ENTRY(__ieee754_logf)
+ fldln2 // log(2)
+ flds 4(%esp) // x : log(2)
+ fucomi %st
+ jp 3f
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ fld %st // x : x : log(2)
+ fsubl MO(one) // x-1 : x : log(2)
+ fld %st // x-1 : x-1 : x : log(2)
+ fabs // |x-1| : x-1 : x : log(2)
+ fld MO(limit) // 0.29 : |x-1| : x-1 : x : log(2)
+ fcomip %st(1) // |x-1| : x-1 : x : log(2)
+ fstp %st(0) // x-1 : x : log(2)
+ jc 2f
+ fstp %st(1) // x-1 : log(2)
+ fyl2xp1 // log(x)
+ ret
+
+2: fstp %st(0) // x : log(2)
+ fyl2x // log(x)
+ ret
+
+3: fstp %st(1)
+ ret
+END (__ieee754_logf)
+
+ENTRY(__logf_finite)
+ fldln2 // log(2)
+ flds 4(%esp) // x : log(2)
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ fld %st // x : x : log(2)
+ fsubl MO(one) // x-1 : x : log(2)
+ fld %st // x-1 : x-1 : x : log(2)
+ fabs // |x-1| : x-1 : x : log(2)
+ fld MO(limit) // 0.29 : |x-1| : x-1 : x : log(2)
+ fcomip %st(1) // |x-1| : x-1 : x : log(2)
+ fstp %st(0) // x-1 : x : log(2)
+ jc 2b
+ fstp %st(1) // x-1 : log(2)
+ fyl2xp1 // log(x)
+ ret
+END(__logf_finite)
diff --git a/sysdeps/i386/i686/fpu/e_logl.S b/sysdeps/i386/i686/fpu/e_logl.S
new file mode 100644
index 0000000000..4e79a5a4b5
--- /dev/null
+++ b/sysdeps/i386/i686/fpu/e_logl.S
@@ -0,0 +1,81 @@
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ *
+ * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>.
+ * Changed to use fyl2xp1 for values near 1, <drepper@cygnus.com>.
+ * Adapted for i686 instructions.
+ */
+
+#include <machine/asm.h>
+
+#ifdef __ELF__
+ .section .rodata.cst8,"aM",@progbits,8
+#else
+ .text
+#endif
+ .p2align 3
+ ASM_TYPE_DIRECTIVE(one,@object)
+one: .double 1.0
+ ASM_SIZE_DIRECTIVE(one)
+ /* It is not important that this constant is precise. It is only
+ a value which is known to be on the safe side for using the
+ fyl2xp1 instruction. */
+ ASM_TYPE_DIRECTIVE(limit,@object)
+limit: .double 0.29
+ ASM_SIZE_DIRECTIVE(limit)
+
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%edx)
+#else
+# define MO(op) op
+#endif
+
+ .text
+ENTRY(__ieee754_logl)
+ fldln2 // log(2)
+ fldt 4(%esp) // x : log(2)
+ fucomi %st
+ jp 3f
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ fld %st // x : x : log(2)
+ fsubl MO(one) // x-1 : x : log(2)
+ fld %st // x-1 : x-1 : x : log(2)
+ fabs // |x-1| : x-1 : x : log(2)
+ fld MO(limit) // 0.29 : |x-1| : x-1 : x : log(2)
+ fcomip %st(1) // |x-1| : x-1 : x : log(2)
+ fstp %st(0) // x-1 : x : log(2)
+ jc 2f
+ fstp %st(1) // x-1 : log(2)
+ fyl2xp1 // log(x)
+ ret
+
+2: fstp %st(0) // x : log(2)
+ fyl2x // log(x)
+ ret
+
+3: fstp %st(1)
+ ret
+END (__ieee754_logl)
+
+ENTRY(__logl_finite)
+ fldln2 // log(2)
+ fldt 4(%esp) // x : log(2)
+#ifdef PIC
+ LOAD_PIC_REG (dx)
+#endif
+ fld %st // x : x : log(2)
+ fsubl MO(one) // x-1 : x : log(2)
+ fld %st // x-1 : x-1 : x : log(2)
+ fabs // |x-1| : x-1 : x : log(2)
+ fld MO(limit) // 0.29 : |x-1| : x-1 : x : log(2)
+ fcomip %st(1) // |x-1| : x-1 : x : log(2)
+ fstp %st(0) // x-1 : x : log(2)
+ jc 2b
+ fstp %st(1) // x-1 : log(2)
+ fyl2xp1 // log(x)
+ ret
+END(__logl_finite)