From 38ad40ceca8ba35761e79cfce4aaef0d0f7583e6 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper
Date: Fri, 14 Oct 2011 23:41:47 -0400
Subject: Optimize x86-32 log

---
 sysdeps/i386/fpu/e_log.S       | 20 ++++++++++-
 sysdeps/i386/fpu/e_logf.S      | 20 ++++++++++-
 sysdeps/i386/fpu/e_logl.S      | 20 ++++++++++-
 sysdeps/i386/i686/fpu/e_log.S  | 80 +++++++++++++++++++++++++++++++++++++++++
 sysdeps/i386/i686/fpu/e_logf.S | 81 ++++++++++++++++++++++++++++++++++++++++++
 sysdeps/i386/i686/fpu/e_logl.S | 81 ++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 299 insertions(+), 3 deletions(-)
 create mode 100644 sysdeps/i386/i686/fpu/e_log.S
 create mode 100644 sysdeps/i386/i686/fpu/e_logf.S
 create mode 100644 sysdeps/i386/i686/fpu/e_logl.S

(limited to 'sysdeps/i386')

diff --git a/sysdeps/i386/fpu/e_log.S b/sysdeps/i386/fpu/e_log.S
index 8110a84535..a2e4d89a40 100644
--- a/sysdeps/i386/fpu/e_log.S
+++ b/sysdeps/i386/fpu/e_log.S
@@ -62,4 +62,22 @@ ENTRY(__ieee754_log)
 	fstp	%st(1)
 	ret
 END (__ieee754_log)
-strong_alias (__ieee754_log, __log_finite)
+
+ENTRY(__log_finite)		// own entry point, replaces the former strong_alias
+	fldln2			// log(2)
+	fldl	4(%esp)		// x : log(2)
+#ifdef PIC
+	LOAD_PIC_REG (dx)
+#endif
+	fld	%st		// x : x : log(2)
+	fsubl	MO(one)		// x-1 : x : log(2)
+	fld	%st		// x-1 : x-1 : x : log(2)
+	fabs			// |x-1| : x-1 : x : log(2)
+	fcompl	MO(limit)	// x-1 : x : log(2)
+	fnstsw			// x-1 : x : log(2)
+	andb	$0x45, %ah	// keep C3, C2, C0 of the |x-1| vs. limit compare
+	jz	2b		// all clear: |x-1| > limit; label 2 lies above this hunk
+	fstp	%st(1)		// x-1 : log(2)
+	fyl2xp1			// log(x)
+	ret
+END(__log_finite)
diff --git a/sysdeps/i386/fpu/e_logf.S b/sysdeps/i386/fpu/e_logf.S
index b683e13853..1992cc2f82 100644
--- a/sysdeps/i386/fpu/e_logf.S
+++ b/sysdeps/i386/fpu/e_logf.S
@@ -63,4 +63,22 @@ ENTRY(__ieee754_logf)
 	fstp	%st(1)
 	ret
 END (__ieee754_logf)
-strong_alias (__ieee754_logf, __logf_finite)
+
+ENTRY(__logf_finite)		// own entry point, replaces the former strong_alias
+	fldln2			// log(2)
+	flds	4(%esp)		// x : log(2)
+#ifdef PIC
+	LOAD_PIC_REG (dx)
+#endif
+	fld	%st		// x : x : log(2)
+	fsubl	MO(one)		// x-1 : x : log(2)
+	fld	%st		// x-1 : x-1 : x : log(2)
+	fabs			// |x-1| : x-1 : x : log(2)
+	fcompl	MO(limit)	// x-1 : x : log(2)
+	fnstsw			// x-1 : x : log(2)
+	andb	$0x45, %ah	// keep C3, C2, C0 of the |x-1| vs. limit compare
+	jz	2b		// all clear: |x-1| > limit; label 2 lies above this hunk
+	fstp	%st(1)		// x-1 : log(2)
+	fyl2xp1			// log(x)
+	ret
+END(__logf_finite)
diff --git a/sysdeps/i386/fpu/e_logl.S b/sysdeps/i386/fpu/e_logl.S
index ee1fb16bc4..bfb72a30e9 100644
--- a/sysdeps/i386/fpu/e_logl.S
+++ b/sysdeps/i386/fpu/e_logl.S
@@ -63,4 +63,22 @@ ENTRY(__ieee754_logl)
 	fstp	%st(1)
 	ret
 END (__ieee754_logl)
-strong_alias (__ieee754_logl, __logl_finite)
+
+ENTRY(__logl_finite)		// own entry point, replaces the former strong_alias
+	fldln2			// log(2)
+	fldt	4(%esp)		// x : log(2)
+#ifdef PIC
+	LOAD_PIC_REG (dx)
+#endif
+	fld	%st		// x : x : log(2)
+	fsubl	MO(one)		// x-1 : x : log(2)
+	fld	%st		// x-1 : x-1 : x : log(2)
+	fabs			// |x-1| : x-1 : x : log(2)
+	fcompl	MO(limit)	// x-1 : x : log(2)
+	fnstsw			// x-1 : x : log(2)
+	andb	$0x45, %ah	// keep C3, C2, C0 of the |x-1| vs. limit compare
+	jz	2b		// all clear: |x-1| > limit; label 2 lies above this hunk
+	fstp	%st(1)		// x-1 : log(2)
+	fyl2xp1			// log(x)
+	ret
+END(__logl_finite)
diff --git a/sysdeps/i386/i686/fpu/e_log.S b/sysdeps/i386/i686/fpu/e_log.S
new file mode 100644
index 0000000000..c6524b1854
--- /dev/null
+++ b/sysdeps/i386/i686/fpu/e_log.S
@@ -0,0 +1,80 @@
+/*
+ * Written by J.T. Conklin.
+ * Public domain.
+ *
+ * Changed to use fyl2xp1 for values near 1.
+ * Adapted for i686 instructions.
+ */
+
+#include <machine/asm.h>
+
+#ifdef __ELF__
+	.section .rodata.cst8,"aM",@progbits,8
+#else
+	.text
+#endif
+	.p2align 3
+	ASM_TYPE_DIRECTIVE(one,@object)
+one:	.double 1.0
+	ASM_SIZE_DIRECTIVE(one)
+	/* It is not important that this constant is precise.  It is only
+	   a value which is known to be on the safe side for using the
+	   fyl2xp1 instruction.  */
+	ASM_TYPE_DIRECTIVE(limit,@object)
+limit:	.double 0.29
+	ASM_SIZE_DIRECTIVE(limit)
+
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%edx)
+#else
+# define MO(op) op
+#endif
+
+	.text
+ENTRY(__ieee754_log)
+	fldln2			// log(2)
+	fldl	4(%esp)		// x : log(2)
+	fucomi	%st		// x against itself: unordered (PF=1) only for NaN
+	jp	3f		// NaN: hand x back unchanged
+#ifdef PIC
+	LOAD_PIC_REG (dx)
+#endif
+	fld	%st		// x : x : log(2)
+	fsubl	MO(one)		// x-1 : x : log(2)
+	fld	%st		// x-1 : x-1 : x : log(2)
+	fabs			// |x-1| : x-1 : x : log(2)
+	fldl	MO(limit)	// 0.29 : |x-1| : x-1 : x : log(2); limit is a .double
+	fcomip	%st(1)		// |x-1| : x-1 : x : log(2); CF=1 iff 0.29 < |x-1|
+	fstp	%st(0)		// x-1 : x : log(2)
+	jc	2f		// too far from 1 for fyl2xp1
+	fstp	%st(1)		// x-1 : log(2)
+	fyl2xp1			// log(x)
+	ret
+
+2:	fstp	%st(0)		// x : log(2)
+	fyl2x			// log(x)
+	ret
+
+3:	fstp	%st(1)		// x; log(2) is dropped
+	ret
+END (__ieee754_log)
+
+ENTRY(__log_finite)		// same as __ieee754_log minus the NaN test
+	fldln2			// log(2)
+	fldl	4(%esp)		// x : log(2)
+#ifdef PIC
+	LOAD_PIC_REG (dx)
+#endif
+	fld	%st		// x : x : log(2)
+	fsubl	MO(one)		// x-1 : x : log(2)
+	fld	%st		// x-1 : x-1 : x : log(2)
+	fabs			// |x-1| : x-1 : x : log(2)
+	fldl	MO(limit)	// 0.29 : |x-1| : x-1 : x : log(2); limit is a .double
+	fcomip	%st(1)		// |x-1| : x-1 : x : log(2); CF=1 iff 0.29 < |x-1|
+	fstp	%st(0)		// x-1 : x : log(2)
+	jc	2b		// share the fyl2x tail of __ieee754_log
+	fstp	%st(1)		// x-1 : log(2)
+	fyl2xp1			// log(x)
+	ret
+END(__log_finite)
diff --git a/sysdeps/i386/i686/fpu/e_logf.S b/sysdeps/i386/i686/fpu/e_logf.S
new file mode 100644
index 0000000000..64f8807f35
--- /dev/null
+++ b/sysdeps/i386/i686/fpu/e_logf.S
@@ -0,0 +1,81 @@
+/*
+ * Written by J.T. Conklin.
+ * Public domain.
+ * Adapted for float by Ulrich Drepper.
+ *
+ * Changed to use fyl2xp1 for values near 1.
+ * Adapted for i686 instructions.
+ */
+
+#include <machine/asm.h>
+
+#ifdef __ELF__
+	.section .rodata.cst8,"aM",@progbits,8
+#else
+	.text
+#endif
+	.p2align 3
+	ASM_TYPE_DIRECTIVE(one,@object)
+one:	.double 1.0
+	ASM_SIZE_DIRECTIVE(one)
+	/* It is not important that this constant is precise.  It is only
+	   a value which is known to be on the safe side for using the
+	   fyl2xp1 instruction.  */
+	ASM_TYPE_DIRECTIVE(limit,@object)
+limit:	.double 0.29
+	ASM_SIZE_DIRECTIVE(limit)
+
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%edx)
+#else
+# define MO(op) op
+#endif
+
+	.text
+ENTRY(__ieee754_logf)
+	fldln2			// log(2)
+	flds	4(%esp)		// x : log(2)
+	fucomi	%st		// x against itself: unordered (PF=1) only for NaN
+	jp	3f		// NaN: hand x back unchanged
+#ifdef PIC
+	LOAD_PIC_REG (dx)
+#endif
+	fld	%st		// x : x : log(2)
+	fsubl	MO(one)		// x-1 : x : log(2)
+	fld	%st		// x-1 : x-1 : x : log(2)
+	fabs			// |x-1| : x-1 : x : log(2)
+	fldl	MO(limit)	// 0.29 : |x-1| : x-1 : x : log(2); limit is a .double
+	fcomip	%st(1)		// |x-1| : x-1 : x : log(2); CF=1 iff 0.29 < |x-1|
+	fstp	%st(0)		// x-1 : x : log(2)
+	jc	2f		// too far from 1 for fyl2xp1
+	fstp	%st(1)		// x-1 : log(2)
+	fyl2xp1			// log(x)
+	ret
+
+2:	fstp	%st(0)		// x : log(2)
+	fyl2x			// log(x)
+	ret
+
+3:	fstp	%st(1)		// x; log(2) is dropped
+	ret
+END (__ieee754_logf)
+
+ENTRY(__logf_finite)		// same as __ieee754_logf minus the NaN test
+	fldln2			// log(2)
+	flds	4(%esp)		// x : log(2)
+#ifdef PIC
+	LOAD_PIC_REG (dx)
+#endif
+	fld	%st		// x : x : log(2)
+	fsubl	MO(one)		// x-1 : x : log(2)
+	fld	%st		// x-1 : x-1 : x : log(2)
+	fabs			// |x-1| : x-1 : x : log(2)
+	fldl	MO(limit)	// 0.29 : |x-1| : x-1 : x : log(2); limit is a .double
+	fcomip	%st(1)		// |x-1| : x-1 : x : log(2); CF=1 iff 0.29 < |x-1|
+	fstp	%st(0)		// x-1 : x : log(2)
+	jc	2b		// share the fyl2x tail of __ieee754_logf
+	fstp	%st(1)		// x-1 : log(2)
+	fyl2xp1			// log(x)
+	ret
+END(__logf_finite)
diff --git a/sysdeps/i386/i686/fpu/e_logl.S b/sysdeps/i386/i686/fpu/e_logl.S
new file mode 100644
index 0000000000..4e79a5a4b5
--- /dev/null
+++ b/sysdeps/i386/i686/fpu/e_logl.S
@@ -0,0 +1,81 @@
+/*
+ * Written by J.T. Conklin.
+ * Public domain.
+ *
+ * Adapted for `long double' by Ulrich Drepper.
+ * Changed to use fyl2xp1 for values near 1.
+ * Adapted for i686 instructions.
+ */
+
+#include <machine/asm.h>
+
+#ifdef __ELF__
+	.section .rodata.cst8,"aM",@progbits,8
+#else
+	.text
+#endif
+	.p2align 3
+	ASM_TYPE_DIRECTIVE(one,@object)
+one:	.double 1.0
+	ASM_SIZE_DIRECTIVE(one)
+	/* It is not important that this constant is precise.  It is only
+	   a value which is known to be on the safe side for using the
+	   fyl2xp1 instruction.  */
+	ASM_TYPE_DIRECTIVE(limit,@object)
+limit:	.double 0.29
+	ASM_SIZE_DIRECTIVE(limit)
+
+
+#ifdef PIC
+# define MO(op) op##@GOTOFF(%edx)
+#else
+# define MO(op) op
+#endif
+
+	.text
+ENTRY(__ieee754_logl)
+	fldln2			// log(2)
+	fldt	4(%esp)		// x : log(2)
+	fucomi	%st		// x against itself: unordered (PF=1) only for NaN
+	jp	3f		// NaN: hand x back unchanged
+#ifdef PIC
+	LOAD_PIC_REG (dx)
+#endif
+	fld	%st		// x : x : log(2)
+	fsubl	MO(one)		// x-1 : x : log(2)
+	fld	%st		// x-1 : x-1 : x : log(2)
+	fabs			// |x-1| : x-1 : x : log(2)
+	fldl	MO(limit)	// 0.29 : |x-1| : x-1 : x : log(2); limit is a .double
+	fcomip	%st(1)		// |x-1| : x-1 : x : log(2); CF=1 iff 0.29 < |x-1|
+	fstp	%st(0)		// x-1 : x : log(2)
+	jc	2f		// too far from 1 for fyl2xp1
+	fstp	%st(1)		// x-1 : log(2)
+	fyl2xp1			// log(x)
+	ret
+
+2:	fstp	%st(0)		// x : log(2)
+	fyl2x			// log(x)
+	ret
+
+3:	fstp	%st(1)		// x; log(2) is dropped
+	ret
+END (__ieee754_logl)
+
+ENTRY(__logl_finite)		// same as __ieee754_logl minus the NaN test
+	fldln2			// log(2)
+	fldt	4(%esp)		// x : log(2)
+#ifdef PIC
+	LOAD_PIC_REG (dx)
+#endif
+	fld	%st		// x : x : log(2)
+	fsubl	MO(one)		// x-1 : x : log(2)
+	fld	%st		// x-1 : x-1 : x : log(2)
+	fabs			// |x-1| : x-1 : x : log(2)
+	fldl	MO(limit)	// 0.29 : |x-1| : x-1 : x : log(2); limit is a .double
+	fcomip	%st(1)		// |x-1| : x-1 : x : log(2); CF=1 iff 0.29 < |x-1|
+	fstp	%st(0)		// x-1 : x : log(2)
+	jc	2b		// share the fyl2x tail of __ieee754_logl
+	fstp	%st(1)		// x-1 : log(2)
+	fyl2xp1			// log(x)
+	ret
+END(__logl_finite)
-- 
cgit v1.2.1