| author | ro <ro@138bc75d-0d04-0410-961f-82ee72b054a4> | 2011-11-02 15:03:19 +0000 |
|---|---|---|
| committer | ro <ro@138bc75d-0d04-0410-961f-82ee72b054a4> | 2011-11-02 15:03:19 +0000 |
| commit | 9213d2eb44a8b9bcc432b57e246d9b52d5bdc949 (patch) | |
| tree | bfbde9a54f663fb7556b9dacd07709ef97c1961c /gcc/config/xtensa | |
| parent | 237490bf10db39b859bd28598ff64f1bd2c84421 (diff) | |
| download | gcc-9213d2eb44a8b9bcc432b57e246d9b52d5bdc949.tar.gz | |
Move libgcc1 to toplevel libgcc
gcc:
* Makefile.in (LIB1ASMSRC): Don't export.
(libgcc.mvars): Don't emit LIB1ASMFUNCS, LIB1ASMSRC.
* config/arm/arm.c: Update lib1funcs.asm filename.
* config/arm/linux-eabi.h: Likewise.
* config/arm/bpabi-v6m.S, config/arm/bpabi.S,
config/arm/ieee754-df.S, config/arm/ieee754-sf.S: Move to
../libgcc/config/arm.
* config/arm/lib1funcs.asm: Move to ../libgcc/config/arm/lib1funcs.S.
* config/arm/t-arm (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/arm/t-arm-elf (LIB1ASMFUNCS): Remove.
* config/arm/t-bpabi: Likewise.
* config/arm/t-linux (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/arm/t-linux-eabi (LIB1ASMFUNCS): Remove.
* config/arm/t-strongarm-elf: Likewise.
* config/arm/t-symbian: Likewise.
* config/arm/t-vxworks: Likewise.
* config/arm/t-wince-pe: Likewise.
* config/avr/libgcc.S: Move to ../libgcc/config/avr.
* config/avr/t-avr (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/bfin/lib1funcs.asm: Move to
../libgcc/config/bfin/lib1funcs.S.
* config/bfin/t-bfin: Remove.
* config/bfin/t-bfin-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/bfin/t-bfin-linux: Likewise.
* config/bfin/t-bfin-uclinux: Likewise.
* config/c6x/lib1funcs.asm: Move to
../libgcc/config/c6x/lib1funcs.S.
* config/c6x/t-c6x-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/fr30/lib1funcs.asm: Move to
../libgcc/config/fr30/lib1funcs.S.
* config/fr30/t-fr30 (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/frv/lib1funcs.asm: Move to
../libgcc/config/frv/lib1funcs.S.
* config/frv/t-frv (CROSS_LIBGCC1, LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/h8300/fixunssfsi.c: Update lib1funcs.asm filename.
* config/h8300/lib1funcs.asm: Move to
../libgcc/config/h8300/lib1funcs.S.
* config/h8300/t-h8300 (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/i386/cygwin.asm: Move to ../libgcc/config/i386/cygwin.S.
* config/i386/t-cygming (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/i386/t-interix: Likewise.
* config/ia64/lib1funcs.asm: Move to
../libgcc/config/ia64/lib1funcs.S.
* config/ia64/t-hpux (LIB1ASMFUNCS, LIBGCC1_TEST): Remove.
* config/ia64/t-ia64 (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/iq2000/t-iq2000 (LIBGCC1, CROSS_LIBGCC1): Remove.
* config/m32c/m32c.c: Update m32c-lib1.S filename.
* config/m32c/m32c-lib1.S: Move to ../libgcc/config/m32c/lib1funcs.S.
* config/m32c/t-m32c (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/m32r/t-linux (CROSS_LIBGCC1, LIBGCC1, LIBGCC1_TEST): Remove.
* config/m68k/lb1sf68.asm: Move to ../libgcc/config/m68k/lb1sf68.S.
* config/m68k/t-floatlib (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/mcore/lib1.asm: Move to ../libgcc/config/mcore/lib1funcs.S.
* config/mcore/t-mcore (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/mep/mep-lib1.asm: Move to ../libgcc/config/mep/lib1funcs.S.
* config/mep/t-mep (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/mips/mips16.S: Move to ../libgcc/config/mips.
* config/mips/t-libgcc-mips16: Remove.
* config/mips/t-sr71k (LIBGCC1, CROSS_LIBGCC1): Remove.
* config/pa/milli64.S: Move to ../libgcc/config/pa.
* config/pa/t-linux (LIB1ASMFUNCS, LIB1ASMSRC): Remove.
* config/pa/t-linux64: Likewise.
* config/picochip/libgccExtras/fake_libgcc.asm: Move to
../libgcc/config/picochip/lib1funcs.S.
* config/picochip/t-picochip (LIB1ASMFUNCS, LIB1ASMSRC): Remove.
* config/sh/lib1funcs.asm: Move to ../libgcc/config/sh/lib1funcs.S.
* config/sh/lib1funcs.h: Move to ../libgcc/config/sh.
* config/sh/sh.h: Update lib1funcs.asm filename.
* config/sh/t-linux (LIB1ASMFUNCS_CACHE): Remove.
* config/sh/t-netbsd: Likewise.
* config/sh/t-sh (LIB1ASMSRC, LIB1ASMFUNCS, LIB1ASMFUNCS_CACHE):
Remove.
* config/sh/t-sh64 (LIB1ASMFUNCS): Remove.
* config/sparc/lb1spc.asm: Move to ../libgcc/config/sparc/lb1spc.S.
* config/sparc/lb1spl.asm: Remove.
* config/sparc/t-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/sparc/t-leon: Likewise.
* config/spu/t-spu-elf (LIBGCC1, CROSS_LIBGCC1): Remove.
* config/v850/lib1funcs.asm: Move to ../libgcc/config/v850/lib1funcs.S.
* config/v850/t-v850 (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config/vax/lib1funcs.asm: Move to ../libgcc/config/vax/lib1funcs.S.
* config/vax/t-linux: Remove.
* config/xtensa/ieee754-df.S, config/xtensa/ieee754-sf.S: Move to
../libgcc/config/xtensa.
* config/xtensa/lib1funcs.asm: Move to
../libgcc/config/xtensa/lib1funcs.S.
* config/xtensa/t-xtensa (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
* config.gcc (bfin*-rtems*): Remove bfin/t-bfin from tmake_file.
(bfin*-*): Likewise.
(mips64*-*-linux*, mipsisa64*-*-linux*): Remove
mips/t-libgcc-mips16 from tmake_file.
(mips*-*-linux*): Likewise.
(mips*-sde-elf*): Likewise.
(mipsisa32-*-elf*, mipsisa32el-*-elf*, mipsisa32r2-*-elf*)
(mipsisa32r2el-*-elf*, mipsisa64-*-elf*, mipsisa64el-*-elf*)
(mipsisa64r2-*-elf*, mipsisa64r2el-*-elf*): Likewise.
(mipsisa64sb1-*-elf*, mipsisa64sb1el-*-elf*): Likewise.
(mips-*-elf*, mipsel-*-elf*): Likewise.
(mips64-*-elf*, mips64el-*-elf*): Likewise.
(mips64orion-*-elf*, mips64orionel-*-elf*): Likewise.
(mips*-*-rtems*): Likewise.
(mipstx39-*-elf*, mipstx39el-*-elf*): Likewise.
(vax-*-linux*): Remove vax/t-linux from tmake_file.
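To make the shape of these removals concrete: before this commit, each gcc/config/&lt;arch&gt;/t-* fragment named the lib1funcs assembly source and listed the functions to extract from it. A minimal sketch of the kind of definitions removed follows — the xtensa source name matches the entries above, but the function list here is abbreviated and illustrative, not the real per-target list:

```make
# Sketch of a pre-move gcc/config/<arch>/t-* fragment (illustrative).
# LIB1ASMSRC names the single assembly source; LIB1ASMFUNCS lists the
# -DL<name> sections to extract from it, one object per function.
LIB1ASMSRC = xtensa/lib1funcs.asm
LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3
```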
libgcc:
* Makefile.in ($(lib1asmfuncs-o), $(lib1asmfuncs-s-o)): Use
$(srcdir) to refer to $(LIB1ASMSRC).
Use $<.
* config/arm/bpabi-v6m.S, config/arm/bpabi.S,
config/arm/ieee754-df.S, config/arm/ieee754-sf.S,
config/arm/lib1funcs.S: New files.
* config/arm/libunwind.S [!__symbian__]: Use lib1funcs.S.
* config/arm/t-arm: New file.
* config/arm/t-bpabi (LIB1ASMFUNCS): Set.
* config/arm/t-elf, config/arm/t-linux, config/arm/t-linux-eabi,
config/arm/t-strongarm-elf: New files.
* config/arm/t-symbian (LIB1ASMFUNCS): Set.
* config/arm/t-vxworks, config/arm/t-wince-pe: New files.
* config/avr/lib1funcs.S: New file.
* config/avr/t-avr (LIB1ASMSRC, LIB1ASMFUNCS): Set.
* config/bfin/lib1funcs.S, config/bfin/t-bfin: New files.
* config/c6x/lib1funcs.S: New file.
* config/c6x/t-elf (LIB1ASMSRC, LIB1ASMFUNCS): Set.
* config/fr30/lib1funcs.S, config/fr30/t-fr30: New files.
* config/frv/lib1funcs.S: New file.
* config/frv/t-frv (LIB1ASMSRC, LIB1ASMFUNCS): Set.
* config/h8300/lib1funcs.S, config/h8300/t-h8300: New files.
* config/i386/cygwin.S, config/i386/t-chkstk: New files.
* config/ia64/__divxf3.asm: Rename to ...
* config/ia64/__divxf3.S: ... this.
Adapt lib1funcs.asm filename.
* config/ia64/_fixtfdi.asm: Rename to ...
* config/ia64/_fixtfdi.S: ... this.
Adapt lib1funcs.asm filename.
* config/ia64/_fixunstfdi.asm: Rename to ...
* config/ia64/_fixunstfdi.S: ... this.
Adapt lib1funcs.asm filename.
* config/ia64/_floatditf.asm: Rename to ...
* config/ia64/_floatditf.S: ... this.
Adapt lib1funcs.asm filename.
* config/ia64/lib1funcs.S: New file.
* config/ia64/t-hpux (LIB1ASMFUNCS): Set.
* config/ia64/t-ia64 (LIB1ASMSRC, LIB1ASMFUNCS): Set.
* config/ia64/t-softfp-compat (libgcc1-tf-compats): Adapt suffix.
* config/m32c/lib1funcs.S, config/m32c/t-m32c: New files.
* config/m68k/lb1sf68.S, config/m68k/t-floatlib: New files.
* config/mcore/lib1funcs.S, config/mcore/t-mcore: New files.
* config/mep/lib1funcs.S: New file.
* config/mep/t-mep (LIB1ASMSRC, LIB1ASMFUNCS): Set.
* config/mips/mips16.S: New file.
* config/mips/t-mips16 (LIB1ASMSRC, LIB1ASMFUNCS): Set.
* config/pa/milli64.S: New file.
* config/pa/t-linux, config/pa/t-linux64: New files.
* config/picochip/lib1funcs.S: New file.
* config/picochip/t-picochip (LIB1ASMSRC, LIB1ASMFUNCS): Set.
* config/sh/lib1funcs.S, config/sh/lib1funcs.h: New files.
* config/sh/t-linux (LIB1ASMFUNCS_CACHE): Set.
* config/sh/t-netbsd: New file.
* config/sh/t-sh (LIB1ASMSRC, LIB1ASMFUNCS, LIB1ASMFUNCS_CACHE): Set.
Use $(srcdir) to refer to lib1funcs.S, adapt filename.
* config/sh/t-sh64: New file.
* config/sparc/lb1spc.S: New file.
* config/sparc/t-softmul (LIB1ASMSRC): Adapt sparc/lb1spc.asm
filename.
* config/v850/lib1funcs.S, config/v850/t-v850: New files.
* config/vax/lib1funcs.S, config/vax/t-linux: New files.
* config/xtensa/ieee754-df.S, config/xtensa/ieee754-sf.S,
config/xtensa/lib1funcs.S: New files.
* config/xtensa/t-xtensa (LIB1ASMSRC, LIB1ASMFUNCS): Set.
* config.host (arm-wrs-vxworks): Add arm/t-arm, arm/t-vxworks to
tmake_file.
(arm*-*-freebsd*): Add arm/t-arm, arm/t-strongarm-elf to tmake_file.
(arm*-*-netbsdelf*): Add arm/t-arm to tmake_file.
(arm*-*-linux*): Likewise.
Add arm/t-elf, arm/t-bpabi, arm/t-linux-eabi to tmake_file for
arm*-*-linux-*eabi, add arm/t-linux otherwise.
(arm*-*-uclinux*): Add arm/t-arm, arm/t-elf to tmake_file.
(arm*-*-ecos-elf): Likewise.
(arm*-*-eabi*, arm*-*-symbianelf*): Likewise.
(arm*-*-rtems*): Likewise.
(arm*-*-elf): Likewise.
(arm*-wince-pe*): Add arm/t-arm, arm/t-wince-pe to tmake_file.
(avr-*-rtems*): Add avr/t-avr to tmake_file.
(bfin*-elf*): Add bfin/t-bfin to tmake_file.
(bfin*-uclinux*): Likewise.
(bfin*-linux-uclibc*): Likewise.
(bfin*-rtems*): Likewise.
(bfin*-*): Likewise.
(fido-*-elf): Merge into m68k-*-elf*.
(fr30-*-elf): Add fr30/t-fr30 to tmake_file.
(frv-*-*linux*): Add frv/t-frv to tmake_file.
(h8300-*-rtems*): Add h8300/t-h8300 to tmake_file.
(h8300-*-elf*): Likewise.
(hppa*64*-*-linux*): Add pa/t-linux, pa/t-linux64 to tmake_file.
(hppa*-*-linux*): Add pa/t-linux to tmake_file.
(i[34567]86-*-cygwin*): Add i386/t-chkstk to tmake_file.
(i[34567]86-*-mingw*): Likewise.
(x86_64-*-mingw*): Likewise.
(i[34567]86-*-interix3*): Likewise.
(ia64*-*-hpux*): Add ia64/t-ia64, ia64/t-hpux to tmake_file.
(ia64-hp-*vms*): Add ia64/t-ia64 to tmake_file.
(m68k-*-elf*): Also handle fido-*-elf.
Add m68k/t-floatlib to tmake_file.
(m68k-*-uclinux*): Add m68k/t-floatlib to tmake_file.
(m68k-*-linux*): Likewise.
(m68k-*-rtems*): Likewise.
(mcore-*-elf): Add mcore/t-mcore to tmake_file.
(sh-*-elf*, sh[12346l]*-*-elf*): Add sh/t-sh64 to tmake_file for
sh64*-*-*.
(sh-*-linux*, sh[2346lbe]*-*-linux*): Add sh/t-sh to tmake_file.
Add sh/t-sh64 to tmake_file for sh64*-*-linux*.
(sh-*-netbsdelf*, shl*-*-netbsdelf*, sh5-*-netbsd*)
(sh5l*-*-netbsd*, sh64-*-netbsd*, sh64l*-*-netbsd*): Add sh/t-sh,
sh/t-netbsd to tmake_file.
Add sh/t-sh64 to tmake_file for sh5*-*-netbsd*, sh64*-netbsd*.
(sh-*-rtems*): Add sh/t-sh to tmake_file.
(sh-wrs-vxworks): Likewise.
(sparc-*-linux*): Add sparc/t-softmul to tmake_file except for
*-leon[3-9]*.
(v850*-*-*): Add v850/t-v850 to tmake_file.
(vax-*-linux*): Add vax/t-linux to tmake_file.
(m32c-*-elf*, m32c-*-rtems*): Add m32c/t-m32c to tmake_file.
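The libgcc side then builds one object per LIB1ASMFUNCS member from the shared source, selecting each function with a -DL&lt;name&gt; define; the Makefile.in entry above describes exactly this rule, which now reaches the source via $(srcdir) and refers to it as $&lt;. A hedged sketch of the rule shape (gcc_compile, vis_hide, and objext are libgcc's own Makefile variables; treat the exact recipe as an approximation):

```make
# Sketch: compile one object per LIB1ASMFUNCS member from the shared
# assembly source, now located via $(srcdir) and referenced as $<.
$(lib1asmfuncs-o): %$(objext): $(srcdir)/$(LIB1ASMSRC)
	$(gcc_compile) -DL$* -xassembler-with-cpp -c $< $(vis_hide)
```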
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@180773 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/xtensa')

| mode | file | deleted |
|---|---|---|
| -rw-r--r-- | gcc/config/xtensa/ieee754-df.S | 2388 |
| -rw-r--r-- | gcc/config/xtensa/ieee754-sf.S | 1757 |
| -rw-r--r-- | gcc/config/xtensa/lib1funcs.asm | 845 |
| -rw-r--r-- | gcc/config/xtensa/t-xtensa | 12 |

4 files changed, 0 insertions, 5002 deletions
diff --git a/gcc/config/xtensa/ieee754-df.S b/gcc/config/xtensa/ieee754-df.S deleted file mode 100644 index 9b46889bdc2..00000000000 --- a/gcc/config/xtensa/ieee754-df.S +++ /dev/null @@ -1,2388 +0,0 @@ -/* IEEE-754 double-precision functions for Xtensa - Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc. - Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#ifdef __XTENSA_EB__ -#define xh a2 -#define xl a3 -#define yh a4 -#define yl a5 -#else -#define xh a3 -#define xl a2 -#define yh a5 -#define yl a4 -#endif - -/* Warning! The branch displacements for some Xtensa branch instructions - are quite small, and this code has been carefully laid out to keep - branch targets in range. If you change anything, be sure to check that - the assembler is not relaxing anything to branch over a jump. */ - -#ifdef L_negdf2 - - .align 4 - .global __negdf2 - .type __negdf2, @function -__negdf2: - leaf_entry sp, 16 - movi a4, 0x80000000 - xor xh, xh, a4 - leaf_return - -#endif /* L_negdf2 */ - -#ifdef L_addsubdf3 - - /* Addition */ -__adddf3_aux: - - /* Handle NaNs and Infinities. (This code is placed before the - start of the function just to keep it in range of the limited - branch displacements.) */ - -.Ladd_xnan_or_inf: - /* If y is neither Infinity nor NaN, return x. */ - bnall yh, a6, 1f - /* If x is a NaN, return it. Otherwise, return y. */ - slli a7, xh, 12 - or a7, a7, xl - beqz a7, .Ladd_ynan_or_inf -1: leaf_return - -.Ladd_ynan_or_inf: - /* Return y. */ - mov xh, yh - mov xl, yl - leaf_return - -.Ladd_opposite_signs: - /* Operand signs differ. Do a subtraction. */ - slli a7, a6, 11 - xor yh, yh, a7 - j .Lsub_same_sign - - .align 4 - .global __adddf3 - .type __adddf3, @function -__adddf3: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - - /* Check if the two operands have the same sign. */ - xor a7, xh, yh - bltz a7, .Ladd_opposite_signs - -.Ladd_same_sign: - /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */ - ball xh, a6, .Ladd_xnan_or_inf - ball yh, a6, .Ladd_ynan_or_inf - - /* Compare the exponents. The smaller operand will be shifted - right by the exponent difference and added to the larger - one. */ - extui a7, xh, 20, 12 - extui a8, yh, 20, 12 - bltu a7, a8, .Ladd_shiftx - -.Ladd_shifty: - /* Check if the smaller (or equal) exponent is zero. */ - bnone yh, a6, .Ladd_yexpzero - - /* Replace yh sign/exponent with 0x001. */ - or yh, yh, a6 - slli yh, yh, 11 - srli yh, yh, 11 - -.Ladd_yexpdiff: - /* Compute the exponent difference. Optimize for difference < 32. 
*/ - sub a10, a7, a8 - bgeui a10, 32, .Ladd_bigshifty - - /* Shift yh/yl right by the exponent difference. Any bits that are - shifted out of yl are saved in a9 for rounding the result. */ - ssr a10 - movi a9, 0 - src a9, yl, a9 - src yl, yh, yl - srl yh, yh - -.Ladd_addy: - /* Do the 64-bit addition. */ - add xl, xl, yl - add xh, xh, yh - bgeu xl, yl, 1f - addi xh, xh, 1 -1: - /* Check if the add overflowed into the exponent. */ - extui a10, xh, 20, 12 - beq a10, a7, .Ladd_round - mov a8, a7 - j .Ladd_carry - -.Ladd_yexpzero: - /* y is a subnormal value. Replace its sign/exponent with zero, - i.e., no implicit "1.0", and increment the apparent exponent - because subnormals behave as if they had the minimum (nonzero) - exponent. Test for the case when both exponents are zero. */ - slli yh, yh, 12 - srli yh, yh, 12 - bnone xh, a6, .Ladd_bothexpzero - addi a8, a8, 1 - j .Ladd_yexpdiff - -.Ladd_bothexpzero: - /* Both exponents are zero. Handle this as a special case. There - is no need to shift or round, and the normal code for handling - a carry into the exponent field will not work because it - assumes there is an implicit "1.0" that needs to be added. */ - add xl, xl, yl - add xh, xh, yh - bgeu xl, yl, 1f - addi xh, xh, 1 -1: leaf_return - -.Ladd_bigshifty: - /* Exponent difference > 64 -- just return the bigger value. */ - bgeui a10, 64, 1b - - /* Shift yh/yl right by the exponent difference. Any bits that are - shifted out are saved in a9 for rounding the result. */ - ssr a10 - sll a11, yl /* lost bits shifted out of yl */ - src a9, yh, yl - srl yl, yh - movi yh, 0 - beqz a11, .Ladd_addy - or a9, a9, a10 /* any positive, nonzero value will work */ - j .Ladd_addy - -.Ladd_xexpzero: - /* Same as "yexpzero" except skip handling the case when both - exponents are zero. */ - slli xh, xh, 12 - srli xh, xh, 12 - addi a7, a7, 1 - j .Ladd_xexpdiff - -.Ladd_shiftx: - /* Same thing as the "shifty" code, but with x and y swapped. Also, - because the exponent difference is always nonzero in this version, - the shift sequence can use SLL and skip loading a constant zero. */ - bnone xh, a6, .Ladd_xexpzero - - or xh, xh, a6 - slli xh, xh, 11 - srli xh, xh, 11 - -.Ladd_xexpdiff: - sub a10, a8, a7 - bgeui a10, 32, .Ladd_bigshiftx - - ssr a10 - sll a9, xl - src xl, xh, xl - srl xh, xh - -.Ladd_addx: - add xl, xl, yl - add xh, xh, yh - bgeu xl, yl, 1f - addi xh, xh, 1 -1: - /* Check if the add overflowed into the exponent. */ - extui a10, xh, 20, 12 - bne a10, a8, .Ladd_carry - -.Ladd_round: - /* Round up if the leftover fraction is >= 1/2. */ - bgez a9, 1f - addi xl, xl, 1 - beqz xl, .Ladd_roundcarry - - /* Check if the leftover fraction is exactly 1/2. */ - slli a9, a9, 1 - beqz a9, .Ladd_exactlyhalf -1: leaf_return - -.Ladd_bigshiftx: - /* Mostly the same thing as "bigshifty".... */ - bgeui a10, 64, .Ladd_returny - - ssr a10 - sll a11, xl - src a9, xh, xl - srl xl, xh - movi xh, 0 - beqz a11, .Ladd_addx - or a9, a9, a10 - j .Ladd_addx - -.Ladd_returny: - mov xh, yh - mov xl, yl - leaf_return - -.Ladd_carry: - /* The addition has overflowed into the exponent field, so the - value needs to be renormalized. The mantissa of the result - can be recovered by subtracting the original exponent and - adding 0x100000 (which is the explicit "1.0" for the - mantissa of the non-shifted operand -- the "1.0" for the - shifted operand was already added). The mantissa can then - be shifted right by one bit. 
The explicit "1.0" of the - shifted mantissa then needs to be replaced by the exponent, - incremented by one to account for the normalizing shift. - It is faster to combine these operations: do the shift first - and combine the additions and subtractions. If x is the - original exponent, the result is: - shifted mantissa - (x << 19) + (1 << 19) + (x << 20) - or: - shifted mantissa + ((x + 1) << 19) - Note that the exponent is incremented here by leaving the - explicit "1.0" of the mantissa in the exponent field. */ - - /* Shift xh/xl right by one bit. Save the lsb of xl. */ - mov a10, xl - ssai 1 - src xl, xh, xl - srl xh, xh - - /* See explanation above. The original exponent is in a8. */ - addi a8, a8, 1 - slli a8, a8, 19 - add xh, xh, a8 - - /* Return an Infinity if the exponent overflowed. */ - ball xh, a6, .Ladd_infinity - - /* Same thing as the "round" code except the msb of the leftover - fraction is bit 0 of a10, with the rest of the fraction in a9. */ - bbci.l a10, 0, 1f - addi xl, xl, 1 - beqz xl, .Ladd_roundcarry - beqz a9, .Ladd_exactlyhalf -1: leaf_return - -.Ladd_infinity: - /* Clear the mantissa. */ - movi xl, 0 - srli xh, xh, 20 - slli xh, xh, 20 - - /* The sign bit may have been lost in a carry-out. Put it back. */ - slli a8, a8, 1 - or xh, xh, a8 - leaf_return - -.Ladd_exactlyhalf: - /* Round down to the nearest even value. */ - srli xl, xl, 1 - slli xl, xl, 1 - leaf_return - -.Ladd_roundcarry: - /* xl is always zero when the rounding increment overflows, so - there's no need to round it to an even value. */ - addi xh, xh, 1 - /* Overflow to the exponent is OK. */ - leaf_return - - - /* Subtraction */ -__subdf3_aux: - - /* Handle NaNs and Infinities. (This code is placed before the - start of the function just to keep it in range of the limited - branch displacements.) */ - -.Lsub_xnan_or_inf: - /* If y is neither Infinity nor NaN, return x. */ - bnall yh, a6, 1f - /* Both x and y are either NaN or Inf, so the result is NaN. */ - movi a4, 0x80000 /* make it a quiet NaN */ - or xh, xh, a4 -1: leaf_return - -.Lsub_ynan_or_inf: - /* Negate y and return it. */ - slli a7, a6, 11 - xor xh, yh, a7 - mov xl, yl - leaf_return - -.Lsub_opposite_signs: - /* Operand signs differ. Do an addition. */ - slli a7, a6, 11 - xor yh, yh, a7 - j .Ladd_same_sign - - .align 4 - .global __subdf3 - .type __subdf3, @function -__subdf3: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - - /* Check if the two operands have the same sign. */ - xor a7, xh, yh - bltz a7, .Lsub_opposite_signs - -.Lsub_same_sign: - /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */ - ball xh, a6, .Lsub_xnan_or_inf - ball yh, a6, .Lsub_ynan_or_inf - - /* Compare the operands. In contrast to addition, the entire - value matters here. */ - extui a7, xh, 20, 11 - extui a8, yh, 20, 11 - bltu xh, yh, .Lsub_xsmaller - beq xh, yh, .Lsub_compare_low - -.Lsub_ysmaller: - /* Check if the smaller (or equal) exponent is zero. */ - bnone yh, a6, .Lsub_yexpzero - - /* Replace yh sign/exponent with 0x001. */ - or yh, yh, a6 - slli yh, yh, 11 - srli yh, yh, 11 - -.Lsub_yexpdiff: - /* Compute the exponent difference. Optimize for difference < 32. */ - sub a10, a7, a8 - bgeui a10, 32, .Lsub_bigshifty - - /* Shift yh/yl right by the exponent difference. Any bits that are - shifted out of yl are saved in a9 for rounding the result. */ - ssr a10 - movi a9, 0 - src a9, yl, a9 - src yl, yh, yl - srl yh, yh - -.Lsub_suby: - /* Do the 64-bit subtraction. 
*/ - sub xh, xh, yh - bgeu xl, yl, 1f - addi xh, xh, -1 -1: sub xl, xl, yl - - /* Subtract the leftover bits in a9 from zero and propagate any - borrow from xh/xl. */ - neg a9, a9 - beqz a9, 1f - addi a5, xh, -1 - moveqz xh, a5, xl - addi xl, xl, -1 -1: - /* Check if the subtract underflowed into the exponent. */ - extui a10, xh, 20, 11 - beq a10, a7, .Lsub_round - j .Lsub_borrow - -.Lsub_compare_low: - /* The high words are equal. Compare the low words. */ - bltu xl, yl, .Lsub_xsmaller - bltu yl, xl, .Lsub_ysmaller - /* The operands are equal. Return 0.0. */ - movi xh, 0 - movi xl, 0 -1: leaf_return - -.Lsub_yexpzero: - /* y is a subnormal value. Replace its sign/exponent with zero, - i.e., no implicit "1.0". Unless x is also a subnormal, increment - y's apparent exponent because subnormals behave as if they had - the minimum (nonzero) exponent. */ - slli yh, yh, 12 - srli yh, yh, 12 - bnone xh, a6, .Lsub_yexpdiff - addi a8, a8, 1 - j .Lsub_yexpdiff - -.Lsub_bigshifty: - /* Exponent difference > 64 -- just return the bigger value. */ - bgeui a10, 64, 1b - - /* Shift yh/yl right by the exponent difference. Any bits that are - shifted out are saved in a9 for rounding the result. */ - ssr a10 - sll a11, yl /* lost bits shifted out of yl */ - src a9, yh, yl - srl yl, yh - movi yh, 0 - beqz a11, .Lsub_suby - or a9, a9, a10 /* any positive, nonzero value will work */ - j .Lsub_suby - -.Lsub_xsmaller: - /* Same thing as the "ysmaller" code, but with x and y swapped and - with y negated. */ - bnone xh, a6, .Lsub_xexpzero - - or xh, xh, a6 - slli xh, xh, 11 - srli xh, xh, 11 - -.Lsub_xexpdiff: - sub a10, a8, a7 - bgeui a10, 32, .Lsub_bigshiftx - - ssr a10 - movi a9, 0 - src a9, xl, a9 - src xl, xh, xl - srl xh, xh - - /* Negate y. */ - slli a11, a6, 11 - xor yh, yh, a11 - -.Lsub_subx: - sub xl, yl, xl - sub xh, yh, xh - bgeu yl, xl, 1f - addi xh, xh, -1 -1: - /* Subtract the leftover bits in a9 from zero and propagate any - borrow from xh/xl. */ - neg a9, a9 - beqz a9, 1f - addi a5, xh, -1 - moveqz xh, a5, xl - addi xl, xl, -1 -1: - /* Check if the subtract underflowed into the exponent. */ - extui a10, xh, 20, 11 - bne a10, a8, .Lsub_borrow - -.Lsub_round: - /* Round up if the leftover fraction is >= 1/2. */ - bgez a9, 1f - addi xl, xl, 1 - beqz xl, .Lsub_roundcarry - - /* Check if the leftover fraction is exactly 1/2. */ - slli a9, a9, 1 - beqz a9, .Lsub_exactlyhalf -1: leaf_return - -.Lsub_xexpzero: - /* Same as "yexpzero". */ - slli xh, xh, 12 - srli xh, xh, 12 - bnone yh, a6, .Lsub_xexpdiff - addi a7, a7, 1 - j .Lsub_xexpdiff - -.Lsub_bigshiftx: - /* Mostly the same thing as "bigshifty", but with the sign bit of the - shifted value set so that the subsequent subtraction flips the - sign of y. */ - bgeui a10, 64, .Lsub_returny - - ssr a10 - sll a11, xl - src a9, xh, xl - srl xl, xh - slli xh, a6, 11 /* set sign bit of xh */ - beqz a11, .Lsub_subx - or a9, a9, a10 - j .Lsub_subx - -.Lsub_returny: - /* Negate and return y. */ - slli a7, a6, 11 - xor xh, yh, a7 - mov xl, yl - leaf_return - -.Lsub_borrow: - /* The subtraction has underflowed into the exponent field, so the - value needs to be renormalized. Shift the mantissa left as - needed to remove any leading zeros and adjust the exponent - accordingly. If the exponent is not large enough to remove - all the leading zeros, the result will be a subnormal value. 
*/ - - slli a8, xh, 12 - beqz a8, .Lsub_xhzero - do_nsau a6, a8, a7, a11 - srli a8, a8, 12 - bge a6, a10, .Lsub_subnormal - addi a6, a6, 1 - -.Lsub_shift_lt32: - /* Shift the mantissa (a8/xl/a9) left by a6. */ - ssl a6 - src a8, a8, xl - src xl, xl, a9 - sll a9, a9 - - /* Combine the shifted mantissa with the sign and exponent, - decrementing the exponent by a6. (The exponent has already - been decremented by one due to the borrow from the subtraction, - but adding the mantissa will increment the exponent by one.) */ - srli xh, xh, 20 - sub xh, xh, a6 - slli xh, xh, 20 - add xh, xh, a8 - j .Lsub_round - -.Lsub_exactlyhalf: - /* Round down to the nearest even value. */ - srli xl, xl, 1 - slli xl, xl, 1 - leaf_return - -.Lsub_roundcarry: - /* xl is always zero when the rounding increment overflows, so - there's no need to round it to an even value. */ - addi xh, xh, 1 - /* Overflow to the exponent is OK. */ - leaf_return - -.Lsub_xhzero: - /* When normalizing the result, all the mantissa bits in the high - word are zero. Shift by "20 + (leading zero count of xl) + 1". */ - do_nsau a6, xl, a7, a11 - addi a6, a6, 21 - blt a10, a6, .Lsub_subnormal - -.Lsub_normalize_shift: - bltui a6, 32, .Lsub_shift_lt32 - - ssl a6 - src a8, xl, a9 - sll xl, a9 - movi a9, 0 - - srli xh, xh, 20 - sub xh, xh, a6 - slli xh, xh, 20 - add xh, xh, a8 - j .Lsub_round - -.Lsub_subnormal: - /* The exponent is too small to shift away all the leading zeros. - Set a6 to the current exponent (which has already been - decremented by the borrow) so that the exponent of the result - will be zero. Do not add 1 to a6 in this case, because: (1) - adding the mantissa will not increment the exponent, so there is - no need to subtract anything extra from the exponent to - compensate, and (2) the effective exponent of a subnormal is 1 - not 0 so the shift amount must be 1 smaller than normal. */ - mov a6, a10 - j .Lsub_normalize_shift - -#endif /* L_addsubdf3 */ - -#ifdef L_muldf3 - - /* Multiplication */ -#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 -#define XCHAL_NO_MUL 1 -#endif - -__muldf3_aux: - - /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). - (This code is placed before the start of the function just to - keep it in range of the limited branch displacements.) */ - -.Lmul_xexpzero: - /* Clear the sign bit of x. */ - slli xh, xh, 1 - srli xh, xh, 1 - - /* If x is zero, return zero. */ - or a10, xh, xl - beqz a10, .Lmul_return_zero - - /* Normalize x. Adjust the exponent in a8. */ - beqz xh, .Lmul_xh_zero - do_nsau a10, xh, a11, a12 - addi a10, a10, -11 - ssl a10 - src xh, xh, xl - sll xl, xl - movi a8, 1 - sub a8, a8, a10 - j .Lmul_xnormalized -.Lmul_xh_zero: - do_nsau a10, xl, a11, a12 - addi a10, a10, -11 - movi a8, -31 - sub a8, a8, a10 - ssl a10 - bltz a10, .Lmul_xl_srl - sll xh, xl - movi xl, 0 - j .Lmul_xnormalized -.Lmul_xl_srl: - srl xh, xl - sll xl, xl - j .Lmul_xnormalized - -.Lmul_yexpzero: - /* Clear the sign bit of y. */ - slli yh, yh, 1 - srli yh, yh, 1 - - /* If y is zero, return zero. */ - or a10, yh, yl - beqz a10, .Lmul_return_zero - - /* Normalize y. Adjust the exponent in a9. 
*/ - beqz yh, .Lmul_yh_zero - do_nsau a10, yh, a11, a12 - addi a10, a10, -11 - ssl a10 - src yh, yh, yl - sll yl, yl - movi a9, 1 - sub a9, a9, a10 - j .Lmul_ynormalized -.Lmul_yh_zero: - do_nsau a10, yl, a11, a12 - addi a10, a10, -11 - movi a9, -31 - sub a9, a9, a10 - ssl a10 - bltz a10, .Lmul_yl_srl - sll yh, yl - movi yl, 0 - j .Lmul_ynormalized -.Lmul_yl_srl: - srl yh, yl - sll yl, yl - j .Lmul_ynormalized - -.Lmul_return_zero: - /* Return zero with the appropriate sign bit. */ - srli xh, a7, 31 - slli xh, xh, 31 - movi xl, 0 - j .Lmul_done - -.Lmul_xnan_or_inf: - /* If y is zero, return NaN. */ - bnez yl, 1f - slli a8, yh, 1 - bnez a8, 1f - movi a4, 0x80000 /* make it a quiet NaN */ - or xh, xh, a4 - j .Lmul_done -1: - /* If y is NaN, return y. */ - bnall yh, a6, .Lmul_returnx - slli a8, yh, 12 - or a8, a8, yl - beqz a8, .Lmul_returnx - -.Lmul_returny: - mov xh, yh - mov xl, yl - -.Lmul_returnx: - /* Set the sign bit and return. */ - extui a7, a7, 31, 1 - slli xh, xh, 1 - ssai 1 - src xh, a7, xh - j .Lmul_done - -.Lmul_ynan_or_inf: - /* If x is zero, return NaN. */ - bnez xl, .Lmul_returny - slli a8, xh, 1 - bnez a8, .Lmul_returny - movi a7, 0x80000 /* make it a quiet NaN */ - or xh, yh, a7 - j .Lmul_done - - .align 4 - .global __muldf3 - .type __muldf3, @function -__muldf3: -#if __XTENSA_CALL0_ABI__ - leaf_entry sp, 32 - addi sp, sp, -32 - s32i a12, sp, 16 - s32i a13, sp, 20 - s32i a14, sp, 24 - s32i a15, sp, 28 -#elif XCHAL_NO_MUL - /* This is not really a leaf function; allocate enough stack space - to allow CALL12s to a helper function. */ - leaf_entry sp, 64 -#else - leaf_entry sp, 32 -#endif - movi a6, 0x7ff00000 - - /* Get the sign of the result. */ - xor a7, xh, yh - - /* Check for NaN and infinity. */ - ball xh, a6, .Lmul_xnan_or_inf - ball yh, a6, .Lmul_ynan_or_inf - - /* Extract the exponents. */ - extui a8, xh, 20, 11 - extui a9, yh, 20, 11 - - beqz a8, .Lmul_xexpzero -.Lmul_xnormalized: - beqz a9, .Lmul_yexpzero -.Lmul_ynormalized: - - /* Add the exponents. */ - add a8, a8, a9 - - /* Replace sign/exponent fields with explicit "1.0". */ - movi a10, 0x1fffff - or xh, xh, a6 - and xh, xh, a10 - or yh, yh, a6 - and yh, yh, a10 - - /* Multiply 64x64 to 128 bits. The result ends up in xh/xl/a6. - The least-significant word of the result is thrown away except - that if it is nonzero, the lsb of a6 is set to 1. */ -#if XCHAL_HAVE_MUL32_HIGH - - /* Compute a6 with any carry-outs in a10. */ - movi a10, 0 - mull a6, xl, yh - mull a11, xh, yl - add a6, a6, a11 - bgeu a6, a11, 1f - addi a10, a10, 1 -1: - muluh a11, xl, yl - add a6, a6, a11 - bgeu a6, a11, 1f - addi a10, a10, 1 -1: - /* If the low word of the result is nonzero, set the lsb of a6. */ - mull a11, xl, yl - beqz a11, 1f - movi a9, 1 - or a6, a6, a9 -1: - /* Compute xl with any carry-outs in a9. */ - movi a9, 0 - mull a11, xh, yh - add a10, a10, a11 - bgeu a10, a11, 1f - addi a9, a9, 1 -1: - muluh a11, xh, yl - add a10, a10, a11 - bgeu a10, a11, 1f - addi a9, a9, 1 -1: - muluh xl, xl, yh - add xl, xl, a10 - bgeu xl, a10, 1f - addi a9, a9, 1 -1: - /* Compute xh. */ - muluh xh, xh, yh - add xh, xh, a9 - -#else /* ! XCHAL_HAVE_MUL32_HIGH */ - - /* Break the inputs into 16-bit chunks and compute 16 32-bit partial - products. 
These partial products are: - - 0 xll * yll - - 1 xll * ylh - 2 xlh * yll - - 3 xll * yhl - 4 xlh * ylh - 5 xhl * yll - - 6 xll * yhh - 7 xlh * yhl - 8 xhl * ylh - 9 xhh * yll - - 10 xlh * yhh - 11 xhl * yhl - 12 xhh * ylh - - 13 xhl * yhh - 14 xhh * yhl - - 15 xhh * yhh - - where the input chunks are (hh, hl, lh, ll). If using the Mul16 - or Mul32 multiplier options, these input chunks must be stored in - separate registers. For Mac16, the UMUL.AA.* opcodes can specify - that the inputs come from either half of the registers, so there - is no need to shift them out ahead of time. If there is no - multiply hardware, the 16-bit chunks can be extracted when setting - up the arguments to the separate multiply function. */ - - /* Save a7 since it is needed to hold a temporary value. */ - s32i a7, sp, 4 -#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL - /* Calling a separate multiply function will clobber a0 and requires - use of a8 as a temporary, so save those values now. (The function - uses a custom ABI so nothing else needs to be saved.) */ - s32i a0, sp, 0 - s32i a8, sp, 8 -#endif - -#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 - -#define xlh a12 -#define ylh a13 -#define xhh a14 -#define yhh a15 - - /* Get the high halves of the inputs into registers. */ - srli xlh, xl, 16 - srli ylh, yl, 16 - srli xhh, xh, 16 - srli yhh, yh, 16 - -#define xll xl -#define yll yl -#define xhl xh -#define yhl yh - -#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 - /* Clear the high halves of the inputs. This does not matter - for MUL16 because the high bits are ignored. */ - extui xl, xl, 0, 16 - extui xh, xh, 0, 16 - extui yl, yl, 0, 16 - extui yh, yh, 0, 16 -#endif -#endif /* MUL16 || MUL32 */ - - -#if XCHAL_HAVE_MUL16 - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - mul16u dst, xreg ## xhalf, yreg ## yhalf - -#elif XCHAL_HAVE_MUL32 - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - mull dst, xreg ## xhalf, yreg ## yhalf - -#elif XCHAL_HAVE_MAC16 - -/* The preprocessor insists on inserting a space when concatenating after - a period in the definition of do_mul below. These macros are a workaround - using underscores instead of periods when doing the concatenation. */ -#define umul_aa_ll umul.aa.ll -#define umul_aa_lh umul.aa.lh -#define umul_aa_hl umul.aa.hl -#define umul_aa_hh umul.aa.hh - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - umul_aa_ ## xhalf ## yhalf xreg, yreg; \ - rsr dst, ACCLO - -#else /* no multiply hardware */ - -#define set_arg_l(dst, src) \ - extui dst, src, 0, 16 -#define set_arg_h(dst, src) \ - srli dst, src, 16 - -#if __XTENSA_CALL0_ABI__ -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - set_arg_ ## xhalf (a13, xreg); \ - set_arg_ ## yhalf (a14, yreg); \ - call0 .Lmul_mulsi3; \ - mov dst, a12 -#else -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - set_arg_ ## xhalf (a14, xreg); \ - set_arg_ ## yhalf (a15, yreg); \ - call12 .Lmul_mulsi3; \ - mov dst, a14 -#endif /* __XTENSA_CALL0_ABI__ */ - -#endif /* no multiply hardware */ - - /* Add pp1 and pp2 into a10 with carry-out in a9. */ - do_mul(a10, xl, l, yl, h) /* pp 1 */ - do_mul(a11, xl, h, yl, l) /* pp 2 */ - movi a9, 0 - add a10, a10, a11 - bgeu a10, a11, 1f - addi a9, a9, 1 -1: - /* Initialize a6 with a9/a10 shifted into position. Note that - this value can be safely incremented without any carry-outs. */ - ssai 16 - src a6, a9, a10 - - /* Compute the low word into a10. 
*/ - do_mul(a11, xl, l, yl, l) /* pp 0 */ - sll a10, a10 - add a10, a10, a11 - bgeu a10, a11, 1f - addi a6, a6, 1 -1: - /* Compute the contributions of pp0-5 to a6, with carry-outs in a9. - This is good enough to determine the low half of a6, so that any - nonzero bits from the low word of the result can be collapsed - into a6, freeing up a register. */ - movi a9, 0 - do_mul(a11, xl, l, yh, l) /* pp 3 */ - add a6, a6, a11 - bgeu a6, a11, 1f - addi a9, a9, 1 -1: - do_mul(a11, xl, h, yl, h) /* pp 4 */ - add a6, a6, a11 - bgeu a6, a11, 1f - addi a9, a9, 1 -1: - do_mul(a11, xh, l, yl, l) /* pp 5 */ - add a6, a6, a11 - bgeu a6, a11, 1f - addi a9, a9, 1 -1: - /* Collapse any nonzero bits from the low word into a6. */ - beqz a10, 1f - movi a11, 1 - or a6, a6, a11 -1: - /* Add pp6-9 into a11 with carry-outs in a10. */ - do_mul(a7, xl, l, yh, h) /* pp 6 */ - do_mul(a11, xh, h, yl, l) /* pp 9 */ - movi a10, 0 - add a11, a11, a7 - bgeu a11, a7, 1f - addi a10, a10, 1 -1: - do_mul(a7, xl, h, yh, l) /* pp 7 */ - add a11, a11, a7 - bgeu a11, a7, 1f - addi a10, a10, 1 -1: - do_mul(a7, xh, l, yl, h) /* pp 8 */ - add a11, a11, a7 - bgeu a11, a7, 1f - addi a10, a10, 1 -1: - /* Shift a10/a11 into position, and add low half of a11 to a6. */ - src a10, a10, a11 - add a10, a10, a9 - sll a11, a11 - add a6, a6, a11 - bgeu a6, a11, 1f - addi a10, a10, 1 -1: - /* Add pp10-12 into xl with carry-outs in a9. */ - movi a9, 0 - do_mul(xl, xl, h, yh, h) /* pp 10 */ - add xl, xl, a10 - bgeu xl, a10, 1f - addi a9, a9, 1 -1: - do_mul(a10, xh, l, yh, l) /* pp 11 */ - add xl, xl, a10 - bgeu xl, a10, 1f - addi a9, a9, 1 -1: - do_mul(a10, xh, h, yl, h) /* pp 12 */ - add xl, xl, a10 - bgeu xl, a10, 1f - addi a9, a9, 1 -1: - /* Add pp13-14 into a11 with carry-outs in a10. */ - do_mul(a11, xh, l, yh, h) /* pp 13 */ - do_mul(a7, xh, h, yh, l) /* pp 14 */ - movi a10, 0 - add a11, a11, a7 - bgeu a11, a7, 1f - addi a10, a10, 1 -1: - /* Shift a10/a11 into position, and add low half of a11 to a6. */ - src a10, a10, a11 - add a10, a10, a9 - sll a11, a11 - add xl, xl, a11 - bgeu xl, a11, 1f - addi a10, a10, 1 -1: - /* Compute xh. */ - do_mul(xh, xh, h, yh, h) /* pp 15 */ - add xh, xh, a10 - - /* Restore values saved on the stack during the multiplication. */ - l32i a7, sp, 4 -#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL - l32i a0, sp, 0 - l32i a8, sp, 8 -#endif -#endif /* ! XCHAL_HAVE_MUL32_HIGH */ - - /* Shift left by 12 bits, unless there was a carry-out from the - multiply, in which case, shift by 11 bits and increment the - exponent. Note: It is convenient to use the constant 0x3ff - instead of 0x400 when removing the extra exponent bias (so that - it is easy to construct 0x7fe for the overflow check). Reverse - the logic here to decrement the exponent sum by one unless there - was a carry-out. */ - movi a4, 11 - srli a5, xh, 21 - 12 - bnez a5, 1f - addi a4, a4, 1 - addi a8, a8, -1 -1: ssl a4 - src xh, xh, xl - src xl, xl, a6 - sll a6, a6 - - /* Subtract the extra bias from the exponent sum (plus one to account - for the explicit "1.0" of the mantissa that will be added to the - exponent in the final result). */ - movi a4, 0x3ff - sub a8, a8, a4 - - /* Check for over/underflow. The value in a8 is one less than the - final exponent, so values in the range 0..7fd are OK here. */ - slli a4, a4, 1 /* 0x7fe */ - bgeu a8, a4, .Lmul_overflow - -.Lmul_round: - /* Round. */ - bgez a6, .Lmul_rounded - addi xl, xl, 1 - beqz xl, .Lmul_roundcarry - slli a6, a6, 1 - beqz a6, .Lmul_exactlyhalf - -.Lmul_rounded: - /* Add the exponent to the mantissa. 
*/ - slli a8, a8, 20 - add xh, xh, a8 - -.Lmul_addsign: - /* Add the sign bit. */ - srli a7, a7, 31 - slli a7, a7, 31 - or xh, xh, a7 - -.Lmul_done: -#if __XTENSA_CALL0_ABI__ - l32i a12, sp, 16 - l32i a13, sp, 20 - l32i a14, sp, 24 - l32i a15, sp, 28 - addi sp, sp, 32 -#endif - leaf_return - -.Lmul_exactlyhalf: - /* Round down to the nearest even value. */ - srli xl, xl, 1 - slli xl, xl, 1 - j .Lmul_rounded - -.Lmul_roundcarry: - /* xl is always zero when the rounding increment overflows, so - there's no need to round it to an even value. */ - addi xh, xh, 1 - /* Overflow is OK -- it will be added to the exponent. */ - j .Lmul_rounded - -.Lmul_overflow: - bltz a8, .Lmul_underflow - /* Return +/- Infinity. */ - addi a8, a4, 1 /* 0x7ff */ - slli xh, a8, 20 - movi xl, 0 - j .Lmul_addsign - -.Lmul_underflow: - /* Create a subnormal value, where the exponent field contains zero, - but the effective exponent is 1. The value of a8 is one less than - the actual exponent, so just negate it to get the shift amount. */ - neg a8, a8 - mov a9, a6 - ssr a8 - bgeui a8, 32, .Lmul_bigshift - - /* Shift xh/xl right. Any bits that are shifted out of xl are saved - in a6 (combined with the shifted-out bits currently in a6) for - rounding the result. */ - sll a6, xl - src xl, xh, xl - srl xh, xh - j 1f - -.Lmul_bigshift: - bgeui a8, 64, .Lmul_flush_to_zero - sll a10, xl /* lost bits shifted out of xl */ - src a6, xh, xl - srl xl, xh - movi xh, 0 - or a9, a9, a10 - - /* Set the exponent to zero. */ -1: movi a8, 0 - - /* Pack any nonzero bits shifted out into a6. */ - beqz a9, .Lmul_round - movi a9, 1 - or a6, a6, a9 - j .Lmul_round - -.Lmul_flush_to_zero: - /* Return zero with the appropriate sign bit. */ - srli xh, a7, 31 - slli xh, xh, 31 - movi xl, 0 - j .Lmul_done - -#if XCHAL_NO_MUL - - /* For Xtensa processors with no multiply hardware, this simplified - version of _mulsi3 is used for multiplying 16-bit chunks of - the floating-point mantissas. When using CALL0, this function - uses a custom ABI: the inputs are passed in a13 and a14, the - result is returned in a12, and a8 and a15 are clobbered. */ - .align 4 -.Lmul_mulsi3: - leaf_entry sp, 16 - .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 - movi \dst, 0 -1: add \tmp1, \src2, \dst - extui \tmp2, \src1, 0, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx2 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 1, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx4 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 2, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx8 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 3, 1 - movnez \dst, \tmp1, \tmp2 - - srli \src1, \src1, 4 - slli \src2, \src2, 4 - bnez \src1, 1b - .endm -#if __XTENSA_CALL0_ABI__ - mul_mulsi3_body a12, a13, a14, a15, a8 -#else - /* The result will be written into a2, so save that argument in a4. */ - mov a4, a2 - mul_mulsi3_body a2, a4, a3, a5, a6 -#endif - leaf_return -#endif /* XCHAL_NO_MUL */ -#endif /* L_muldf3 */ - -#ifdef L_divdf3 - - /* Division */ -__divdf3_aux: - - /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). - (This code is placed before the start of the function just to - keep it in range of the limited branch displacements.) */ - -.Ldiv_yexpzero: - /* Clear the sign bit of y. */ - slli yh, yh, 1 - srli yh, yh, 1 - - /* Check for division by zero. */ - or a10, yh, yl - beqz a10, .Ldiv_yzero - - /* Normalize y. Adjust the exponent in a9. 
*/ - beqz yh, .Ldiv_yh_zero - do_nsau a10, yh, a11, a9 - addi a10, a10, -11 - ssl a10 - src yh, yh, yl - sll yl, yl - movi a9, 1 - sub a9, a9, a10 - j .Ldiv_ynormalized -.Ldiv_yh_zero: - do_nsau a10, yl, a11, a9 - addi a10, a10, -11 - movi a9, -31 - sub a9, a9, a10 - ssl a10 - bltz a10, .Ldiv_yl_srl - sll yh, yl - movi yl, 0 - j .Ldiv_ynormalized -.Ldiv_yl_srl: - srl yh, yl - sll yl, yl - j .Ldiv_ynormalized - -.Ldiv_yzero: - /* y is zero. Return NaN if x is also zero; otherwise, infinity. */ - slli xh, xh, 1 - srli xh, xh, 1 - or xl, xl, xh - srli xh, a7, 31 - slli xh, xh, 31 - or xh, xh, a6 - bnez xl, 1f - movi a4, 0x80000 /* make it a quiet NaN */ - or xh, xh, a4 -1: movi xl, 0 - leaf_return - -.Ldiv_xexpzero: - /* Clear the sign bit of x. */ - slli xh, xh, 1 - srli xh, xh, 1 - - /* If x is zero, return zero. */ - or a10, xh, xl - beqz a10, .Ldiv_return_zero - - /* Normalize x. Adjust the exponent in a8. */ - beqz xh, .Ldiv_xh_zero - do_nsau a10, xh, a11, a8 - addi a10, a10, -11 - ssl a10 - src xh, xh, xl - sll xl, xl - movi a8, 1 - sub a8, a8, a10 - j .Ldiv_xnormalized -.Ldiv_xh_zero: - do_nsau a10, xl, a11, a8 - addi a10, a10, -11 - movi a8, -31 - sub a8, a8, a10 - ssl a10 - bltz a10, .Ldiv_xl_srl - sll xh, xl - movi xl, 0 - j .Ldiv_xnormalized -.Ldiv_xl_srl: - srl xh, xl - sll xl, xl - j .Ldiv_xnormalized - -.Ldiv_return_zero: - /* Return zero with the appropriate sign bit. */ - srli xh, a7, 31 - slli xh, xh, 31 - movi xl, 0 - leaf_return - -.Ldiv_xnan_or_inf: - /* Set the sign bit of the result. */ - srli a7, yh, 31 - slli a7, a7, 31 - xor xh, xh, a7 - /* If y is NaN or Inf, return NaN. */ - bnall yh, a6, 1f - movi a4, 0x80000 /* make it a quiet NaN */ - or xh, xh, a4 -1: leaf_return - -.Ldiv_ynan_or_inf: - /* If y is Infinity, return zero. */ - slli a8, yh, 12 - or a8, a8, yl - beqz a8, .Ldiv_return_zero - /* y is NaN; return it. */ - mov xh, yh - mov xl, yl - leaf_return - -.Ldiv_highequal1: - bltu xl, yl, 2f - j 3f - - .align 4 - .global __divdf3 - .type __divdf3, @function -__divdf3: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - - /* Get the sign of the result. */ - xor a7, xh, yh - - /* Check for NaN and infinity. */ - ball xh, a6, .Ldiv_xnan_or_inf - ball yh, a6, .Ldiv_ynan_or_inf - - /* Extract the exponents. */ - extui a8, xh, 20, 11 - extui a9, yh, 20, 11 - - beqz a9, .Ldiv_yexpzero -.Ldiv_ynormalized: - beqz a8, .Ldiv_xexpzero -.Ldiv_xnormalized: - - /* Subtract the exponents. */ - sub a8, a8, a9 - - /* Replace sign/exponent fields with explicit "1.0". */ - movi a10, 0x1fffff - or xh, xh, a6 - and xh, xh, a10 - or yh, yh, a6 - and yh, yh, a10 - - /* Set SAR for left shift by one. */ - ssai (32 - 1) - - /* The first digit of the mantissa division must be a one. - Shift x (and adjust the exponent) as needed to make this true. */ - bltu yh, xh, 3f - beq yh, xh, .Ldiv_highequal1 -2: src xh, xh, xl - sll xl, xl - addi a8, a8, -1 -3: - /* Do the first subtraction and shift. */ - sub xh, xh, yh - bgeu xl, yl, 1f - addi xh, xh, -1 -1: sub xl, xl, yl - src xh, xh, xl - sll xl, xl - - /* Put the quotient into a10/a11. */ - movi a10, 0 - movi a11, 1 - - /* Divide one bit at a time for 52 bits. */ - movi a9, 52 -#if XCHAL_HAVE_LOOPS - loop a9, .Ldiv_loopend -#endif -.Ldiv_loop: - /* Shift the quotient << 1. */ - src a10, a10, a11 - sll a11, a11 - - /* Is this digit a 0 or 1? */ - bltu xh, yh, 3f - beq xh, yh, .Ldiv_highequal2 - - /* Output a 1 and subtract. */ -2: addi a11, a11, 1 - sub xh, xh, yh - bgeu xl, yl, 1f - addi xh, xh, -1 -1: sub xl, xl, yl - - /* Shift the dividend << 1. 
*/ -3: src xh, xh, xl - sll xl, xl - -#if !XCHAL_HAVE_LOOPS - addi a9, a9, -1 - bnez a9, .Ldiv_loop -#endif -.Ldiv_loopend: - - /* Add the exponent bias (less one to account for the explicit "1.0" - of the mantissa that will be added to the exponent in the final - result). */ - movi a9, 0x3fe - add a8, a8, a9 - - /* Check for over/underflow. The value in a8 is one less than the - final exponent, so values in the range 0..7fd are OK here. */ - addmi a9, a9, 0x400 /* 0x7fe */ - bgeu a8, a9, .Ldiv_overflow - -.Ldiv_round: - /* Round. The remainder (<< 1) is in xh/xl. */ - bltu xh, yh, .Ldiv_rounded - beq xh, yh, .Ldiv_highequal3 -.Ldiv_roundup: - addi a11, a11, 1 - beqz a11, .Ldiv_roundcarry - -.Ldiv_rounded: - mov xl, a11 - /* Add the exponent to the mantissa. */ - slli a8, a8, 20 - add xh, a10, a8 - -.Ldiv_addsign: - /* Add the sign bit. */ - srli a7, a7, 31 - slli a7, a7, 31 - or xh, xh, a7 - leaf_return - -.Ldiv_highequal2: - bgeu xl, yl, 2b - j 3b - -.Ldiv_highequal3: - bltu xl, yl, .Ldiv_rounded - bne xl, yl, .Ldiv_roundup - - /* Remainder is exactly half the divisor. Round even. */ - addi a11, a11, 1 - beqz a11, .Ldiv_roundcarry - srli a11, a11, 1 - slli a11, a11, 1 - j .Ldiv_rounded - -.Ldiv_overflow: - bltz a8, .Ldiv_underflow - /* Return +/- Infinity. */ - addi a8, a9, 1 /* 0x7ff */ - slli xh, a8, 20 - movi xl, 0 - j .Ldiv_addsign - -.Ldiv_underflow: - /* Create a subnormal value, where the exponent field contains zero, - but the effective exponent is 1. The value of a8 is one less than - the actual exponent, so just negate it to get the shift amount. */ - neg a8, a8 - ssr a8 - bgeui a8, 32, .Ldiv_bigshift - - /* Shift a10/a11 right. Any bits that are shifted out of a11 are - saved in a6 for rounding the result. */ - sll a6, a11 - src a11, a10, a11 - srl a10, a10 - j 1f - -.Ldiv_bigshift: - bgeui a8, 64, .Ldiv_flush_to_zero - sll a9, a11 /* lost bits shifted out of a11 */ - src a6, a10, a11 - srl a11, a10 - movi a10, 0 - or xl, xl, a9 - - /* Set the exponent to zero. */ -1: movi a8, 0 - - /* Pack any nonzero remainder (in xh/xl) into a6. */ - or xh, xh, xl - beqz xh, 1f - movi a9, 1 - or a6, a6, a9 - - /* Round a10/a11 based on the bits shifted out into a6. */ -1: bgez a6, .Ldiv_rounded - addi a11, a11, 1 - beqz a11, .Ldiv_roundcarry - slli a6, a6, 1 - bnez a6, .Ldiv_rounded - srli a11, a11, 1 - slli a11, a11, 1 - j .Ldiv_rounded - -.Ldiv_roundcarry: - /* a11 is always zero when the rounding increment overflows, so - there's no need to round it to an even value. */ - addi a10, a10, 1 - /* Overflow to the exponent field is OK. */ - j .Ldiv_rounded - -.Ldiv_flush_to_zero: - /* Return zero with the appropriate sign bit. */ - srli xh, a7, 31 - slli xh, xh, 31 - movi xl, 0 - leaf_return - -#endif /* L_divdf3 */ - -#ifdef L_cmpdf2 - - /* Equal and Not Equal */ - - .align 4 - .global __eqdf2 - .global __nedf2 - .set __nedf2, __eqdf2 - .type __eqdf2, @function -__eqdf2: - leaf_entry sp, 16 - bne xl, yl, 2f - bne xh, yh, 4f - - /* The values are equal but NaN != NaN. Check the exponent. */ - movi a6, 0x7ff00000 - ball xh, a6, 3f - - /* Equal. */ - movi a2, 0 - leaf_return - - /* Not equal. */ -2: movi a2, 1 - leaf_return - - /* Check if the mantissas are nonzero. */ -3: slli a7, xh, 12 - or a7, a7, xl - j 5f - - /* Check if x and y are zero with different signs. */ -4: or a7, xh, yh - slli a7, a7, 1 - or a7, a7, xl /* xl == yl here */ - - /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa - or x when exponent(x) = 0x7ff and x == y. 
*/ -5: movi a2, 0 - movi a3, 1 - movnez a2, a3, a7 - leaf_return - - - /* Greater Than */ - - .align 4 - .global __gtdf2 - .type __gtdf2, @function -__gtdf2: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - ball xh, a6, 2f -1: bnall yh, a6, .Lle_cmp - - /* Check if y is a NaN. */ - slli a7, yh, 12 - or a7, a7, yl - beqz a7, .Lle_cmp - movi a2, 0 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, xh, 12 - or a7, a7, xl - beqz a7, 1b - movi a2, 0 - leaf_return - - - /* Less Than or Equal */ - - .align 4 - .global __ledf2 - .type __ledf2, @function -__ledf2: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - ball xh, a6, 2f -1: bnall yh, a6, .Lle_cmp - - /* Check if y is a NaN. */ - slli a7, yh, 12 - or a7, a7, yl - beqz a7, .Lle_cmp - movi a2, 1 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, xh, 12 - or a7, a7, xl - beqz a7, 1b - movi a2, 1 - leaf_return - -.Lle_cmp: - /* Check if x and y have different signs. */ - xor a7, xh, yh - bltz a7, .Lle_diff_signs - - /* Check if x is negative. */ - bltz xh, .Lle_xneg - - /* Check if x <= y. */ - bltu xh, yh, 4f - bne xh, yh, 5f - bltu yl, xl, 5f -4: movi a2, 0 - leaf_return - -.Lle_xneg: - /* Check if y <= x. */ - bltu yh, xh, 4b - bne yh, xh, 5f - bgeu xl, yl, 4b -5: movi a2, 1 - leaf_return - -.Lle_diff_signs: - bltz xh, 4b - - /* Check if both x and y are zero. */ - or a7, xh, yh - slli a7, a7, 1 - or a7, a7, xl - or a7, a7, yl - movi a2, 1 - movi a3, 0 - moveqz a2, a3, a7 - leaf_return - - - /* Greater Than or Equal */ - - .align 4 - .global __gedf2 - .type __gedf2, @function -__gedf2: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - ball xh, a6, 2f -1: bnall yh, a6, .Llt_cmp - - /* Check if y is a NaN. */ - slli a7, yh, 12 - or a7, a7, yl - beqz a7, .Llt_cmp - movi a2, -1 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, xh, 12 - or a7, a7, xl - beqz a7, 1b - movi a2, -1 - leaf_return - - - /* Less Than */ - - .align 4 - .global __ltdf2 - .type __ltdf2, @function -__ltdf2: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - ball xh, a6, 2f -1: bnall yh, a6, .Llt_cmp - - /* Check if y is a NaN. */ - slli a7, yh, 12 - or a7, a7, yl - beqz a7, .Llt_cmp - movi a2, 0 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, xh, 12 - or a7, a7, xl - beqz a7, 1b - movi a2, 0 - leaf_return - -.Llt_cmp: - /* Check if x and y have different signs. */ - xor a7, xh, yh - bltz a7, .Llt_diff_signs - - /* Check if x is negative. */ - bltz xh, .Llt_xneg - - /* Check if x < y. */ - bltu xh, yh, 4f - bne xh, yh, 5f - bgeu xl, yl, 5f -4: movi a2, -1 - leaf_return - -.Llt_xneg: - /* Check if y < x. */ - bltu yh, xh, 4b - bne yh, xh, 5f - bltu yl, xl, 4b -5: movi a2, 0 - leaf_return - -.Llt_diff_signs: - bgez xh, 5b - - /* Check if both x and y are nonzero. */ - or a7, xh, yh - slli a7, a7, 1 - or a7, a7, xl - or a7, a7, yl - movi a2, 0 - movi a3, -1 - movnez a2, a3, a7 - leaf_return - - - /* Unordered */ - - .align 4 - .global __unorddf2 - .type __unorddf2, @function -__unorddf2: - leaf_entry sp, 16 - movi a6, 0x7ff00000 - ball xh, a6, 3f -1: ball yh, a6, 4f -2: movi a2, 0 - leaf_return - -3: slli a7, xh, 12 - or a7, a7, xl - beqz a7, 1b - movi a2, 1 - leaf_return - -4: slli a7, yh, 12 - or a7, a7, yl - beqz a7, 2b - movi a2, 1 - leaf_return - -#endif /* L_cmpdf2 */ - -#ifdef L_fixdfsi - - .align 4 - .global __fixdfsi - .type __fixdfsi, @function -__fixdfsi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7ff00000 - ball xh, a6, .Lfixdfsi_nan_or_inf - - /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32. 
*/ - extui a4, xh, 20, 11 - extui a5, a6, 19, 10 /* 0x3fe */ - sub a4, a4, a5 - bgei a4, 32, .Lfixdfsi_maxint - blti a4, 1, .Lfixdfsi_zero - - /* Add explicit "1.0" and shift << 11. */ - or a7, xh, a6 - ssai (32 - 11) - src a5, a7, xl - - /* Shift back to the right, based on the exponent. */ - ssl a4 /* shift by 32 - a4 */ - srl a5, a5 - - /* Negate the result if sign != 0. */ - neg a2, a5 - movgez a2, a5, a7 - leaf_return - -.Lfixdfsi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, xh, 12 - or a4, a4, xl - beqz a4, .Lfixdfsi_maxint - - /* Translate NaN to +maxint. */ - movi xh, 0 - -.Lfixdfsi_maxint: - slli a4, a6, 11 /* 0x80000000 */ - addi a5, a4, -1 /* 0x7fffffff */ - movgez a4, a5, xh - mov a2, a4 - leaf_return - -.Lfixdfsi_zero: - movi a2, 0 - leaf_return - -#endif /* L_fixdfsi */ - -#ifdef L_fixdfdi - - .align 4 - .global __fixdfdi - .type __fixdfdi, @function -__fixdfdi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7ff00000 - ball xh, a6, .Lfixdfdi_nan_or_inf - - /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64. */ - extui a4, xh, 20, 11 - extui a5, a6, 19, 10 /* 0x3fe */ - sub a4, a4, a5 - bgei a4, 64, .Lfixdfdi_maxint - blti a4, 1, .Lfixdfdi_zero - - /* Add explicit "1.0" and shift << 11. */ - or a7, xh, a6 - ssai (32 - 11) - src xh, a7, xl - sll xl, xl - - /* Shift back to the right, based on the exponent. */ - ssl a4 /* shift by 64 - a4 */ - bgei a4, 32, .Lfixdfdi_smallshift - srl xl, xh - movi xh, 0 - -.Lfixdfdi_shifted: - /* Negate the result if sign != 0. */ - bgez a7, 1f - neg xl, xl - neg xh, xh - beqz xl, 1f - addi xh, xh, -1 -1: leaf_return - -.Lfixdfdi_smallshift: - src xl, xh, xl - srl xh, xh - j .Lfixdfdi_shifted - -.Lfixdfdi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, xh, 12 - or a4, a4, xl - beqz a4, .Lfixdfdi_maxint - - /* Translate NaN to +maxint. */ - movi xh, 0 - -.Lfixdfdi_maxint: - slli a7, a6, 11 /* 0x80000000 */ - bgez xh, 1f - mov xh, a7 - movi xl, 0 - leaf_return - -1: addi xh, a7, -1 /* 0x7fffffff */ - movi xl, -1 - leaf_return - -.Lfixdfdi_zero: - movi xh, 0 - movi xl, 0 - leaf_return - -#endif /* L_fixdfdi */ - -#ifdef L_fixunsdfsi - - .align 4 - .global __fixunsdfsi - .type __fixunsdfsi, @function -__fixunsdfsi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7ff00000 - ball xh, a6, .Lfixunsdfsi_nan_or_inf - - /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32. */ - extui a4, xh, 20, 11 - extui a5, a6, 20, 10 /* 0x3ff */ - sub a4, a4, a5 - bgei a4, 32, .Lfixunsdfsi_maxint - bltz a4, .Lfixunsdfsi_zero - - /* Add explicit "1.0" and shift << 11. */ - or a7, xh, a6 - ssai (32 - 11) - src a5, a7, xl - - /* Shift back to the right, based on the exponent. */ - addi a4, a4, 1 - beqi a4, 32, .Lfixunsdfsi_bigexp - ssl a4 /* shift by 32 - a4 */ - srl a5, a5 - - /* Negate the result if sign != 0. */ - neg a2, a5 - movgez a2, a5, a7 - leaf_return - -.Lfixunsdfsi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, xh, 12 - or a4, a4, xl - beqz a4, .Lfixunsdfsi_maxint - - /* Translate NaN to 0xffffffff. */ - movi a2, -1 - leaf_return - -.Lfixunsdfsi_maxint: - slli a4, a6, 11 /* 0x80000000 */ - movi a5, -1 /* 0xffffffff */ - movgez a4, a5, xh - mov a2, a4 - leaf_return - -.Lfixunsdfsi_zero: - movi a2, 0 - leaf_return - -.Lfixunsdfsi_bigexp: - /* Handle unsigned maximum exponent case. */ - bltz xh, 1f - mov a2, a5 /* no shift needed */ - leaf_return - - /* Return 0x80000000 if negative. 
*/ -1: slli a2, a6, 11 - leaf_return - -#endif /* L_fixunsdfsi */ - -#ifdef L_fixunsdfdi - - .align 4 - .global __fixunsdfdi - .type __fixunsdfdi, @function -__fixunsdfdi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7ff00000 - ball xh, a6, .Lfixunsdfdi_nan_or_inf - - /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64. */ - extui a4, xh, 20, 11 - extui a5, a6, 20, 10 /* 0x3ff */ - sub a4, a4, a5 - bgei a4, 64, .Lfixunsdfdi_maxint - bltz a4, .Lfixunsdfdi_zero - - /* Add explicit "1.0" and shift << 11. */ - or a7, xh, a6 - ssai (32 - 11) - src xh, a7, xl - sll xl, xl - - /* Shift back to the right, based on the exponent. */ - addi a4, a4, 1 - beqi a4, 64, .Lfixunsdfdi_bigexp - ssl a4 /* shift by 64 - a4 */ - bgei a4, 32, .Lfixunsdfdi_smallshift - srl xl, xh - movi xh, 0 - -.Lfixunsdfdi_shifted: - /* Negate the result if sign != 0. */ - bgez a7, 1f - neg xl, xl - neg xh, xh - beqz xl, 1f - addi xh, xh, -1 -1: leaf_return - -.Lfixunsdfdi_smallshift: - src xl, xh, xl - srl xh, xh - j .Lfixunsdfdi_shifted - -.Lfixunsdfdi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, xh, 12 - or a4, a4, xl - beqz a4, .Lfixunsdfdi_maxint - - /* Translate NaN to 0xffffffff.... */ -1: movi xh, -1 - movi xl, -1 - leaf_return - -.Lfixunsdfdi_maxint: - bgez xh, 1b -2: slli xh, a6, 11 /* 0x80000000 */ - movi xl, 0 - leaf_return - -.Lfixunsdfdi_zero: - movi xh, 0 - movi xl, 0 - leaf_return - -.Lfixunsdfdi_bigexp: - /* Handle unsigned maximum exponent case. */ - bltz a7, 2b - leaf_return /* no shift needed */ - -#endif /* L_fixunsdfdi */ - -#ifdef L_floatsidf - - .align 4 - .global __floatunsidf - .type __floatunsidf, @function -__floatunsidf: - leaf_entry sp, 16 - beqz a2, .Lfloatsidf_return_zero - - /* Set the sign to zero and jump to the floatsidf code. */ - movi a7, 0 - j .Lfloatsidf_normalize - - .align 4 - .global __floatsidf - .type __floatsidf, @function -__floatsidf: - leaf_entry sp, 16 - - /* Check for zero. */ - beqz a2, .Lfloatsidf_return_zero - - /* Save the sign. */ - extui a7, a2, 31, 1 - - /* Get the absolute value. */ -#if XCHAL_HAVE_ABS - abs a2, a2 -#else - neg a4, a2 - movltz a2, a4, a2 -#endif - -.Lfloatsidf_normalize: - /* Normalize with the first 1 bit in the msb. */ - do_nsau a4, a2, a5, a6 - ssl a4 - sll a5, a2 - - /* Shift the mantissa into position. */ - srli xh, a5, 11 - slli xl, a5, (32 - 11) - - /* Set the exponent. */ - movi a5, 0x41d /* 0x3fe + 31 */ - sub a5, a5, a4 - slli a5, a5, 20 - add xh, xh, a5 - - /* Add the sign and return. */ - slli a7, a7, 31 - or xh, xh, a7 - leaf_return - -.Lfloatsidf_return_zero: - movi a3, 0 - leaf_return - -#endif /* L_floatsidf */ - -#ifdef L_floatdidf - - .align 4 - .global __floatundidf - .type __floatundidf, @function -__floatundidf: - leaf_entry sp, 16 - - /* Check for zero. */ - or a4, xh, xl - beqz a4, 2f - - /* Set the sign to zero and jump to the floatdidf code. */ - movi a7, 0 - j .Lfloatdidf_normalize - - .align 4 - .global __floatdidf - .type __floatdidf, @function -__floatdidf: - leaf_entry sp, 16 - - /* Check for zero. */ - or a4, xh, xl - beqz a4, 2f - - /* Save the sign. */ - extui a7, xh, 31, 1 - - /* Get the absolute value. */ - bgez xh, .Lfloatdidf_normalize - neg xl, xl - neg xh, xh - beqz xl, .Lfloatdidf_normalize - addi xh, xh, -1 - -.Lfloatdidf_normalize: - /* Normalize with the first 1 bit in the msb of xh. 
*/ - beqz xh, .Lfloatdidf_bigshift - do_nsau a4, xh, a5, a6 - ssl a4 - src xh, xh, xl - sll xl, xl - -.Lfloatdidf_shifted: - /* Shift the mantissa into position, with rounding bits in a6. */ - ssai 11 - sll a6, xl - src xl, xh, xl - srl xh, xh - - /* Set the exponent. */ - movi a5, 0x43d /* 0x3fe + 63 */ - sub a5, a5, a4 - slli a5, a5, 20 - add xh, xh, a5 - - /* Add the sign. */ - slli a7, a7, 31 - or xh, xh, a7 - - /* Round up if the leftover fraction is >= 1/2. */ - bgez a6, 2f - addi xl, xl, 1 - beqz xl, .Lfloatdidf_roundcarry - - /* Check if the leftover fraction is exactly 1/2. */ - slli a6, a6, 1 - beqz a6, .Lfloatdidf_exactlyhalf -2: leaf_return - -.Lfloatdidf_bigshift: - /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */ - do_nsau a4, xl, a5, a6 - ssl a4 - sll xh, xl - movi xl, 0 - addi a4, a4, 32 - j .Lfloatdidf_shifted - -.Lfloatdidf_exactlyhalf: - /* Round down to the nearest even value. */ - srli xl, xl, 1 - slli xl, xl, 1 - leaf_return - -.Lfloatdidf_roundcarry: - /* xl is always zero when the rounding increment overflows, so - there's no need to round it to an even value. */ - addi xh, xh, 1 - /* Overflow to the exponent is OK. */ - leaf_return - -#endif /* L_floatdidf */ - -#ifdef L_truncdfsf2 - - .align 4 - .global __truncdfsf2 - .type __truncdfsf2, @function -__truncdfsf2: - leaf_entry sp, 16 - - /* Adjust the exponent bias. */ - movi a4, (0x3ff - 0x7f) << 20 - sub a5, xh, a4 - - /* Check for underflow. */ - xor a6, xh, a5 - bltz a6, .Ltrunc_underflow - extui a6, a5, 20, 11 - beqz a6, .Ltrunc_underflow - - /* Check for overflow. */ - movi a4, 255 - bge a6, a4, .Ltrunc_overflow - - /* Shift a5/xl << 3 into a5/a4. */ - ssai (32 - 3) - src a5, a5, xl - sll a4, xl - -.Ltrunc_addsign: - /* Add the sign bit. */ - extui a6, xh, 31, 1 - slli a6, a6, 31 - or a2, a6, a5 - - /* Round up if the leftover fraction is >= 1/2. */ - bgez a4, 1f - addi a2, a2, 1 - /* Overflow to the exponent is OK. The answer will be correct. */ - - /* Check if the leftover fraction is exactly 1/2. */ - slli a4, a4, 1 - beqz a4, .Ltrunc_exactlyhalf -1: leaf_return - -.Ltrunc_exactlyhalf: - /* Round down to the nearest even value. */ - srli a2, a2, 1 - slli a2, a2, 1 - leaf_return - -.Ltrunc_overflow: - /* Check if exponent == 0x7ff. */ - movi a4, 0x7ff00000 - bnall xh, a4, 1f - - /* Check if mantissa is nonzero. */ - slli a5, xh, 12 - or a5, a5, xl - beqz a5, 1f - - /* Shift a4 to set a bit in the mantissa, making a quiet NaN. */ - srli a4, a4, 1 - -1: slli a4, a4, 4 /* 0xff000000 or 0xff800000 */ - /* Add the sign bit. */ - extui a6, xh, 31, 1 - ssai 1 - src a2, a6, a4 - leaf_return - -.Ltrunc_underflow: - /* Find shift count for a subnormal. Flush to zero if >= 32. */ - extui a6, xh, 20, 11 - movi a5, 0x3ff - 0x7f - sub a6, a5, a6 - addi a6, a6, 1 - bgeui a6, 32, 1f - - /* Replace the exponent with an explicit "1.0". */ - slli a5, a5, 13 /* 0x700000 */ - or a5, a5, xh - slli a5, a5, 11 - srli a5, a5, 11 - - /* Shift the mantissa left by 3 bits (into a5/a4). */ - ssai (32 - 3) - src a5, a5, xl - sll a4, xl - - /* Shift right by a6. */ - ssr a6 - sll a7, a4 - src a4, a5, a4 - srl a5, a5 - beqz a7, .Ltrunc_addsign - or a4, a4, a6 /* any positive, nonzero value will work */ - j .Ltrunc_addsign - - /* Return +/- zero. */ -1: extui a2, xh, 31, 1 - slli a2, a2, 31 - leaf_return - -#endif /* L_truncdfsf2 */ - -#ifdef L_extendsfdf2 - - .align 4 - .global __extendsfdf2 - .type __extendsfdf2, @function -__extendsfdf2: - leaf_entry sp, 16 - - /* Save the sign bit and then shift it off. 
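Before diving into the details, here is a hedged C model of the whole widening conversion (invented name; note that, like the assembly, it returns a default quiet NaN and drops any NaN payload):

    #include <stdint.h>

    // Hypothetical model of __extendsfdf2 on raw bit patterns.
    static uint64_t extendsfdf2_model(uint32_t f)
    {
        uint64_t sign = (uint64_t)(f >> 31) << 63;
        int exp = (f >> 23) & 0xff;
        uint32_t frac = f & 0x7fffffu;

        if (exp == 0xff)                        // Inf or NaN
            return sign | (frac ? 0x7ff8000000000000ULL
                                : 0x7ff0000000000000ULL);
        if (exp == 0) {
            if (frac == 0)
                return sign;                    // signed zero
            // Subnormal: every single-precision subnormal becomes
            // a normal double, so renormalize it here.
            int p = 31 - __builtin_clz(frac);   // leading-bit position
            exp = p - 149 + 1023;               // value was frac * 2^-149
            frac = (frac << (23 - p)) & 0x7fffffu;  // drop the leading 1
        } else {
            exp += 1023 - 127;                  // rebias: the 0x380 below
        }
        return sign | ((uint64_t)exp << 52) | ((uint64_t)frac << 29);
    }

The only case needing real work is the subnormal input, which the assembly handles at .Lextend_expzero by shifting the fraction up and deriving the exponent from the shift count.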
*/ - extui a5, a2, 31, 1 - slli a5, a5, 31 - slli a4, a2, 1 - - /* Extract and check the exponent. */ - extui a6, a2, 23, 8 - beqz a6, .Lextend_expzero - addi a6, a6, 1 - beqi a6, 256, .Lextend_nan_or_inf - - /* Shift >> 3 into a4/xl. */ - srli a4, a4, 4 - slli xl, a2, (32 - 3) - - /* Adjust the exponent bias. */ - movi a6, (0x3ff - 0x7f) << 20 - add a4, a4, a6 - - /* Add the sign bit. */ - or xh, a4, a5 - leaf_return - -.Lextend_nan_or_inf: - movi a4, 0x7ff00000 - - /* Check for NaN. */ - slli a7, a2, 9 - beqz a7, 1f - - slli a6, a6, 11 /* 0x80000 */ - or a4, a4, a6 - - /* Add the sign and return. */ -1: or xh, a4, a5 - movi xl, 0 - leaf_return - -.Lextend_expzero: - beqz a4, 1b - - /* Normalize it to have 8 zero bits before the first 1 bit. */ - do_nsau a7, a4, a2, a3 - addi a7, a7, -8 - ssl a7 - sll a4, a4 - - /* Shift >> 3 into a4/xl. */ - slli xl, a4, (32 - 3) - srli a4, a4, 3 - - /* Set the exponent. */ - movi a6, 0x3fe - 0x7f - sub a6, a6, a7 - slli a6, a6, 20 - add a4, a4, a6 - - /* Add the sign and return. */ - or xh, a4, a5 - leaf_return - -#endif /* L_extendsfdf2 */ - - diff --git a/gcc/config/xtensa/ieee754-sf.S b/gcc/config/xtensa/ieee754-sf.S deleted file mode 100644 index d75be0e5ae5..00000000000 --- a/gcc/config/xtensa/ieee754-sf.S +++ /dev/null @@ -1,1757 +0,0 @@ -/* IEEE-754 single-precision functions for Xtensa - Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc. - Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - GCC is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY - or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public - License for more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#ifdef __XTENSA_EB__ -#define xh a2 -#define xl a3 -#define yh a4 -#define yl a5 -#else -#define xh a3 -#define xl a2 -#define yh a5 -#define yl a4 -#endif - -/* Warning! The branch displacements for some Xtensa branch instructions - are quite small, and this code has been carefully laid out to keep - branch targets in range. If you change anything, be sure to check that - the assembler is not relaxing anything to branch over a jump. */ - -#ifdef L_negsf2 - - .align 4 - .global __negsf2 - .type __negsf2, @function -__negsf2: - leaf_entry sp, 16 - movi a4, 0x80000000 - xor a2, a2, a4 - leaf_return - -#endif /* L_negsf2 */ - -#ifdef L_addsubsf3 - - /* Addition */ -__addsf3_aux: - - /* Handle NaNs and Infinities. (This code is placed before the - start of the function just to keep it in range of the limited - branch displacements.) */ - -.Ladd_xnan_or_inf: - /* If y is neither Infinity nor NaN, return x. */ - bnall a3, a6, 1f - /* If x is a NaN, return it. Otherwise, return y. */ - slli a7, a2, 9 - beqz a7, .Ladd_ynan_or_inf -1: leaf_return - -.Ladd_ynan_or_inf: - /* Return y. 
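(Taken together, the two special-case entries above behave like this hedged C sketch; the name is invented, and it applies only to the same-sign addition path, with x already known to be NaN or infinity:

    #include <stdint.h>

    // Sketch of the NaN/Inf dispatch for same-sign addition.
    static uint32_t addsf3_special(uint32_t x, uint32_t y)
    {
        const uint32_t expmask = 0x7f800000u;
        if ((y & expmask) != expmask)   // y is finite: propagate x
            return x;
        if ((x << 9) != 0)              // x is a NaN: return it
            return x;
        return y;                       // x is Inf: return y
    }

Shifting left by 9 discards the sign and exponent bits, so a nonzero result means a nonzero mantissa, i.e. a NaN; the same test recurs throughout this file.)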
*/ - mov a2, a3 - leaf_return - -.Ladd_opposite_signs: - /* Operand signs differ. Do a subtraction. */ - slli a7, a6, 8 - xor a3, a3, a7 - j .Lsub_same_sign - - .align 4 - .global __addsf3 - .type __addsf3, @function -__addsf3: - leaf_entry sp, 16 - movi a6, 0x7f800000 - - /* Check if the two operands have the same sign. */ - xor a7, a2, a3 - bltz a7, .Ladd_opposite_signs - -.Ladd_same_sign: - /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */ - ball a2, a6, .Ladd_xnan_or_inf - ball a3, a6, .Ladd_ynan_or_inf - - /* Compare the exponents. The smaller operand will be shifted - right by the exponent difference and added to the larger - one. */ - extui a7, a2, 23, 9 - extui a8, a3, 23, 9 - bltu a7, a8, .Ladd_shiftx - -.Ladd_shifty: - /* Check if the smaller (or equal) exponent is zero. */ - bnone a3, a6, .Ladd_yexpzero - - /* Replace y sign/exponent with 0x008. */ - or a3, a3, a6 - slli a3, a3, 8 - srli a3, a3, 8 - -.Ladd_yexpdiff: - /* Compute the exponent difference. */ - sub a10, a7, a8 - - /* Exponent difference > 32 -- just return the bigger value. */ - bgeui a10, 32, 1f - - /* Shift y right by the exponent difference. Any bits that are - shifted out of y are saved in a9 for rounding the result. */ - ssr a10 - movi a9, 0 - src a9, a3, a9 - srl a3, a3 - - /* Do the addition. */ - add a2, a2, a3 - - /* Check if the add overflowed into the exponent. */ - extui a10, a2, 23, 9 - beq a10, a7, .Ladd_round - mov a8, a7 - j .Ladd_carry - -.Ladd_yexpzero: - /* y is a subnormal value. Replace its sign/exponent with zero, - i.e., no implicit "1.0", and increment the apparent exponent - because subnormals behave as if they had the minimum (nonzero) - exponent. Test for the case when both exponents are zero. */ - slli a3, a3, 9 - srli a3, a3, 9 - bnone a2, a6, .Ladd_bothexpzero - addi a8, a8, 1 - j .Ladd_yexpdiff - -.Ladd_bothexpzero: - /* Both exponents are zero. Handle this as a special case. There - is no need to shift or round, and the normal code for handling - a carry into the exponent field will not work because it - assumes there is an implicit "1.0" that needs to be added. */ - add a2, a2, a3 -1: leaf_return - -.Ladd_xexpzero: - /* Same as "yexpzero" except skip handling the case when both - exponents are zero. */ - slli a2, a2, 9 - srli a2, a2, 9 - addi a7, a7, 1 - j .Ladd_xexpdiff - -.Ladd_shiftx: - /* Same thing as the "shifty" code, but with x and y swapped. Also, - because the exponent difference is always nonzero in this version, - the shift sequence can use SLL and skip loading a constant zero. */ - bnone a2, a6, .Ladd_xexpzero - - or a2, a2, a6 - slli a2, a2, 8 - srli a2, a2, 8 - -.Ladd_xexpdiff: - sub a10, a8, a7 - bgeui a10, 32, .Ladd_returny - - ssr a10 - sll a9, a2 - srl a2, a2 - - add a2, a2, a3 - - /* Check if the add overflowed into the exponent. */ - extui a10, a2, 23, 9 - bne a10, a8, .Ladd_carry - -.Ladd_round: - /* Round up if the leftover fraction is >= 1/2. */ - bgez a9, 1f - addi a2, a2, 1 - - /* Check if the leftover fraction is exactly 1/2. */ - slli a9, a9, 1 - beqz a9, .Ladd_exactlyhalf -1: leaf_return - -.Ladd_returny: - mov a2, a3 - leaf_return - -.Ladd_carry: - /* The addition has overflowed into the exponent field, so the - value needs to be renormalized. The mantissa of the result - can be recovered by subtracting the original exponent and - adding 0x800000 (which is the explicit "1.0" for the - mantissa of the non-shifted operand -- the "1.0" for the - shifted operand was already added). The mantissa can then - be shifted right by one bit. 
The explicit "1.0" of the - shifted mantissa then needs to be replaced by the exponent, - incremented by one to account for the normalizing shift. - It is faster to combine these operations: do the shift first - and combine the additions and subtractions. If x is the - original exponent, the result is: - shifted mantissa - (x << 22) + (1 << 22) + (x << 23) - or: - shifted mantissa + ((x + 1) << 22) - Note that the exponent is incremented here by leaving the - explicit "1.0" of the mantissa in the exponent field. */ - - /* Shift x right by one bit. Save the lsb. */ - mov a10, a2 - srli a2, a2, 1 - - /* See explanation above. The original exponent is in a8. */ - addi a8, a8, 1 - slli a8, a8, 22 - add a2, a2, a8 - - /* Return an Infinity if the exponent overflowed. */ - ball a2, a6, .Ladd_infinity - - /* Same thing as the "round" code except the msb of the leftover - fraction is bit 0 of a10, with the rest of the fraction in a9. */ - bbci.l a10, 0, 1f - addi a2, a2, 1 - beqz a9, .Ladd_exactlyhalf -1: leaf_return - -.Ladd_infinity: - /* Clear the mantissa. */ - srli a2, a2, 23 - slli a2, a2, 23 - - /* The sign bit may have been lost in a carry-out. Put it back. */ - slli a8, a8, 1 - or a2, a2, a8 - leaf_return - -.Ladd_exactlyhalf: - /* Round down to the nearest even value. */ - srli a2, a2, 1 - slli a2, a2, 1 - leaf_return - - - /* Subtraction */ -__subsf3_aux: - - /* Handle NaNs and Infinities. (This code is placed before the - start of the function just to keep it in range of the limited - branch displacements.) */ - -.Lsub_xnan_or_inf: - /* If y is neither Infinity nor NaN, return x. */ - bnall a3, a6, 1f - /* Both x and y are either NaN or Inf, so the result is NaN. */ - movi a4, 0x400000 /* make it a quiet NaN */ - or a2, a2, a4 -1: leaf_return - -.Lsub_ynan_or_inf: - /* Negate y and return it. */ - slli a7, a6, 8 - xor a2, a3, a7 - leaf_return - -.Lsub_opposite_signs: - /* Operand signs differ. Do an addition. */ - slli a7, a6, 8 - xor a3, a3, a7 - j .Ladd_same_sign - - .align 4 - .global __subsf3 - .type __subsf3, @function -__subsf3: - leaf_entry sp, 16 - movi a6, 0x7f800000 - - /* Check if the two operands have the same sign. */ - xor a7, a2, a3 - bltz a7, .Lsub_opposite_signs - -.Lsub_same_sign: - /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */ - ball a2, a6, .Lsub_xnan_or_inf - ball a3, a6, .Lsub_ynan_or_inf - - /* Compare the operands. In contrast to addition, the entire - value matters here. */ - extui a7, a2, 23, 8 - extui a8, a3, 23, 8 - bltu a2, a3, .Lsub_xsmaller - -.Lsub_ysmaller: - /* Check if the smaller (or equal) exponent is zero. */ - bnone a3, a6, .Lsub_yexpzero - - /* Replace y sign/exponent with 0x008. */ - or a3, a3, a6 - slli a3, a3, 8 - srli a3, a3, 8 - -.Lsub_yexpdiff: - /* Compute the exponent difference. */ - sub a10, a7, a8 - - /* Exponent difference > 32 -- just return the bigger value. */ - bgeui a10, 32, 1f - - /* Shift y right by the exponent difference. Any bits that are - shifted out of y are saved in a9 for rounding the result. */ - ssr a10 - movi a9, 0 - src a9, a3, a9 - srl a3, a3 - - sub a2, a2, a3 - - /* Subtract the leftover bits in a9 from zero and propagate any - borrow from a2. */ - neg a9, a9 - addi a10, a2, -1 - movnez a2, a10, a9 - - /* Check if the subtract underflowed into the exponent. */ - extui a10, a2, 23, 8 - beq a10, a7, .Lsub_round - j .Lsub_borrow - -.Lsub_yexpzero: - /* Return zero if the inputs are equal. 
(For the non-subnormal - case, subtracting the "1.0" will cause a borrow from the exponent - and this case can be detected when handling the borrow.) */ - beq a2, a3, .Lsub_return_zero - - /* y is a subnormal value. Replace its sign/exponent with zero, - i.e., no implicit "1.0". Unless x is also a subnormal, increment - y's apparent exponent because subnormals behave as if they had - the minimum (nonzero) exponent. */ - slli a3, a3, 9 - srli a3, a3, 9 - bnone a2, a6, .Lsub_yexpdiff - addi a8, a8, 1 - j .Lsub_yexpdiff - -.Lsub_returny: - /* Negate and return y. */ - slli a7, a6, 8 - xor a2, a3, a7 -1: leaf_return - -.Lsub_xsmaller: - /* Same thing as the "ysmaller" code, but with x and y swapped and - with y negated. */ - bnone a2, a6, .Lsub_xexpzero - - or a2, a2, a6 - slli a2, a2, 8 - srli a2, a2, 8 - -.Lsub_xexpdiff: - sub a10, a8, a7 - bgeui a10, 32, .Lsub_returny - - ssr a10 - movi a9, 0 - src a9, a2, a9 - srl a2, a2 - - /* Negate y. */ - slli a11, a6, 8 - xor a3, a3, a11 - - sub a2, a3, a2 - - neg a9, a9 - addi a10, a2, -1 - movnez a2, a10, a9 - - /* Check if the subtract underflowed into the exponent. */ - extui a10, a2, 23, 8 - bne a10, a8, .Lsub_borrow - -.Lsub_round: - /* Round up if the leftover fraction is >= 1/2. */ - bgez a9, 1f - addi a2, a2, 1 - - /* Check if the leftover fraction is exactly 1/2. */ - slli a9, a9, 1 - beqz a9, .Lsub_exactlyhalf -1: leaf_return - -.Lsub_xexpzero: - /* Same as "yexpzero". */ - beq a2, a3, .Lsub_return_zero - slli a2, a2, 9 - srli a2, a2, 9 - bnone a3, a6, .Lsub_xexpdiff - addi a7, a7, 1 - j .Lsub_xexpdiff - -.Lsub_return_zero: - movi a2, 0 - leaf_return - -.Lsub_borrow: - /* The subtraction has underflowed into the exponent field, so the - value needs to be renormalized. Shift the mantissa left as - needed to remove any leading zeros and adjust the exponent - accordingly. If the exponent is not large enough to remove - all the leading zeros, the result will be a subnormal value. */ - - slli a8, a2, 9 - beqz a8, .Lsub_xzero - do_nsau a6, a8, a7, a11 - srli a8, a8, 9 - bge a6, a10, .Lsub_subnormal - addi a6, a6, 1 - -.Lsub_normalize_shift: - /* Shift the mantissa (a8/a9) left by a6. */ - ssl a6 - src a8, a8, a9 - sll a9, a9 - - /* Combine the shifted mantissa with the sign and exponent, - decrementing the exponent by a6. (The exponent has already - been decremented by one due to the borrow from the subtraction, - but adding the mantissa will increment the exponent by one.) */ - srli a2, a2, 23 - sub a2, a2, a6 - slli a2, a2, 23 - add a2, a2, a8 - j .Lsub_round - -.Lsub_exactlyhalf: - /* Round down to the nearest even value. */ - srli a2, a2, 1 - slli a2, a2, 1 - leaf_return - -.Lsub_xzero: - /* If there was a borrow from the exponent, and the mantissa and - guard digits are all zero, then the inputs were equal and the - result should be zero. */ - beqz a9, .Lsub_return_zero - - /* Only the guard digit is nonzero. Shift by min(24, a10). */ - addi a11, a10, -24 - movi a6, 24 - movltz a6, a10, a11 - j .Lsub_normalize_shift - -.Lsub_subnormal: - /* The exponent is too small to shift away all the leading zeros. - Set a6 to the current exponent (which has already been - decremented by the borrow) so that the exponent of the result - will be zero. Do not add 1 to a6 in this case, because: (1) - adding the mantissa will not increment the exponent, so there is - no need to subtract anything extra from the exponent to - compensate, and (2) the effective exponent of a subnormal is 1 - not 0 so the shift amount must be 1 smaller than normal. 
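The choice of shift amount reduces to this hedged sketch (invented name; lead is the number of leading zeros found in the mantissa, room is how far the exponent can drop before reaching zero):

    // Sketch of the shift-amount trade-off described above.
    static int normalize_shift(int lead, int room)
    {
        if (lead + 1 <= room)
            return lead + 1;   // fully normalized result
        return room;           // subnormal: exponent field becomes zero
    }

This is only the shape of the decision; the assembly folds the borrow and the implicit-one bookkeeping into the same comparison.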
*/ - mov a6, a10 - j .Lsub_normalize_shift - -#endif /* L_addsubsf3 */ - -#ifdef L_mulsf3 - - /* Multiplication */ -#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 -#define XCHAL_NO_MUL 1 -#endif - -__mulsf3_aux: - - /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). - (This code is placed before the start of the function just to - keep it in range of the limited branch displacements.) */ - -.Lmul_xexpzero: - /* Clear the sign bit of x. */ - slli a2, a2, 1 - srli a2, a2, 1 - - /* If x is zero, return zero. */ - beqz a2, .Lmul_return_zero - - /* Normalize x. Adjust the exponent in a8. */ - do_nsau a10, a2, a11, a12 - addi a10, a10, -8 - ssl a10 - sll a2, a2 - movi a8, 1 - sub a8, a8, a10 - j .Lmul_xnormalized - -.Lmul_yexpzero: - /* Clear the sign bit of y. */ - slli a3, a3, 1 - srli a3, a3, 1 - - /* If y is zero, return zero. */ - beqz a3, .Lmul_return_zero - - /* Normalize y. Adjust the exponent in a9. */ - do_nsau a10, a3, a11, a12 - addi a10, a10, -8 - ssl a10 - sll a3, a3 - movi a9, 1 - sub a9, a9, a10 - j .Lmul_ynormalized - -.Lmul_return_zero: - /* Return zero with the appropriate sign bit. */ - srli a2, a7, 31 - slli a2, a2, 31 - j .Lmul_done - -.Lmul_xnan_or_inf: - /* If y is zero, return NaN. */ - slli a8, a3, 1 - bnez a8, 1f - movi a4, 0x400000 /* make it a quiet NaN */ - or a2, a2, a4 - j .Lmul_done -1: - /* If y is NaN, return y. */ - bnall a3, a6, .Lmul_returnx - slli a8, a3, 9 - beqz a8, .Lmul_returnx - -.Lmul_returny: - mov a2, a3 - -.Lmul_returnx: - /* Set the sign bit and return. */ - extui a7, a7, 31, 1 - slli a2, a2, 1 - ssai 1 - src a2, a7, a2 - j .Lmul_done - -.Lmul_ynan_or_inf: - /* If x is zero, return NaN. */ - slli a8, a2, 1 - bnez a8, .Lmul_returny - movi a7, 0x400000 /* make it a quiet NaN */ - or a2, a3, a7 - j .Lmul_done - - .align 4 - .global __mulsf3 - .type __mulsf3, @function -__mulsf3: -#if __XTENSA_CALL0_ABI__ - leaf_entry sp, 32 - addi sp, sp, -32 - s32i a12, sp, 16 - s32i a13, sp, 20 - s32i a14, sp, 24 - s32i a15, sp, 28 -#elif XCHAL_NO_MUL - /* This is not really a leaf function; allocate enough stack space - to allow CALL12s to a helper function. */ - leaf_entry sp, 64 -#else - leaf_entry sp, 32 -#endif - movi a6, 0x7f800000 - - /* Get the sign of the result. */ - xor a7, a2, a3 - - /* Check for NaN and infinity. */ - ball a2, a6, .Lmul_xnan_or_inf - ball a3, a6, .Lmul_ynan_or_inf - - /* Extract the exponents. */ - extui a8, a2, 23, 8 - extui a9, a3, 23, 8 - - beqz a8, .Lmul_xexpzero -.Lmul_xnormalized: - beqz a9, .Lmul_yexpzero -.Lmul_ynormalized: - - /* Add the exponents. */ - add a8, a8, a9 - - /* Replace sign/exponent fields with explicit "1.0". */ - movi a10, 0xffffff - or a2, a2, a6 - and a2, a2, a10 - or a3, a3, a6 - and a3, a3, a10 - - /* Multiply 32x32 to 64 bits. The result ends up in a2/a6. */ - -#if XCHAL_HAVE_MUL32_HIGH - - mull a6, a2, a3 - muluh a2, a2, a3 - -#else - - /* Break the inputs into 16-bit chunks and compute 4 32-bit partial - products. These partial products are: - - 0 xl * yl - - 1 xl * yh - 2 xh * yl - - 3 xh * yh - - If using the Mul16 or Mul32 multiplier options, these input - chunks must be stored in separate registers. For Mac16, the - UMUL.AA.* opcodes can specify that the inputs come from either - half of the registers, so there is no need to shift them out - ahead of time. If there is no multiply hardware, the 16-bit - chunks can be extracted when setting up the arguments to the - separate multiply function. 
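In C terms, the scheme is the classic four-partial-product widening multiply; a hedged reference model (invented names), equivalent to computing (uint64_t)a * b:

    #include <stdint.h>

    // Hedged model of the partial-product sum; the unsigned compares
    // recover the carries the assembly accumulates in a9.
    static void umul32x32(uint32_t a, uint32_t b,
                          uint32_t *hi, uint32_t *lo)
    {
        uint32_t al = a & 0xffff, ah = a >> 16;
        uint32_t bl = b & 0xffff, bh = b >> 16;

        uint32_t pp0 = al * bl;          // bits  0..31
        uint32_t pp1 = al * bh;          // bits 16..47
        uint32_t pp2 = ah * bl;          // bits 16..47
        uint32_t pp3 = ah * bh;          // bits 32..63

        uint32_t mid  = pp1 + pp2;
        uint32_t midc = (mid < pp1);     // carry out, weight 2^48
        uint32_t low  = pp0 + (mid << 16);
        uint32_t lowc = (low < pp0);     // carry out, weight 2^32

        *lo = low;
        *hi = pp3 + (mid >> 16) + (midc << 16) + lowc;
    }

The bgeu/addi pairs below are exactly these carry recoveries: an unsigned sum that came out smaller than one of its operands must have wrapped.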
*/ - -#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL - /* Calling a separate multiply function will clobber a0 and requires - use of a8 as a temporary, so save those values now. (The function - uses a custom ABI so nothing else needs to be saved.) */ - s32i a0, sp, 0 - s32i a8, sp, 4 -#endif - -#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 - -#define a2h a4 -#define a3h a5 - - /* Get the high halves of the inputs into registers. */ - srli a2h, a2, 16 - srli a3h, a3, 16 - -#define a2l a2 -#define a3l a3 - -#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 - /* Clear the high halves of the inputs. This does not matter - for MUL16 because the high bits are ignored. */ - extui a2, a2, 0, 16 - extui a3, a3, 0, 16 -#endif -#endif /* MUL16 || MUL32 */ - - -#if XCHAL_HAVE_MUL16 - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - mul16u dst, xreg ## xhalf, yreg ## yhalf - -#elif XCHAL_HAVE_MUL32 - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - mull dst, xreg ## xhalf, yreg ## yhalf - -#elif XCHAL_HAVE_MAC16 - -/* The preprocessor insists on inserting a space when concatenating after - a period in the definition of do_mul below. These macros are a workaround - using underscores instead of periods when doing the concatenation. */ -#define umul_aa_ll umul.aa.ll -#define umul_aa_lh umul.aa.lh -#define umul_aa_hl umul.aa.hl -#define umul_aa_hh umul.aa.hh - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - umul_aa_ ## xhalf ## yhalf xreg, yreg; \ - rsr dst, ACCLO - -#else /* no multiply hardware */ - -#define set_arg_l(dst, src) \ - extui dst, src, 0, 16 -#define set_arg_h(dst, src) \ - srli dst, src, 16 - -#if __XTENSA_CALL0_ABI__ -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - set_arg_ ## xhalf (a13, xreg); \ - set_arg_ ## yhalf (a14, yreg); \ - call0 .Lmul_mulsi3; \ - mov dst, a12 -#else -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - set_arg_ ## xhalf (a14, xreg); \ - set_arg_ ## yhalf (a15, yreg); \ - call12 .Lmul_mulsi3; \ - mov dst, a14 -#endif /* __XTENSA_CALL0_ABI__ */ - -#endif /* no multiply hardware */ - - /* Add pp1 and pp2 into a6 with carry-out in a9. */ - do_mul(a6, a2, l, a3, h) /* pp 1 */ - do_mul(a11, a2, h, a3, l) /* pp 2 */ - movi a9, 0 - add a6, a6, a11 - bgeu a6, a11, 1f - addi a9, a9, 1 -1: - /* Shift the high half of a9/a6 into position in a9. Note that - this value can be safely incremented without any carry-outs. */ - ssai 16 - src a9, a9, a6 - - /* Compute the low word into a6. */ - do_mul(a11, a2, l, a3, l) /* pp 0 */ - sll a6, a6 - add a6, a6, a11 - bgeu a6, a11, 1f - addi a9, a9, 1 -1: - /* Compute the high word into a2. */ - do_mul(a2, a2, h, a3, h) /* pp 3 */ - add a2, a2, a9 - -#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL - /* Restore values saved on the stack during the multiplication. */ - l32i a0, sp, 0 - l32i a8, sp, 4 -#endif -#endif /* ! XCHAL_HAVE_MUL32_HIGH */ - - /* Shift left by 9 bits, unless there was a carry-out from the - multiply, in which case, shift by 8 bits and increment the - exponent. */ - movi a4, 9 - srli a5, a2, 24 - 9 - beqz a5, 1f - addi a4, a4, -1 - addi a8, a8, 1 -1: ssl a4 - src a2, a2, a6 - sll a6, a6 - - /* Subtract the extra bias from the exponent sum (plus one to account - for the explicit "1.0" of the mantissa that will be added to the - exponent in the final result). */ - movi a4, 0x80 - sub a8, a8, a4 - - /* Check for over/underflow. The value in a8 is one less than the - final exponent, so values in the range 0..fd are OK here. */ - movi a4, 0xfe - bgeu a8, a4, .Lmul_overflow - -.Lmul_round: - /* Round. 
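The rounding idiom used here, and in the add, subtract, and conversion paths alike, reads as follows in a hedged C sketch (invented name; guard holds the bits shifted out of the result, msb-aligned, and result may be the packed word or just the mantissa, since the increment works either way):

    #include <stdint.h>

    // Round to nearest, ties to even.
    static uint32_t round_nearest_even(uint32_t result, uint32_t guard)
    {
        if (guard & 0x80000000u) {      // leftover fraction >= 1/2 ulp
            result += 1;
            if ((guard << 1) == 0)      // exactly 1/2: a tie
                result &= ~1u;          // round to even: clear the lsb
        }
        return result;
    }

Incrementing the packed word is safe because a mantissa overflow carries straight into the exponent field, which is precisely the renormalization the format requires; hence the recurring remarks that overflow to the exponent is OK.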
*/ - bgez a6, .Lmul_rounded - addi a2, a2, 1 - slli a6, a6, 1 - beqz a6, .Lmul_exactlyhalf - -.Lmul_rounded: - /* Add the exponent to the mantissa. */ - slli a8, a8, 23 - add a2, a2, a8 - -.Lmul_addsign: - /* Add the sign bit. */ - srli a7, a7, 31 - slli a7, a7, 31 - or a2, a2, a7 - -.Lmul_done: -#if __XTENSA_CALL0_ABI__ - l32i a12, sp, 16 - l32i a13, sp, 20 - l32i a14, sp, 24 - l32i a15, sp, 28 - addi sp, sp, 32 -#endif - leaf_return - -.Lmul_exactlyhalf: - /* Round down to the nearest even value. */ - srli a2, a2, 1 - slli a2, a2, 1 - j .Lmul_rounded - -.Lmul_overflow: - bltz a8, .Lmul_underflow - /* Return +/- Infinity. */ - movi a8, 0xff - slli a2, a8, 23 - j .Lmul_addsign - -.Lmul_underflow: - /* Create a subnormal value, where the exponent field contains zero, - but the effective exponent is 1. The value of a8 is one less than - the actual exponent, so just negate it to get the shift amount. */ - neg a8, a8 - mov a9, a6 - ssr a8 - bgeui a8, 32, .Lmul_flush_to_zero - - /* Shift a2 right. Any bits that are shifted out of a2 are saved - in a6 (combined with the shifted-out bits currently in a6) for - rounding the result. */ - sll a6, a2 - srl a2, a2 - - /* Set the exponent to zero. */ - movi a8, 0 - - /* Pack any nonzero bits shifted out into a6. */ - beqz a9, .Lmul_round - movi a9, 1 - or a6, a6, a9 - j .Lmul_round - -.Lmul_flush_to_zero: - /* Return zero with the appropriate sign bit. */ - srli a2, a7, 31 - slli a2, a2, 31 - j .Lmul_done - -#if XCHAL_NO_MUL - - /* For Xtensa processors with no multiply hardware, this simplified - version of _mulsi3 is used for multiplying 16-bit chunks of - the floating-point mantissas. When using CALL0, this function - uses a custom ABI: the inputs are passed in a13 and a14, the - result is returned in a12, and a8 and a15 are clobbered. */ - .align 4 -.Lmul_mulsi3: - leaf_entry sp, 16 - .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2 - movi \dst, 0 -1: add \tmp1, \src2, \dst - extui \tmp2, \src1, 0, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx2 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 1, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx4 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 2, 1 - movnez \dst, \tmp1, \tmp2 - - do_addx8 \tmp1, \src2, \dst, \tmp1 - extui \tmp2, \src1, 3, 1 - movnez \dst, \tmp1, \tmp2 - - srli \src1, \src1, 4 - slli \src2, \src2, 4 - bnez \src1, 1b - .endm -#if __XTENSA_CALL0_ABI__ - mul_mulsi3_body a12, a13, a14, a15, a8 -#else - /* The result will be written into a2, so save that argument in a4. */ - mov a4, a2 - mul_mulsi3_body a2, a4, a3, a5, a6 -#endif - leaf_return -#endif /* XCHAL_NO_MUL */ -#endif /* L_mulsf3 */ - -#ifdef L_divsf3 - - /* Division */ -__divsf3_aux: - - /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). - (This code is placed before the start of the function just to - keep it in range of the limited branch displacements.) */ - -.Ldiv_yexpzero: - /* Clear the sign bit of y. */ - slli a3, a3, 1 - srli a3, a3, 1 - - /* Check for division by zero. */ - beqz a3, .Ldiv_yzero - - /* Normalize y. Adjust the exponent in a9. */ - do_nsau a10, a3, a4, a5 - addi a10, a10, -8 - ssl a10 - sll a3, a3 - movi a9, 1 - sub a9, a9, a10 - j .Ldiv_ynormalized - -.Ldiv_yzero: - /* y is zero. Return NaN if x is also zero; otherwise, infinity. */ - slli a4, a2, 1 - srli a4, a4, 1 - srli a2, a7, 31 - slli a2, a2, 31 - or a2, a2, a6 - bnez a4, 1f - movi a4, 0x400000 /* make it a quiet NaN */ - or a2, a2, a4 -1: leaf_return - -.Ldiv_xexpzero: - /* Clear the sign bit of x. 
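Once normalized, x rejoins the main sequence below, whose quotient loop is classic restoring division, one bit per iteration; as a hedged C sketch (invented name; x and y are mantissas with the implicit 1 set, and x has already been adjusted so that the first quotient digit is 1):

    #include <stdint.h>

    // Sketch of the 24-bit quotient loop (compare .Ldiv_loop below).
    static uint32_t mant_divide(uint32_t x, uint32_t y, uint32_t *rem)
    {
        uint32_t q = 1;                 // the first digit is known
        x = (x - y) << 1;
        for (int i = 0; i < 23; i++) {
            q <<= 1;
            if (x >= y) {               // next quotient digit is 1
                q += 1;
                x -= y;
            }
            x <<= 1;
        }
        *rem = x;                       // remainder << 1, for rounding
        return q;                       // 24-bit quotient
    }

The same test-subtract-and-shift loop, generalized to arbitrary operands, reappears in the __udivsi3 and __divsi3 routines of lib1funcs.asm later in this patch.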
*/ - slli a2, a2, 1 - srli a2, a2, 1 - - /* If x is zero, return zero. */ - beqz a2, .Ldiv_return_zero - - /* Normalize x. Adjust the exponent in a8. */ - do_nsau a10, a2, a4, a5 - addi a10, a10, -8 - ssl a10 - sll a2, a2 - movi a8, 1 - sub a8, a8, a10 - j .Ldiv_xnormalized - -.Ldiv_return_zero: - /* Return zero with the appropriate sign bit. */ - srli a2, a7, 31 - slli a2, a2, 31 - leaf_return - -.Ldiv_xnan_or_inf: - /* Set the sign bit of the result. */ - srli a7, a3, 31 - slli a7, a7, 31 - xor a2, a2, a7 - /* If y is NaN or Inf, return NaN. */ - bnall a3, a6, 1f - movi a4, 0x400000 /* make it a quiet NaN */ - or a2, a2, a4 -1: leaf_return - -.Ldiv_ynan_or_inf: - /* If y is Infinity, return zero. */ - slli a8, a3, 9 - beqz a8, .Ldiv_return_zero - /* y is NaN; return it. */ - mov a2, a3 - leaf_return - - .align 4 - .global __divsf3 - .type __divsf3, @function -__divsf3: - leaf_entry sp, 16 - movi a6, 0x7f800000 - - /* Get the sign of the result. */ - xor a7, a2, a3 - - /* Check for NaN and infinity. */ - ball a2, a6, .Ldiv_xnan_or_inf - ball a3, a6, .Ldiv_ynan_or_inf - - /* Extract the exponents. */ - extui a8, a2, 23, 8 - extui a9, a3, 23, 8 - - beqz a9, .Ldiv_yexpzero -.Ldiv_ynormalized: - beqz a8, .Ldiv_xexpzero -.Ldiv_xnormalized: - - /* Subtract the exponents. */ - sub a8, a8, a9 - - /* Replace sign/exponent fields with explicit "1.0". */ - movi a10, 0xffffff - or a2, a2, a6 - and a2, a2, a10 - or a3, a3, a6 - and a3, a3, a10 - - /* The first digit of the mantissa division must be a one. - Shift x (and adjust the exponent) as needed to make this true. */ - bltu a3, a2, 1f - slli a2, a2, 1 - addi a8, a8, -1 -1: - /* Do the first subtraction and shift. */ - sub a2, a2, a3 - slli a2, a2, 1 - - /* Put the quotient into a10. */ - movi a10, 1 - - /* Divide one bit at a time for 23 bits. */ - movi a9, 23 -#if XCHAL_HAVE_LOOPS - loop a9, .Ldiv_loopend -#endif -.Ldiv_loop: - /* Shift the quotient << 1. */ - slli a10, a10, 1 - - /* Is this digit a 0 or 1? */ - bltu a2, a3, 1f - - /* Output a 1 and subtract. */ - addi a10, a10, 1 - sub a2, a2, a3 - - /* Shift the dividend << 1. */ -1: slli a2, a2, 1 - -#if !XCHAL_HAVE_LOOPS - addi a9, a9, -1 - bnez a9, .Ldiv_loop -#endif -.Ldiv_loopend: - - /* Add the exponent bias (less one to account for the explicit "1.0" - of the mantissa that will be added to the exponent in the final - result). */ - addi a8, a8, 0x7e - - /* Check for over/underflow. The value in a8 is one less than the - final exponent, so values in the range 0..fd are OK here. */ - movi a4, 0xfe - bgeu a8, a4, .Ldiv_overflow - -.Ldiv_round: - /* Round. The remainder (<< 1) is in a2. */ - bltu a2, a3, .Ldiv_rounded - addi a10, a10, 1 - beq a2, a3, .Ldiv_exactlyhalf - -.Ldiv_rounded: - /* Add the exponent to the mantissa. */ - slli a8, a8, 23 - add a2, a10, a8 - -.Ldiv_addsign: - /* Add the sign bit. */ - srli a7, a7, 31 - slli a7, a7, 31 - or a2, a2, a7 - leaf_return - -.Ldiv_overflow: - bltz a8, .Ldiv_underflow - /* Return +/- Infinity. */ - addi a8, a4, 1 /* 0xff */ - slli a2, a8, 23 - j .Ldiv_addsign - -.Ldiv_exactlyhalf: - /* Remainder is exactly half the divisor. Round even. */ - srli a10, a10, 1 - slli a10, a10, 1 - j .Ldiv_rounded - -.Ldiv_underflow: - /* Create a subnormal value, where the exponent field contains zero, - but the effective exponent is 1. The value of a8 is one less than - the actual exponent, so just negate it to get the shift amount. */ - neg a8, a8 - ssr a8 - bgeui a8, 32, .Ldiv_flush_to_zero - - /* Shift a10 right. 
Any bits that are shifted out of a10 are - saved in a6 for rounding the result. */ - sll a6, a10 - srl a10, a10 - - /* Set the exponent to zero. */ - movi a8, 0 - - /* Pack any nonzero remainder (in a2) into a6. */ - beqz a2, 1f - movi a9, 1 - or a6, a6, a9 - - /* Round a10 based on the bits shifted out into a6. */ -1: bgez a6, .Ldiv_rounded - addi a10, a10, 1 - slli a6, a6, 1 - bnez a6, .Ldiv_rounded - srli a10, a10, 1 - slli a10, a10, 1 - j .Ldiv_rounded - -.Ldiv_flush_to_zero: - /* Return zero with the appropriate sign bit. */ - srli a2, a7, 31 - slli a2, a2, 31 - leaf_return - -#endif /* L_divsf3 */ - -#ifdef L_cmpsf2 - - /* Equal and Not Equal */ - - .align 4 - .global __eqsf2 - .global __nesf2 - .set __nesf2, __eqsf2 - .type __eqsf2, @function -__eqsf2: - leaf_entry sp, 16 - bne a2, a3, 4f - - /* The values are equal but NaN != NaN. Check the exponent. */ - movi a6, 0x7f800000 - ball a2, a6, 3f - - /* Equal. */ - movi a2, 0 - leaf_return - - /* Not equal. */ -2: movi a2, 1 - leaf_return - - /* Check if the mantissas are nonzero. */ -3: slli a7, a2, 9 - j 5f - - /* Check if x and y are zero with different signs. */ -4: or a7, a2, a3 - slli a7, a7, 1 - - /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa - or x when exponent(x) = 0x7f8 and x == y. */ -5: movi a2, 0 - movi a3, 1 - movnez a2, a3, a7 - leaf_return - - - /* Greater Than */ - - .align 4 - .global __gtsf2 - .type __gtsf2, @function -__gtsf2: - leaf_entry sp, 16 - movi a6, 0x7f800000 - ball a2, a6, 2f -1: bnall a3, a6, .Lle_cmp - - /* Check if y is a NaN. */ - slli a7, a3, 9 - beqz a7, .Lle_cmp - movi a2, 0 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, a2, 9 - beqz a7, 1b - movi a2, 0 - leaf_return - - - /* Less Than or Equal */ - - .align 4 - .global __lesf2 - .type __lesf2, @function -__lesf2: - leaf_entry sp, 16 - movi a6, 0x7f800000 - ball a2, a6, 2f -1: bnall a3, a6, .Lle_cmp - - /* Check if y is a NaN. */ - slli a7, a3, 9 - beqz a7, .Lle_cmp - movi a2, 1 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, a2, 9 - beqz a7, 1b - movi a2, 1 - leaf_return - -.Lle_cmp: - /* Check if x and y have different signs. */ - xor a7, a2, a3 - bltz a7, .Lle_diff_signs - - /* Check if x is negative. */ - bltz a2, .Lle_xneg - - /* Check if x <= y. */ - bltu a3, a2, 5f -4: movi a2, 0 - leaf_return - -.Lle_xneg: - /* Check if y <= x. */ - bgeu a2, a3, 4b -5: movi a2, 1 - leaf_return - -.Lle_diff_signs: - bltz a2, 4b - - /* Check if both x and y are zero. */ - or a7, a2, a3 - slli a7, a7, 1 - movi a2, 1 - movi a3, 0 - moveqz a2, a3, a7 - leaf_return - - - /* Greater Than or Equal */ - - .align 4 - .global __gesf2 - .type __gesf2, @function -__gesf2: - leaf_entry sp, 16 - movi a6, 0x7f800000 - ball a2, a6, 2f -1: bnall a3, a6, .Llt_cmp - - /* Check if y is a NaN. */ - slli a7, a3, 9 - beqz a7, .Llt_cmp - movi a2, -1 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, a2, 9 - beqz a7, 1b - movi a2, -1 - leaf_return - - - /* Less Than */ - - .align 4 - .global __ltsf2 - .type __ltsf2, @function -__ltsf2: - leaf_entry sp, 16 - movi a6, 0x7f800000 - ball a2, a6, 2f -1: bnall a3, a6, .Llt_cmp - - /* Check if y is a NaN. */ - slli a7, a3, 9 - beqz a7, .Llt_cmp - movi a2, 0 - leaf_return - - /* Check if x is a NaN. */ -2: slli a7, a2, 9 - beqz a7, 1b - movi a2, 0 - leaf_return - -.Llt_cmp: - /* Check if x and y have different signs. */ - xor a7, a2, a3 - bltz a7, .Llt_diff_signs - - /* Check if x is negative. */ - bltz a2, .Llt_xneg - - /* Check if x < y. 
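The ordered comparisons exploit the sign-magnitude layout: floats of the same sign order like unsigned integers on their raw bits, with the order reversed when both are negative, and only the +0/-0 pair needs special care. As a hedged C model (invented name; it assumes NaNs were already screened out by the checks above):

    #include <stdint.h>

    // Model of .Llt_cmp on raw bits: -1 if x < y, else 0.
    static int ltsf2_model(uint32_t x, uint32_t y)
    {
        if ((int32_t)(x ^ y) < 0) {             // different signs
            if (((x | y) << 1) == 0)            // +0 and -0 are equal
                return 0;
            return ((int32_t)x < 0) ? -1 : 0;   // the negative one is less
        }
        if ((int32_t)x < 0)                     // both negative:
            return (y < x) ? -1 : 0;            // unsigned order reverses
        return (x < y) ? -1 : 0;                // both positive
    }

The lesf2/gesf2/gtsf2 variants around it differ only in their NaN result values and in whether equality counts.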
*/ - bgeu a2, a3, 5f -4: movi a2, -1 - leaf_return - -.Llt_xneg: - /* Check if y < x. */ - bltu a3, a2, 4b -5: movi a2, 0 - leaf_return - -.Llt_diff_signs: - bgez a2, 5b - - /* Check if both x and y are nonzero. */ - or a7, a2, a3 - slli a7, a7, 1 - movi a2, 0 - movi a3, -1 - movnez a2, a3, a7 - leaf_return - - - /* Unordered */ - - .align 4 - .global __unordsf2 - .type __unordsf2, @function -__unordsf2: - leaf_entry sp, 16 - movi a6, 0x7f800000 - ball a2, a6, 3f -1: ball a3, a6, 4f -2: movi a2, 0 - leaf_return - -3: slli a7, a2, 9 - beqz a7, 1b - movi a2, 1 - leaf_return - -4: slli a7, a3, 9 - beqz a7, 2b - movi a2, 1 - leaf_return - -#endif /* L_cmpsf2 */ - -#ifdef L_fixsfsi - - .align 4 - .global __fixsfsi - .type __fixsfsi, @function -__fixsfsi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7f800000 - ball a2, a6, .Lfixsfsi_nan_or_inf - - /* Extract the exponent and check if 0 < (exp - 0x7e) < 32. */ - extui a4, a2, 23, 8 - addi a4, a4, -0x7e - bgei a4, 32, .Lfixsfsi_maxint - blti a4, 1, .Lfixsfsi_zero - - /* Add explicit "1.0" and shift << 8. */ - or a7, a2, a6 - slli a5, a7, 8 - - /* Shift back to the right, based on the exponent. */ - ssl a4 /* shift by 32 - a4 */ - srl a5, a5 - - /* Negate the result if sign != 0. */ - neg a2, a5 - movgez a2, a5, a7 - leaf_return - -.Lfixsfsi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, a2, 9 - beqz a4, .Lfixsfsi_maxint - - /* Translate NaN to +maxint. */ - movi a2, 0 - -.Lfixsfsi_maxint: - slli a4, a6, 8 /* 0x80000000 */ - addi a5, a4, -1 /* 0x7fffffff */ - movgez a4, a5, a2 - mov a2, a4 - leaf_return - -.Lfixsfsi_zero: - movi a2, 0 - leaf_return - -#endif /* L_fixsfsi */ - -#ifdef L_fixsfdi - - .align 4 - .global __fixsfdi - .type __fixsfdi, @function -__fixsfdi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7f800000 - ball a2, a6, .Lfixsfdi_nan_or_inf - - /* Extract the exponent and check if 0 < (exp - 0x7e) < 64. */ - extui a4, a2, 23, 8 - addi a4, a4, -0x7e - bgei a4, 64, .Lfixsfdi_maxint - blti a4, 1, .Lfixsfdi_zero - - /* Add explicit "1.0" and shift << 8. */ - or a7, a2, a6 - slli xh, a7, 8 - - /* Shift back to the right, based on the exponent. */ - ssl a4 /* shift by 64 - a4 */ - bgei a4, 32, .Lfixsfdi_smallshift - srl xl, xh - movi xh, 0 - -.Lfixsfdi_shifted: - /* Negate the result if sign != 0. */ - bgez a7, 1f - neg xl, xl - neg xh, xh - beqz xl, 1f - addi xh, xh, -1 -1: leaf_return - -.Lfixsfdi_smallshift: - movi xl, 0 - sll xl, xh - srl xh, xh - j .Lfixsfdi_shifted - -.Lfixsfdi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, a2, 9 - beqz a4, .Lfixsfdi_maxint - - /* Translate NaN to +maxint. */ - movi a2, 0 - -.Lfixsfdi_maxint: - slli a7, a6, 8 /* 0x80000000 */ - bgez a2, 1f - mov xh, a7 - movi xl, 0 - leaf_return - -1: addi xh, a7, -1 /* 0x7fffffff */ - movi xl, -1 - leaf_return - -.Lfixsfdi_zero: - movi xh, 0 - movi xl, 0 - leaf_return - -#endif /* L_fixsfdi */ - -#ifdef L_fixunssfsi - - .align 4 - .global __fixunssfsi - .type __fixunssfsi, @function -__fixunssfsi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7f800000 - ball a2, a6, .Lfixunssfsi_nan_or_inf - - /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32. */ - extui a4, a2, 23, 8 - addi a4, a4, -0x7f - bgei a4, 32, .Lfixunssfsi_maxint - bltz a4, .Lfixunssfsi_zero - - /* Add explicit "1.0" and shift << 8. */ - or a7, a2, a6 - slli a5, a7, 8 - - /* Shift back to the right, based on the exponent. 
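Throughout the DImode conversions, a 64-bit shift is composed across the xh/xl register pair with the ssl/src/srl sequences; a hedged C sketch of that composition (invented name):

    #include <stdint.h>

    // Right-shift a 64-bit value held as two 32-bit words; the
    // amount must be in 1..63 (C leaves shifts by 32 undefined).
    static void shift_right_pair(uint32_t *hi, uint32_t *lo, int amount)
    {
        if (amount < 32) {               // the "smallshift" cases
            *lo = (*hi << (32 - amount)) | (*lo >> amount);  // like SRC
            *hi >>= amount;                                  // like SRL
        } else {                         // hi empties into lo
            *lo = *hi >> (amount - 32);
            *hi = 0;
        }
    }

SRC is a funnel shift: it extracts a 32-bit window from the 64-bit concatenation of its two source registers, which is why each small shift needs only two instructions.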
*/ - addi a4, a4, 1 - beqi a4, 32, .Lfixunssfsi_bigexp - ssl a4 /* shift by 32 - a4 */ - srl a5, a5 - - /* Negate the result if sign != 0. */ - neg a2, a5 - movgez a2, a5, a7 - leaf_return - -.Lfixunssfsi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, a2, 9 - beqz a4, .Lfixunssfsi_maxint - - /* Translate NaN to 0xffffffff. */ - movi a2, -1 - leaf_return - -.Lfixunssfsi_maxint: - slli a4, a6, 8 /* 0x80000000 */ - movi a5, -1 /* 0xffffffff */ - movgez a4, a5, a2 - mov a2, a4 - leaf_return - -.Lfixunssfsi_zero: - movi a2, 0 - leaf_return - -.Lfixunssfsi_bigexp: - /* Handle unsigned maximum exponent case. */ - bltz a2, 1f - mov a2, a5 /* no shift needed */ - leaf_return - - /* Return 0x80000000 if negative. */ -1: slli a2, a6, 8 - leaf_return - -#endif /* L_fixunssfsi */ - -#ifdef L_fixunssfdi - - .align 4 - .global __fixunssfdi - .type __fixunssfdi, @function -__fixunssfdi: - leaf_entry sp, 16 - - /* Check for NaN and Infinity. */ - movi a6, 0x7f800000 - ball a2, a6, .Lfixunssfdi_nan_or_inf - - /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64. */ - extui a4, a2, 23, 8 - addi a4, a4, -0x7f - bgei a4, 64, .Lfixunssfdi_maxint - bltz a4, .Lfixunssfdi_zero - - /* Add explicit "1.0" and shift << 8. */ - or a7, a2, a6 - slli xh, a7, 8 - - /* Shift back to the right, based on the exponent. */ - addi a4, a4, 1 - beqi a4, 64, .Lfixunssfdi_bigexp - ssl a4 /* shift by 64 - a4 */ - bgei a4, 32, .Lfixunssfdi_smallshift - srl xl, xh - movi xh, 0 - -.Lfixunssfdi_shifted: - /* Negate the result if sign != 0. */ - bgez a7, 1f - neg xl, xl - neg xh, xh - beqz xl, 1f - addi xh, xh, -1 -1: leaf_return - -.Lfixunssfdi_smallshift: - movi xl, 0 - src xl, xh, xl - srl xh, xh - j .Lfixunssfdi_shifted - -.Lfixunssfdi_nan_or_inf: - /* Handle Infinity and NaN. */ - slli a4, a2, 9 - beqz a4, .Lfixunssfdi_maxint - - /* Translate NaN to 0xffffffff.... */ -1: movi xh, -1 - movi xl, -1 - leaf_return - -.Lfixunssfdi_maxint: - bgez a2, 1b -2: slli xh, a6, 8 /* 0x80000000 */ - movi xl, 0 - leaf_return - -.Lfixunssfdi_zero: - movi xh, 0 - movi xl, 0 - leaf_return - -.Lfixunssfdi_bigexp: - /* Handle unsigned maximum exponent case. */ - bltz a7, 2b - movi xl, 0 - leaf_return /* no shift needed */ - -#endif /* L_fixunssfdi */ - -#ifdef L_floatsisf - - .align 4 - .global __floatunsisf - .type __floatunsisf, @function -__floatunsisf: - leaf_entry sp, 16 - beqz a2, .Lfloatsisf_return - - /* Set the sign to zero and jump to the floatsisf code. */ - movi a7, 0 - j .Lfloatsisf_normalize - - .align 4 - .global __floatsisf - .type __floatsisf, @function -__floatsisf: - leaf_entry sp, 16 - - /* Check for zero. */ - beqz a2, .Lfloatsisf_return - - /* Save the sign. */ - extui a7, a2, 31, 1 - - /* Get the absolute value. */ -#if XCHAL_HAVE_ABS - abs a2, a2 -#else - neg a4, a2 - movltz a2, a4, a2 -#endif - -.Lfloatsisf_normalize: - /* Normalize with the first 1 bit in the msb. */ - do_nsau a4, a2, a5, a6 - ssl a4 - sll a5, a2 - - /* Shift the mantissa into position, with rounding bits in a6. */ - srli a2, a5, 8 - slli a6, a5, (32 - 8) - - /* Set the exponent. */ - movi a5, 0x9d /* 0x7e + 31 */ - sub a5, a5, a4 - slli a5, a5, 23 - add a2, a2, a5 - - /* Add the sign. */ - slli a7, a7, 31 - or a2, a2, a7 - - /* Round up if the leftover fraction is >= 1/2. */ - bgez a6, .Lfloatsisf_return - addi a2, a2, 1 /* Overflow to the exponent is OK. */ - - /* Check if the leftover fraction is exactly 1/2. 
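The int-to-float path condenses to a hedged C model (invented name; the 0x7e + 31 base matches the 0x9d constant above, with the implicit 1 still carried in the mantissa):

    #include <stdint.h>

    // Hypothetical model of __floatsisf: normalize, pack, round.
    static uint32_t floatsisf_model(int32_t v)
    {
        if (v == 0)
            return 0;
        uint32_t sign = (uint32_t)(v < 0) << 31;
        uint32_t mag  = (v < 0) ? -(uint32_t)v : (uint32_t)v;

        int clz = __builtin_clz(mag);
        uint32_t norm = mag << clz;          // leading 1 in bit 31
        uint32_t mant = norm >> 8;           // 24 bits incl. the 1
        uint32_t rest = norm << 24;          // the 8 bits shifted out

        uint32_t r = sign | (((uint32_t)(0x7e + 31 - clz) << 23) + mant);
        if (rest & 0x80000000u) {
            r += 1;                          // exponent carry is OK
            if ((rest << 1) == 0)
                r &= ~1u;                    // tie: round to even
        }
        return r;
    }

Only inputs with more than 24 significant bits can round at all, and INT32_MIN survives the unsigned negation because its magnitude, 2^31, is exactly representable.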
*/ - slli a6, a6, 1 - beqz a6, .Lfloatsisf_exactlyhalf - -.Lfloatsisf_return: - leaf_return - -.Lfloatsisf_exactlyhalf: - /* Round down to the nearest even value. */ - srli a2, a2, 1 - slli a2, a2, 1 - leaf_return - -#endif /* L_floatsisf */ - -#ifdef L_floatdisf - - .align 4 - .global __floatundisf - .type __floatundisf, @function -__floatundisf: - leaf_entry sp, 16 - - /* Check for zero. */ - or a4, xh, xl - beqz a4, 2f - - /* Set the sign to zero and jump to the floatdisf code. */ - movi a7, 0 - j .Lfloatdisf_normalize - - .align 4 - .global __floatdisf - .type __floatdisf, @function -__floatdisf: - leaf_entry sp, 16 - - /* Check for zero. */ - or a4, xh, xl - beqz a4, 2f - - /* Save the sign. */ - extui a7, xh, 31, 1 - - /* Get the absolute value. */ - bgez xh, .Lfloatdisf_normalize - neg xl, xl - neg xh, xh - beqz xl, .Lfloatdisf_normalize - addi xh, xh, -1 - -.Lfloatdisf_normalize: - /* Normalize with the first 1 bit in the msb of xh. */ - beqz xh, .Lfloatdisf_bigshift - do_nsau a4, xh, a5, a6 - ssl a4 - src xh, xh, xl - sll xl, xl - -.Lfloatdisf_shifted: - /* Shift the mantissa into position, with rounding bits in a6. */ - ssai 8 - sll a5, xl - src a6, xh, xl - srl xh, xh - beqz a5, 1f - movi a5, 1 - or a6, a6, a5 -1: - /* Set the exponent. */ - movi a5, 0xbd /* 0x7e + 63 */ - sub a5, a5, a4 - slli a5, a5, 23 - add a2, xh, a5 - - /* Add the sign. */ - slli a7, a7, 31 - or a2, a2, a7 - - /* Round up if the leftover fraction is >= 1/2. */ - bgez a6, 2f - addi a2, a2, 1 /* Overflow to the exponent is OK. */ - - /* Check if the leftover fraction is exactly 1/2. */ - slli a6, a6, 1 - beqz a6, .Lfloatdisf_exactlyhalf -2: leaf_return - -.Lfloatdisf_bigshift: - /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */ - do_nsau a4, xl, a5, a6 - ssl a4 - sll xh, xl - movi xl, 0 - addi a4, a4, 32 - j .Lfloatdisf_shifted - -.Lfloatdisf_exactlyhalf: - /* Round down to the nearest even value. */ - srli a2, a2, 1 - slli a2, a2, 1 - leaf_return - -#endif /* L_floatdisf */ diff --git a/gcc/config/xtensa/lib1funcs.asm b/gcc/config/xtensa/lib1funcs.asm deleted file mode 100644 index 071b9171177..00000000000 --- a/gcc/config/xtensa/lib1funcs.asm +++ /dev/null @@ -1,845 +0,0 @@ -/* Assembly functions for the Xtensa version of libgcc1. - Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009 - Free Software Foundation, Inc. - Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free -Software Foundation; either version 3, or (at your option) any later -version. - -GCC is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -<http://www.gnu.org/licenses/>. */ - -#include "xtensa-config.h" - -/* Define macros for the ABS and ADDX* instructions to handle cases - where they are not included in the Xtensa processor configuration. 
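For reference, the ADDX instructions being emulated here compute a scaled add in a single instruction; in hedged C terms (invented names):

    #include <stdint.h>

    // ADDX2/ADDX4/ADDX8: add the first operand scaled by 2, 4, or 8.
    static uint32_t addx2(uint32_t as, uint32_t at) { return (as << 1) + at; }
    static uint32_t addx4(uint32_t as, uint32_t at) { return (as << 2) + at; }
    static uint32_t addx8(uint32_t as, uint32_t at) { return (as << 3) + at; }

The fallback expansions below are the same shift-and-add spelled out with a temporary register, which is why the multiply and divide loops in this file lean on these macros so heavily.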
*/ - - .macro do_abs dst, src, tmp -#if XCHAL_HAVE_ABS - abs \dst, \src -#else - neg \tmp, \src - movgez \tmp, \src, \src - mov \dst, \tmp -#endif - .endm - - .macro do_addx2 dst, as, at, tmp -#if XCHAL_HAVE_ADDX - addx2 \dst, \as, \at -#else - slli \tmp, \as, 1 - add \dst, \tmp, \at -#endif - .endm - - .macro do_addx4 dst, as, at, tmp -#if XCHAL_HAVE_ADDX - addx4 \dst, \as, \at -#else - slli \tmp, \as, 2 - add \dst, \tmp, \at -#endif - .endm - - .macro do_addx8 dst, as, at, tmp -#if XCHAL_HAVE_ADDX - addx8 \dst, \as, \at -#else - slli \tmp, \as, 3 - add \dst, \tmp, \at -#endif - .endm - -/* Define macros for leaf function entry and return, supporting either the - standard register windowed ABI or the non-windowed call0 ABI. These - macros do not allocate any extra stack space, so they only work for - leaf functions that do not need to spill anything to the stack. */ - - .macro leaf_entry reg, size -#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ - entry \reg, \size -#else - /* do nothing */ -#endif - .endm - - .macro leaf_return -#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__ - retw -#else - ret -#endif - .endm - - -#ifdef L_mulsi3 - .align 4 - .global __mulsi3 - .type __mulsi3, @function -__mulsi3: - leaf_entry sp, 16 - -#if XCHAL_HAVE_MUL32 - mull a2, a2, a3 - -#elif XCHAL_HAVE_MUL16 - or a4, a2, a3 - srai a4, a4, 16 - bnez a4, .LMUL16 - mul16u a2, a2, a3 - leaf_return -.LMUL16: - srai a4, a2, 16 - srai a5, a3, 16 - mul16u a7, a4, a3 - mul16u a6, a5, a2 - mul16u a4, a2, a3 - add a7, a7, a6 - slli a7, a7, 16 - add a2, a7, a4 - -#elif XCHAL_HAVE_MAC16 - mul.aa.hl a2, a3 - mula.aa.lh a2, a3 - rsr a5, ACCLO - umul.aa.ll a2, a3 - rsr a4, ACCLO - slli a5, a5, 16 - add a2, a4, a5 - -#else /* !MUL32 && !MUL16 && !MAC16 */ - - /* Multiply one bit at a time, but unroll the loop 4x to better - exploit the addx instructions and avoid overhead. - Peel the first iteration to save a cycle on init. */ - - /* Avoid negative numbers. */ - xor a5, a2, a3 /* Top bit is 1 if one input is negative. */ - do_abs a3, a3, a6 - do_abs a2, a2, a6 - - /* Swap so the second argument is smaller. */ - sub a7, a2, a3 - mov a4, a3 - movgez a4, a2, a7 /* a4 = max (a2, a3) */ - movltz a3, a2, a7 /* a3 = min (a2, a3) */ - - movi a2, 0 - extui a6, a3, 0, 1 - movnez a2, a4, a6 - - do_addx2 a7, a4, a2, a7 - extui a6, a3, 1, 1 - movnez a2, a7, a6 - - do_addx4 a7, a4, a2, a7 - extui a6, a3, 2, 1 - movnez a2, a7, a6 - - do_addx8 a7, a4, a2, a7 - extui a6, a3, 3, 1 - movnez a2, a7, a6 - - bgeui a3, 16, .Lmult_main_loop - neg a3, a2 - movltz a2, a3, a5 - leaf_return - - .align 4 -.Lmult_main_loop: - srli a3, a3, 4 - slli a4, a4, 4 - - add a7, a4, a2 - extui a6, a3, 0, 1 - movnez a2, a7, a6 - - do_addx2 a7, a4, a2, a7 - extui a6, a3, 1, 1 - movnez a2, a7, a6 - - do_addx4 a7, a4, a2, a7 - extui a6, a3, 2, 1 - movnez a2, a7, a6 - - do_addx8 a7, a4, a2, a7 - extui a6, a3, 3, 1 - movnez a2, a7, a6 - - bgeui a3, 16, .Lmult_main_loop - - neg a3, a2 - movltz a2, a3, a5 - -#endif /* !MUL32 && !MUL16 && !MAC16 */ - - leaf_return - .size __mulsi3, . 
- __mulsi3 - -#endif /* L_mulsi3 */ - - -#ifdef L_umulsidi3 - -#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 -#define XCHAL_NO_MUL 1 -#endif - - .align 4 - .global __umulsidi3 - .type __umulsidi3, @function -__umulsidi3: -#if __XTENSA_CALL0_ABI__ - leaf_entry sp, 32 - addi sp, sp, -32 - s32i a12, sp, 16 - s32i a13, sp, 20 - s32i a14, sp, 24 - s32i a15, sp, 28 -#elif XCHAL_NO_MUL - /* This is not really a leaf function; allocate enough stack space - to allow CALL12s to a helper function. */ - leaf_entry sp, 48 -#else - leaf_entry sp, 16 -#endif - -#ifdef __XTENSA_EB__ -#define wh a2 -#define wl a3 -#else -#define wh a3 -#define wl a2 -#endif /* __XTENSA_EB__ */ - - /* This code is taken from the mulsf3 routine in ieee754-sf.S. - See more comments there. */ - -#if XCHAL_HAVE_MUL32_HIGH - mull a6, a2, a3 - muluh wh, a2, a3 - mov wl, a6 - -#else /* ! MUL32_HIGH */ - -#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL - /* a0 and a8 will be clobbered by calling the multiply function - but a8 is not used here and need not be saved. */ - s32i a0, sp, 0 -#endif - -#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 - -#define a2h a4 -#define a3h a5 - - /* Get the high halves of the inputs into registers. */ - srli a2h, a2, 16 - srli a3h, a3, 16 - -#define a2l a2 -#define a3l a3 - -#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 - /* Clear the high halves of the inputs. This does not matter - for MUL16 because the high bits are ignored. */ - extui a2, a2, 0, 16 - extui a3, a3, 0, 16 -#endif -#endif /* MUL16 || MUL32 */ - - -#if XCHAL_HAVE_MUL16 - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - mul16u dst, xreg ## xhalf, yreg ## yhalf - -#elif XCHAL_HAVE_MUL32 - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - mull dst, xreg ## xhalf, yreg ## yhalf - -#elif XCHAL_HAVE_MAC16 - -/* The preprocessor insists on inserting a space when concatenating after - a period in the definition of do_mul below. These macros are a workaround - using underscores instead of periods when doing the concatenation. */ -#define umul_aa_ll umul.aa.ll -#define umul_aa_lh umul.aa.lh -#define umul_aa_hl umul.aa.hl -#define umul_aa_hh umul.aa.hh - -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - umul_aa_ ## xhalf ## yhalf xreg, yreg; \ - rsr dst, ACCLO - -#else /* no multiply hardware */ - -#define set_arg_l(dst, src) \ - extui dst, src, 0, 16 -#define set_arg_h(dst, src) \ - srli dst, src, 16 - -#if __XTENSA_CALL0_ABI__ -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - set_arg_ ## xhalf (a13, xreg); \ - set_arg_ ## yhalf (a14, yreg); \ - call0 .Lmul_mulsi3; \ - mov dst, a12 -#else -#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ - set_arg_ ## xhalf (a14, xreg); \ - set_arg_ ## yhalf (a15, yreg); \ - call12 .Lmul_mulsi3; \ - mov dst, a14 -#endif /* __XTENSA_CALL0_ABI__ */ - -#endif /* no multiply hardware */ - - /* Add pp1 and pp2 into a6 with carry-out in a9. */ - do_mul(a6, a2, l, a3, h) /* pp 1 */ - do_mul(a11, a2, h, a3, l) /* pp 2 */ - movi a9, 0 - add a6, a6, a11 - bgeu a6, a11, 1f - addi a9, a9, 1 -1: - /* Shift the high half of a9/a6 into position in a9. Note that - this value can be safely incremented without any carry-outs. */ - ssai 16 - src a9, a9, a6 - - /* Compute the low word into a6. */ - do_mul(a11, a2, l, a3, l) /* pp 0 */ - sll a6, a6 - add a6, a6, a11 - bgeu a6, a11, 1f - addi a9, a9, 1 -1: - /* Compute the high word into wh. */ - do_mul(wh, a2, h, a3, h) /* pp 3 */ - add wh, wh, a9 - mov wl, a6 - -#endif /* !MUL32_HIGH */ - -#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL - /* Restore the original return address. 
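Behaviorally the routine is just the widening multiply; a hedged one-line reference model (invented name):

    #include <stdint.h>

    // Reference: full 64-bit product of two unsigned 32-bit values,
    // delivered in the a2/a3 pair via the wh/wl mapping above.
    static uint64_t umulsidi3_model(uint32_t a, uint32_t b)
    {
        return (uint64_t)a * (uint64_t)b;
    }

All the machinery above exists to synthesize this product on configurations whose multiplier options (MUL32_HIGH, MUL16, MAC16, or none) fall short of a full 32x32-to-64 multiply.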
*/
- l32i a0, sp, 0
-#endif
-#if __XTENSA_CALL0_ABI__
- l32i a12, sp, 16
- l32i a13, sp, 20
- l32i a14, sp, 24
- l32i a15, sp, 28
- addi sp, sp, 32
-#endif
- leaf_return
-
-#if XCHAL_NO_MUL
-
- /* For Xtensa processors with no multiply hardware, this simplified
-    version of _mulsi3 is used for multiplying 16-bit chunks of
-    the floating-point mantissas. When using CALL0, this function
-    uses a custom ABI: the inputs are passed in a13 and a14, the
-    result is returned in a12, and a8 and a15 are clobbered. */
- .align 4
-.Lmul_mulsi3:
- leaf_entry sp, 16
- .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
- movi \dst, 0
-1: add \tmp1, \src2, \dst
- extui \tmp2, \src1, 0, 1
- movnez \dst, \tmp1, \tmp2
-
- do_addx2 \tmp1, \src2, \dst, \tmp1
- extui \tmp2, \src1, 1, 1
- movnez \dst, \tmp1, \tmp2
-
- do_addx4 \tmp1, \src2, \dst, \tmp1
- extui \tmp2, \src1, 2, 1
- movnez \dst, \tmp1, \tmp2
-
- do_addx8 \tmp1, \src2, \dst, \tmp1
- extui \tmp2, \src1, 3, 1
- movnez \dst, \tmp1, \tmp2
-
- srli \src1, \src1, 4
- slli \src2, \src2, 4
- bnez \src1, 1b
- .endm
-#if __XTENSA_CALL0_ABI__
- mul_mulsi3_body a12, a13, a14, a15, a8
-#else
- /* The result will be written into a2, so save that argument in a4. */
- mov a4, a2
- mul_mulsi3_body a2, a4, a3, a5, a6
-#endif
- leaf_return
-#endif /* XCHAL_NO_MUL */
-
- .size __umulsidi3, . - __umulsidi3
-
-#endif /* L_umulsidi3 */
-
-
-/* Define a macro for the NSAU (unsigned normalize shift amount)
-   instruction, which computes the number of leading zero bits,
-   to handle cases where it is not included in the Xtensa processor
-   configuration. */
-
- .macro do_nsau cnt, val, tmp, a
-#if XCHAL_HAVE_NSA
- nsau \cnt, \val
-#else
- mov \a, \val
- movi \cnt, 0
- extui \tmp, \a, 16, 16
- bnez \tmp, 0f
- movi \cnt, 16
- slli \a, \a, 16
-0:
- extui \tmp, \a, 24, 8
- bnez \tmp, 1f
- addi \cnt, \cnt, 8
- slli \a, \a, 8
-1:
- movi \tmp, __nsau_data
- extui \a, \a, 24, 8
- add \tmp, \tmp, \a
- l8ui \tmp, \tmp, 0
- add \cnt, \cnt, \tmp
-#endif /* !XCHAL_HAVE_NSA */
- .endm
-
-#ifdef L_clz
- .section .rodata
- .align 4
- .global __nsau_data
- .type __nsau_data, @object
-__nsau_data:
-#if !XCHAL_HAVE_NSA
- .byte 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
- .byte 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
- .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
- .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
- .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
- .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
- .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
- .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
- .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
- .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
- .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
- .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
- .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
- .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
- .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
- .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-#endif /* !XCHAL_HAVE_NSA */
- .size __nsau_data, . - __nsau_data
- .hidden __nsau_data
-#endif /* L_clz */
-
-
-#ifdef L_clzsi2
- .align 4
- .global __clzsi2
- .type __clzsi2, @function
-__clzsi2:
- leaf_entry sp, 16
- do_nsau a2, a2, a3, a4
- leaf_return
- .size __clzsi2, . - __clzsi2
-
-#endif /* L_clzsi2 */
-
-
-#ifdef L_ctzsi2
- .align 4
- .global __ctzsi2
- .type __ctzsi2, @function
-__ctzsi2:
- leaf_entry sp, 16
- neg a3, a2
- and a3, a3, a2
- do_nsau a2, a3, a4, a5
- neg a2, a2
- addi a2, a2, 31
- leaf_return
- .size __ctzsi2, . - __ctzsi2
-
-#endif /* L_ctzsi2 */
-
-
-#ifdef L_ffssi2
- .align 4
- .global __ffssi2
- .type __ffssi2, @function
-__ffssi2:
- leaf_entry sp, 16
- neg a3, a2
- and a3, a3, a2
- do_nsau a2, a3, a4, a5
- neg a2, a2
- addi a2, a2, 32
- leaf_return
- .size __ffssi2, . - __ffssi2
-
-#endif /* L_ffssi2 */
-
-
-#ifdef L_udivsi3
- .align 4
- .global __udivsi3
- .type __udivsi3, @function
-__udivsi3:
- leaf_entry sp, 16
-#if XCHAL_HAVE_DIV32
- quou a2, a2, a3
-#else
- bltui a3, 2, .Lle_one /* check if the divisor <= 1 */
-
- mov a6, a2 /* keep dividend in a6 */
- do_nsau a5, a6, a2, a7 /* dividend_shift = nsau (dividend) */
- do_nsau a4, a3, a2, a7 /* divisor_shift = nsau (divisor) */
- bgeu a5, a4, .Lspecial
-
- sub a4, a4, a5 /* count = divisor_shift - dividend_shift */
- ssl a4
- sll a3, a3 /* divisor <<= count */
- movi a2, 0 /* quotient = 0 */
-
- /* test-subtract-and-shift loop; one quotient bit on each iteration */
-#if XCHAL_HAVE_LOOPS
- loopnez a4, .Lloopend
-#endif /* XCHAL_HAVE_LOOPS */
-.Lloop:
- bltu a6, a3, .Lzerobit
- sub a6, a6, a3
- addi a2, a2, 1
-.Lzerobit:
- slli a2, a2, 1
- srli a3, a3, 1
-#if !XCHAL_HAVE_LOOPS
- addi a4, a4, -1
- bnez a4, .Lloop
-#endif /* !XCHAL_HAVE_LOOPS */
-.Lloopend:
-
- bltu a6, a3, .Lreturn
- addi a2, a2, 1 /* increment quotient if dividend >= divisor */
-.Lreturn:
- leaf_return
-
-.Lle_one:
- beqz a3, .Lerror /* if divisor == 1, return the dividend */
- leaf_return
-
-.Lspecial:
- /* return dividend >= divisor */
- bltu a6, a3, .Lreturn0
- movi a2, 1
- leaf_return
-
-.Lerror:
- /* Divide by zero: Use an illegal instruction to force an exception.
-    The subsequent "DIV0" string can be recognized by the exception
-    handler to identify the real cause of the exception. */
- ill
- .ascii "DIV0"
-
-.Lreturn0:
- movi a2, 0
-#endif /* XCHAL_HAVE_DIV32 */
- leaf_return
- .size __udivsi3, . - __udivsi3
-
-#endif /* L_udivsi3 */
-
-
-#ifdef L_divsi3
- .align 4
- .global __divsi3
- .type __divsi3, @function
-__divsi3:
- leaf_entry sp, 16
-#if XCHAL_HAVE_DIV32
- quos a2, a2, a3
-#else
- xor a7, a2, a3 /* sign = dividend ^ divisor */
- do_abs a6, a2, a4 /* udividend = abs (dividend) */
- do_abs a3, a3, a4 /* udivisor = abs (divisor) */
- bltui a3, 2, .Lle_one /* check if udivisor <= 1 */
- do_nsau a5, a6, a2, a8 /* udividend_shift = nsau (udividend) */
- do_nsau a4, a3, a2, a8 /* udivisor_shift = nsau (udivisor) */
- bgeu a5, a4, .Lspecial
-
- sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */
- ssl a4
- sll a3, a3 /* udivisor <<= count */
- movi a2, 0 /* quotient = 0 */
-
- /* test-subtract-and-shift loop; one quotient bit on each iteration */
-#if XCHAL_HAVE_LOOPS
- loopnez a4, .Lloopend
-#endif /* XCHAL_HAVE_LOOPS */
-.Lloop:
- bltu a6, a3, .Lzerobit
- sub a6, a6, a3
- addi a2, a2, 1
-.Lzerobit:
- slli a2, a2, 1
- srli a3, a3, 1
-#if !XCHAL_HAVE_LOOPS
- addi a4, a4, -1
- bnez a4, .Lloop
-#endif /* !XCHAL_HAVE_LOOPS */
-.Lloopend:
-
- bltu a6, a3, .Lreturn
- addi a2, a2, 1 /* increment if udividend >= udivisor */
-.Lreturn:
- neg a5, a2
- movltz a2, a5, a7 /* return (sign < 0) ? -quotient : quotient */
- leaf_return
-
-.Lle_one:
- beqz a3, .Lerror
- neg a2, a6 /* if udivisor == 1, then return... */
- movgez a2, a6, a7 /* (sign < 0) ? -udividend : udividend */
- leaf_return
-
-.Lspecial:
- bltu a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */
- movi a2, 1
- movi a4, -1
- movltz a2, a4, a7 /* else return (sign < 0) ? -1 : 1 */
- leaf_return
-
-.Lerror:
- /* Divide by zero: Use an illegal instruction to force an exception.
-    The subsequent "DIV0" string can be recognized by the exception
-    handler to identify the real cause of the exception. */
- ill
- .ascii "DIV0"
-
-.Lreturn0:
- movi a2, 0
-#endif /* XCHAL_HAVE_DIV32 */
- leaf_return
- .size __divsi3, . - __divsi3
-
-#endif /* L_divsi3 */
-
-
-#ifdef L_umodsi3
- .align 4
- .global __umodsi3
- .type __umodsi3, @function
-__umodsi3:
- leaf_entry sp, 16
-#if XCHAL_HAVE_DIV32
- remu a2, a2, a3
-#else
- bltui a3, 2, .Lle_one /* check if the divisor is <= 1 */
-
- do_nsau a5, a2, a6, a7 /* dividend_shift = nsau (dividend) */
- do_nsau a4, a3, a6, a7 /* divisor_shift = nsau (divisor) */
- bgeu a5, a4, .Lspecial
-
- sub a4, a4, a5 /* count = divisor_shift - dividend_shift */
- ssl a4
- sll a3, a3 /* divisor <<= count */
-
- /* test-subtract-and-shift loop */
-#if XCHAL_HAVE_LOOPS
- loopnez a4, .Lloopend
-#endif /* XCHAL_HAVE_LOOPS */
-.Lloop:
- bltu a2, a3, .Lzerobit
- sub a2, a2, a3
-.Lzerobit:
- srli a3, a3, 1
-#if !XCHAL_HAVE_LOOPS
- addi a4, a4, -1
- bnez a4, .Lloop
-#endif /* !XCHAL_HAVE_LOOPS */
-.Lloopend:
-
-.Lspecial:
- bltu a2, a3, .Lreturn
- sub a2, a2, a3 /* subtract once more if dividend >= divisor */
-.Lreturn:
- leaf_return
-
-.Lle_one:
- bnez a3, .Lreturn0
-
- /* Divide by zero: Use an illegal instruction to force an exception.
-    The subsequent "DIV0" string can be recognized by the exception
-    handler to identify the real cause of the exception. */
- ill
- .ascii "DIV0"
-
-.Lreturn0:
- movi a2, 0
-#endif /* XCHAL_HAVE_DIV32 */
- leaf_return
- .size __umodsi3, . - __umodsi3
-
-#endif /* L_umodsi3 */
-
-
-#ifdef L_modsi3
- .align 4
- .global __modsi3
- .type __modsi3, @function
-__modsi3:
- leaf_entry sp, 16
-#if XCHAL_HAVE_DIV32
- rems a2, a2, a3
-#else
- mov a7, a2 /* save original (signed) dividend */
- do_abs a2, a2, a4 /* udividend = abs (dividend) */
- do_abs a3, a3, a4 /* udivisor = abs (divisor) */
- bltui a3, 2, .Lle_one /* check if udivisor <= 1 */
- do_nsau a5, a2, a6, a8 /* udividend_shift = nsau (udividend) */
- do_nsau a4, a3, a6, a8 /* udivisor_shift = nsau (udivisor) */
- bgeu a5, a4, .Lspecial
-
- sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */
- ssl a4
- sll a3, a3 /* udivisor <<= count */
-
- /* test-subtract-and-shift loop */
-#if XCHAL_HAVE_LOOPS
- loopnez a4, .Lloopend
-#endif /* XCHAL_HAVE_LOOPS */
-.Lloop:
- bltu a2, a3, .Lzerobit
- sub a2, a2, a3
-.Lzerobit:
- srli a3, a3, 1
-#if !XCHAL_HAVE_LOOPS
- addi a4, a4, -1
- bnez a4, .Lloop
-#endif /* !XCHAL_HAVE_LOOPS */
-.Lloopend:
-
-.Lspecial:
- bltu a2, a3, .Lreturn
- sub a2, a2, a3 /* subtract again if udividend >= udivisor */
-.Lreturn:
- bgez a7, .Lpositive
- neg a2, a2 /* if (dividend < 0), return -udividend */
-.Lpositive:
- leaf_return
-
-.Lle_one:
- bnez a3, .Lreturn0
-
- /* Divide by zero: Use an illegal instruction to force an exception.
-    The subsequent "DIV0" string can be recognized by the exception
-    handler to identify the real cause of the exception. */
- ill
- .ascii "DIV0"
-
-.Lreturn0:
- movi a2, 0
-#endif /* XCHAL_HAVE_DIV32 */
- leaf_return
- .size __modsi3, . - __modsi3
-
-#endif /* L_modsi3 */
-
-
-#ifdef __XTENSA_EB__
-#define uh a2
-#define ul a3
-#else
-#define uh a3
-#define ul a2
-#endif /* __XTENSA_EB__ */
-
-
-#ifdef L_ashldi3
- .align 4
- .global __ashldi3
- .type __ashldi3, @function
-__ashldi3:
- leaf_entry sp, 16
- ssl a4
- bgei a4, 32, .Llow_only
- src uh, uh, ul
- sll ul, ul
- leaf_return
-
-.Llow_only:
- sll uh, ul
- movi ul, 0
- leaf_return
- .size __ashldi3, . - __ashldi3
-
-#endif /* L_ashldi3 */
-
-
-#ifdef L_ashrdi3
- .align 4
- .global __ashrdi3
- .type __ashrdi3, @function
-__ashrdi3:
- leaf_entry sp, 16
- ssr a4
- bgei a4, 32, .Lhigh_only
- src ul, uh, ul
- sra uh, uh
- leaf_return
-
-.Lhigh_only:
- sra ul, uh
- srai uh, uh, 31
- leaf_return
- .size __ashrdi3, . - __ashrdi3
-
-#endif /* L_ashrdi3 */
-
-
-#ifdef L_lshrdi3
- .align 4
- .global __lshrdi3
- .type __lshrdi3, @function
-__lshrdi3:
- leaf_entry sp, 16
- ssr a4
- bgei a4, 32, .Lhigh_only1
- src ul, uh, ul
- srl uh, uh
- leaf_return
-
-.Lhigh_only1:
- srl ul, uh
- movi uh, 0
- leaf_return
- .size __lshrdi3, . - __lshrdi3
-
-#endif /* L_lshrdi3 */
-
-
-#include "ieee754-df.S"
-#include "ieee754-sf.S"
diff --git a/gcc/config/xtensa/t-xtensa b/gcc/config/xtensa/t-xtensa
index c0a7cb5202f..31ac2ad2452 100644
--- a/gcc/config/xtensa/t-xtensa
+++ b/gcc/config/xtensa/t-xtensa
@@ -17,18 +17,6 @@
 # along with GCC; see the file COPYING3. If not see
 # <http://www.gnu.org/licenses/>.
 
-LIB1ASMSRC = xtensa/lib1funcs.asm
-LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \
- _umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \
- _ashldi3 _ashrdi3 _lshrdi3 \
- _negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \
- _fixunssfsi _fixunssfdi _floatsisf _floatunsisf \
- _floatdisf _floatundisf \
- _negdf2 _addsubdf3 _muldf3 _divdf3 _cmpdf2 _fixdfsi _fixdfdi \
- _fixunsdfsi _fixunsdfdi _floatsidf _floatunsidf \
- _floatdidf _floatundidf \
- _truncdfsf2 _extendsfdf2
-
 LIB2FUNCS_EXTRA = $(srcdir)/config/xtensa/lib2funcs.S
 
 $(out_object_file): gt-xtensa.h
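
For readers tracing the moved Xtensa routines: the .Lmul_mulsi3 helper above multiplies by shift-and-add, retiring four multiplier bits per pass through the add/addx2/addx4/addx8 chain. Below is a minimal C model of that loop; the name mul_model is illustrative only and appears nowhere in the patch.

#include <stdint.h>

/* Hypothetical C model of .Lmul_mulsi3: each pass conditionally
   adds src2, src2<<1, src2<<2, and src2<<3 into dst (the add,
   do_addx2, do_addx4, do_addx8 steps), then consumes four bits
   of src1 and pre-shifts src2 by four.  */
static uint32_t
mul_model (uint32_t src1, uint32_t src2)
{
  uint32_t dst = 0;
  while (src1 != 0)
    {
      if (src1 & 1) dst += src2;        /* add                   */
      if (src1 & 2) dst += src2 << 1;   /* do_addx2              */
      if (src1 & 4) dst += src2 << 2;   /* do_addx4              */
      if (src1 & 8) dst += src2 << 3;   /* do_addx8              */
      src1 >>= 4;                       /* srli \src1, \src1, 4  */
      src2 <<= 4;                       /* slli \src2, \src2, 4  */
    }
  return dst;
}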
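
The do_nsau macro and the __clzsi2/__ctzsi2/__ffssi2 entry points all reduce to one primitive, nsau (count of leading zeros), with ctz and ffs derived by isolating the lowest set bit as x & -x. A sketch in C under assumed names; to stay self-contained it finishes the last step with a loop where the assembly instead indexes the 256-entry __nsau_data byte table.

#include <stdint.h>

/* Model of the do_nsau fallback (no NSA instruction): narrow the
   value to its top byte in 16-bit and 8-bit steps, then count
   within that byte (the assembly does this last step with the
   __nsau_data table).  nsau of zero is 32.  */
static int
nsau_model (uint32_t val)
{
  int cnt = 0;
  if ((val >> 16) == 0) { cnt = 16; val <<= 16; }
  if ((val >> 24) == 0) { cnt += 8; val <<= 8; }
  if (val == 0)
    return 32;
  while (!(val & 0x80000000u))          /* stand-in for the byte table */
    { cnt++; val <<= 1; }
  return cnt;
}

/* __ctzsi2: isolate the lowest set bit (neg + and), take nsau,
   then flip the count: 31 - nsau (x & -x).  */
static int
ctz_model (uint32_t x)
{
  return 31 - nsau_model (x & (0u - x));
}

/* __ffssi2 is the same with a bias of 32, so ffs (0) == 0 and
   bit numbering starts at 1.  */
static int
ffs_model (uint32_t x)
{
  return 32 - nsau_model (x & (0u - x));
}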
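
When XCHAL_HAVE_DIV32 is not configured, __udivsi3, __divsi3, __umodsi3, and __modsi3 share one shift-and-subtract core: align the divisor under the dividend using the difference of the two nsau counts, retire one quotient bit per iteration, and finish with a trial subtraction. A C model under assumed names (udiv_model, div_model, clz32); the real routines signal division by zero with ill followed by the "DIV0" marker string, for which abort() merely stands in here.

#include <stdint.h>
#include <stdlib.h>

static int
clz32 (uint32_t x)                      /* do_nsau stand-in; x != 0 */
{
  int n = 0;
  while (!(x & 0x80000000u))
    { n++; x <<= 1; }
  return n;
}

/* Model of the __udivsi3/__umodsi3 core: returns the quotient and
   leaves the remainder in *rem.  */
static uint32_t
udiv_model (uint32_t dividend, uint32_t divisor, uint32_t *rem)
{
  uint32_t quotient = 0;
  int count;

  if (divisor == 0)
    abort ();                           /* asm: ill + .ascii "DIV0" */
  if (divisor == 1)
    { *rem = 0; return dividend; }      /* .Lle_one */
  if (dividend == 0)
    { *rem = 0; return 0; }

  count = clz32 (divisor) - clz32 (dividend);
  if (count > 0)                        /* otherwise .Lspecial: 0 or 1 */
    {
      divisor <<= count;                /* ssl/sll: align the divisor */
      while (count-- > 0)               /* one quotient bit per pass */
        {
          if (dividend >= divisor)
            { dividend -= divisor; quotient++; }
          quotient <<= 1;
          divisor >>= 1;
        }
    }
  if (dividend >= divisor)              /* final trial subtraction */
    { dividend -= divisor; quotient++; }

  *rem = dividend;
  return quotient;
}

/* __divsi3 layers sign handling on top: sign = dividend ^ divisor,
   divide the magnitudes, then negate the quotient if sign < 0
   (the movltz at .Lreturn).  __modsi3 instead takes the sign of
   the original dividend.  */
static int32_t
div_model (int32_t n, int32_t d)
{
  uint32_t r;
  uint32_t q = udiv_model (n < 0 ? 0u - (uint32_t) n : (uint32_t) n,
                           d < 0 ? 0u - (uint32_t) d : (uint32_t) d, &r);
  return ((n ^ d) < 0) ? -(int32_t) q : (int32_t) q;
}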
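
Finally, the three DImode shift routines treat the operand as a high/low pair of 32-bit halves (the uh/ul defines swap the registers for big-endian), splitting each shift into an under-32 funnel-shift case (src) and a 32-and-over case. A C model of __ashldi3 under an assumed name; __lshrdi3 and __ashrdi3 mirror it rightward with zero fill and sign fill respectively.

#include <stdint.h>

/* Model of __ashldi3 for 0 <= sh < 64: the value travels as two
   32-bit halves, uh and ul in the assembly.  */
static uint64_t
ashldi3_model (uint32_t uh, uint32_t ul, int sh)
{
  if (sh >= 32)
    {                                       /* .Llow_only */
      uh = ul << (sh - 32);                 /* sll uh, ul */
      ul = 0;                               /* movi ul, 0 */
    }
  else if (sh > 0)
    {
      uh = (uh << sh) | (ul >> (32 - sh));  /* src uh, uh, ul */
      ul <<= sh;                            /* sll ul */
    }
  return ((uint64_t) uh << 32) | ul;
}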