author     ro <ro@138bc75d-0d04-0410-961f-82ee72b054a4>  2011-11-02 15:03:19 +0000
committer  ro <ro@138bc75d-0d04-0410-961f-82ee72b054a4>  2011-11-02 15:03:19 +0000
commit     9213d2eb44a8b9bcc432b57e246d9b52d5bdc949 (patch)
tree       bfbde9a54f663fb7556b9dacd07709ef97c1961c /gcc/config/xtensa
parent     237490bf10db39b859bd28598ff64f1bd2c84421 (diff)
Move libgcc1 to toplevel libgcc
gcc:
	* Makefile.in (LIB1ASMSRC): Don't export.
	(libgcc.mvars): Don't emit LIB1ASMFUNCS, LIB1ASMSRC.
	* config/arm/arm.c: Update lib1funcs.asm filename.
	* config/arm/linux-eabi.h: Likewise.
	* config/arm/bpabi-v6m.S, config/arm/bpabi.S,
	config/arm/ieee754-df.S, config/arm/ieee754-sf.S: Move to
	../libgcc/config/arm.
	* config/arm/lib1funcs.asm: Move to ../libgcc/config/arm/lib1funcs.S.
	* config/arm/t-arm (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/arm/t-arm-elf (LIB1ASMFUNCS): Remove.
	* config/arm/t-bpabi: Likewise.
	* config/arm/t-linux (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/arm/t-linux-eabi (LIB1ASMFUNCS): Remove.
	* config/arm/t-strongarm-elf: Likewise.
	* config/arm/t-symbian: Likewise.
	* config/arm/t-vxworks: Likewise.
	* config/arm/t-wince-pe: Likewise.
	* config/avr/libgcc.S: Move to ../libgcc/config/avr.
	* config/avr/t-avr (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/bfin/lib1funcs.asm: Move to ../libgcc/config/bfin/lib1funcs.S.
	* config/bfin/t-bfin: Remove.
	* config/bfin/t-bfin-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/bfin/t-bfin-linux: Likewise.
	* config/bfin/t-bfin-uclinux: Likewise.
	* config/c6x/lib1funcs.asm: Move to ../libgcc/config/c6x/lib1funcs.S.
	* config/c6x/t-c6x-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/fr30/lib1funcs.asm: Move to ../libgcc/config/fr30/lib1funcs.S.
	* config/fr30/t-fr30 (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/frv/lib1funcs.asm: Move to ../libgcc/config/frv/lib1funcs.S.
	* config/frv/t-frv (CROSS_LIBGCC1, LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/h8300/fixunssfsi.c: Update lib1funcs.asm filename.
	* config/h8300/lib1funcs.asm: Move to
	../libgcc/config/h8300/lib1funcs.S.
	* config/h8300/t-h8300 (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/i386/cygwin.asm: Move to ../libgcc/config/i386/cygwin.S.
	* config/i386/t-cygming (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/i386/t-interix: Likewise.
	* config/ia64/lib1funcs.asm: Move to ../libgcc/config/ia64/lib1funcs.S.
	* config/ia64/t-hpux (LIB1ASMFUNCS, LIBGCC1_TEST): Remove.
	* config/ia64/t-ia64 (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/iq2000/t-iq2000 (LIBGCC1, CROSS_LIBGCC1): Remove.
	* config/m32c/m32c.c: Update m32c-lib1.S filename.
	* config/m32c/m32c-lib1.S: Move to ../libgcc/config/m32c/lib1funcs.S.
	* config/m32c/t-m32c (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/m32r/t-linux (CROSS_LIBGCC1, LIBGCC1, LIBGCC1_TEST): Remove.
	* config/m68k/lb1sf68.asm: Move to ../libgcc/config/m68k/lb1sf68.S.
	* config/m68k/t-floatlib (LIB1ASMSRC, LIB1ASMFUNCS): New file.
	* config/mcore/lib1.asm: Move to ../libgcc/config/mcore/lib1funcs.S.
	* config/mcore/t-mcore (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/mep/mep-lib1.asm: Move to ../libgcc/config/mep/lib1funcs.S.
	* config/mep/t-mep (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/mips/mips16.S: Move to ../libgcc/config/mips.
	* config/mips/t-libgcc-mips16: Remove.
	* config/mips/t-sr71k (LIBGCC1, CROSS_LIBGCC1): Remove.
	* config/pa/milli64.S: Move to ../libgcc/config/pa.
	* config/pa/t-linux (LIB1ASMFUNCS, LIB1ASMSRC): Remove.
	* config/pa/t-linux64: Likewise.
	* config/picochip/libgccExtras/fake_libgcc.asm: Move to
	../libgcc/config/picochip/lib1funcs.S.
	* config/picochip/t-picochip (LIB1ASMFUNCS, LIB1ASMSRC): Remove.
	* config/sh/lib1funcs.asm: Move to ../libgcc/config/sh/lib1funcs.S.
	* config/sh/lib1funcs.h: Move to ../libgcc/config/sh.
	* config/sh/sh.h: Update lib1funcs.asm filename.
	* config/sh/t-linux (LIB1ASMFUNCS_CACHE): Remove.
	* config/sh/t-netbsd: Likewise.
	* config/sh/t-sh (LIB1ASMSRC, LIB1ASMFUNCS, LIB1ASMFUNCS_CACHE):
	Remove.
	* config/sh/t-sh64 (LIB1ASMFUNCS): Remove.
	* config/sparc/lb1spc.asm: Move to ../libgcc/config/sparc/lb1spc.S.
	* config/sparc/lb1spl.asm: Remove.
	* config/sparc/t-elf (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/sparc/t-leon: Likewise.
	* config/spu/t-spu-elf (LIBGCC1, CROSS_LIBGCC1): Remove.
	* config/v850/lib1funcs.asm: Move to ../libgcc/config/v850/lib1funcs.S.
	* config/v850/t-v850 (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config/vax/lib1funcs.asm: Move to ../libgcc/config/vax/lib1funcs.S.
	* config/vax/t-linux: Remove.
	* config/xtensa/ieee754-df.S, config/xtensa/ieee754-sf.S: Move to
	../libgcc/config/xtensa.
	* config/xtensa/lib1funcs.asm: Move to
	../libgcc/config/xtensa/lib1funcs.S.
	* config/xtensa/t-xtensa (LIB1ASMSRC, LIB1ASMFUNCS): Remove.
	* config.gcc (bfin*-rtems*): Remove bfin/t-bfin from tmake_file.
	(bfin*-*): Likewise.
	(mips64*-*-linux*, mipsisa64*-*-linux*): Remove mips/t-libgcc-mips16
	from tmake_file.
	(mips*-*-linux*): Likewise.
	(mips*-sde-elf*): Likewise.
	(mipsisa32-*-elf*, mipsisa32el-*-elf*, mipsisa32r2-*-elf*)
	(mipsisa32r2el-*-elf*, mipsisa64-*-elf*, mipsisa64el-*-elf*)
	(mipsisa64r2-*-elf*, mipsisa64r2el-*-elf*): Likewise.
	(mipsisa64sb1-*-elf*, mipsisa64sb1el-*-elf*): Likewise.
	(mips-*-elf*, mipsel-*-elf*): Likewise.
	(mips64-*-elf*, mips64el-*-elf*): Likewise.
	(mips64orion-*-elf*, mips64orionel-*-elf*): Likewise.
	(mips*-*-rtems*): Likewise.
	(mipstx39-*-elf*, mipstx39el-*-elf*): Likewise.
	(vax-*-linux*): Remove vax/t-linux from tmake_file.

libgcc:
	* Makefile.in ($(lib1asmfuncs-o), $(lib1asmfuncs-s-o)): Use
	$(srcdir) to refer to $(LIB1ASMSRC).  Use $<.
	* config/arm/bpabi-v6m.S, config/arm/bpabi.S,
	config/arm/ieee754-df.S, config/arm/ieee754-sf.S,
	config/arm/lib1funcs.S: New files.
	* config/arm/libunwind.S [!__symbian__]: Use lib1funcs.S.
	* config/arm/t-arm: New file.
	* config/arm/t-bpabi (LIB1ASMFUNCS): Set.
	* config/arm/t-elf, config/arm/t-linux, config/arm/t-linux-eabi,
	config/arm/t-strongarm-elf: New files.
	* config/arm/t-symbian (LIB1ASMFUNCS): Set.
	* config/arm/t-vxworks, config/arm/t-wince-pe: New files.
	* config/avr/lib1funcs.S: New file.
	* config/avr/t-avr (LIB1ASMSRC, LIB1ASMFUNCS): Set.
	* config/bfin/lib1funcs.S, config/bfin/t-bfin: New files.
	* config/c6x/lib1funcs.S: New file.
	* config/c6x/t-elf (LIB1ASMSRC, LIB1ASMFUNCS): Set.
	* config/fr30/lib1funcs.S, config/fr30/t-fr30: New files.
	* config/frv/lib1funcs.S: New file.
	* config/frv/t-frv (LIB1ASMSRC, LIB1ASMFUNCS): Set.
	* config/h8300/lib1funcs.S, config/h8300/t-h8300: New files.
	* config/i386/cygwin.S, config/i386/t-chkstk: New files.
	* config/ia64/__divxf3.asm: Rename to ...
	* config/ia64/__divxf3.S: ... this.  Adapt lib1funcs.asm filename.
	* config/ia64/_fixtfdi.asm: Rename to ...
	* config/ia64/_fixtfdi.S: ... this.  Adapt lib1funcs.asm filename.
	* config/ia64/_fixunstfdi.asm: Rename to ...
	* config/ia64/_fixunstfdi.S: ... this.  Adapt lib1funcs.asm filename.
	* config/ia64/_floatditf.asm: Rename to ...
	* config/ia64/_floatditf.S: ... this.  Adapt lib1funcs.asm filename.
	* config/ia64/lib1funcs.S: New file.
	* config/ia64/t-hpux (LIB1ASMFUNCS): Set.
	* config/ia64/t-ia64 (LIB1ASMSRC, LIB1ASMFUNCS): Set.
	* config/ia64/t-softfp-compat (libgcc1-tf-compats): Adapt suffix.
	* config/m32c/lib1funcs.S, config/m32c/t-m32c: New files.
	* config/m68k/lb1sf68.S, config/m68k/t-floatlib: New files.
	* config/mcore/lib1funcs.S, config/mcore/t-mcore: New files.
	* config/mep/lib1funcs.S: New file.
	* config/mep/t-mep (LIB1ASMSRC, LIB1ASMFUNCS): Set.
	* config/mips/mips16.S: New file.
	* config/mips/t-mips16 (LIB1ASMSRC, LIB1ASMFUNCS): Set.
	* config/pa/milli64.S: New file.
	* config/pa/t-linux, config/pa/t-linux64: New files.
	* config/picochip/lib1funcs.S: New file.
	* config/picochip/t-picochip (LIB1ASMSRC, LIB1ASMFUNCS): Set.
	* config/sh/lib1funcs.S, config/sh/lib1funcs.h: New files.
	* config/sh/t-linux (LIB1ASMFUNCS_CACHE): Set.
	* config/sh/t-netbsd: New file.
	* config/sh/t-sh (LIB1ASMSRC, LIB1ASMFUNCS, LIB1ASMFUNCS_CACHE): Set.
	Use $(srcdir) to refer to lib1funcs.S, adapt filename.
	* config/sh/t-sh64: New file.
	* config/sparc/lb1spc.S: New file.
	* config/sparc/t-softmul (LIB1ASMSRC): Adapt sparc/lb1spc.asm
	filename.
	* config/v850/lib1funcs.S, config/v850/t-v850: New files.
	* config/vax/lib1funcs.S, config/vax/t-linux: New files.
	* config/xtensa/ieee754-df.S, config/xtensa/ieee754-sf.S,
	config/xtensa/lib1funcs.S: New files.
	* config/xtensa/t-xtensa (LIB1ASMSRC, LIB1ASMFUNCS): Set.
	* config.host (arm-wrs-vxworks): Add arm/t-arm, arm/t-vxworks to
	tmake_file.
	(arm*-*-freebsd*): Add arm/t-arm, arm/t-strongarm-elf to tmake_file.
	(arm*-*-netbsdelf*): Add arm/t-arm to tmake_file.
	(arm*-*-linux*): Likewise.  Add arm/t-elf, arm/t-bpabi,
	arm/t-linux-eabi to tmake_file for arm*-*-linux-*eabi, add
	arm/t-linux otherwise.
	(arm*-*-uclinux*): Add arm/t-arm, arm/t-elf to tmake_file.
	(arm*-*-ecos-elf): Likewise.
	(arm*-*-eabi*, arm*-*-symbianelf*): Likewise.
	(arm*-*-rtems*): Likewise.
	(arm*-*-elf): Likewise.
	(arm*-wince-pe*): Add arm/t-arm, arm/t-wince-pe to tmake_file.
	(avr-*-rtems*): Add to tmake_file, add avr/t-avr.
	(bfin*-elf*): Add bfin/t-bfin to tmake_file.
	(bfin*-uclinux*): Likewise.
	(bfin*-linux-uclibc*): Likewise.
	(bfin*-rtems*): Likewise.
	(bfin*-*): Likewise.
	(fido-*-elf): Merge into m68k-*-elf*.
	(fr30-*-elf): Add fr30/t-fr30 to tmake_file.
	(frv-*-*linux*): Add frv/t-frv to tmake_file.
	(h8300-*-rtems*): Add h8300/t-h8300 to tmake_file.
	(h8300-*-elf*): Likewise.
	(hppa*64*-*-linux*): Add pa/t-linux, pa/t-linux64 to tmake_file.
	(hppa*-*-linux*): Add pa/t-linux to tmake_file.
	(i[34567]86-*-cygwin*): Add i386/t-chkstk to tmake_file.
	(i[34567]86-*-mingw*): Likewise.
	(x86_64-*-mingw*): Likewise.
	(i[34567]86-*-interix3*): Likewise.
	(ia64*-*-hpux*): Add ia64/t-ia64, ia64/t-hpux to tmake_file.
	(ia64-hp-*vms*): Add ia64/t-ia64 to tmake_file.
	(m68k-*-elf*): Also handle fido-*-elf.  Add m68k/t-floatlib to
	tmake_file.
	(m68k-*-uclinux*): Add m68k/t-floatlib to tmake_file.
	(m68k-*-linux*): Likewise.
	(m68k-*-rtems*): Likewise.
	(mcore-*-elf): Add mcore/t-mcore to tmake_file.
	(sh-*-elf*, sh[12346l]*-*-elf*): Add sh/t-sh64 to tmake_file for
	sh64*-*-*.
	(sh-*-linux*, sh[2346lbe]*-*-linux*): Add sh/t-sh to tmake_file.
	Add sh/t-sh64 to tmake_file for sh64*-*-linux*.
	(sh-*-netbsdelf*, shl*-*-netbsdelf*, sh5-*-netbsd*)
	(sh5l*-*-netbsd*, sh64-*-netbsd*, sh64l*-*-netbsd*): Add sh/t-sh,
	sh/t-netbsd to tmake_file.  Add sh/t-sh64 to tmake_file for
	sh5*-*-netbsd*, sh64*-netbsd*.
	(sh-*-rtems*): Add sh/t-sh to tmake_file.
	(sh-wrs-vxworks): Likewise.
	(sparc-*-linux*): Add sparc/t-softmul to tmake_file except for
	*-leon[3-9]*.
	(v850*-*-*): Add v850/t-v850 to tmake_file.
	(vax-*-linux*): Add vax/t-linux to tmake_file.
	(m32c-*-elf*, m32c-*-rtems*): Add m32c/t-m32c to tmake_file.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@180773 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/xtensa')
-rw-r--r--  gcc/config/xtensa/ieee754-df.S   | 2388 -
-rw-r--r--  gcc/config/xtensa/ieee754-sf.S   | 1757 -
-rw-r--r--  gcc/config/xtensa/lib1funcs.asm  |  845 -
-rw-r--r--  gcc/config/xtensa/t-xtensa       |   12 -
4 files changed, 0 insertions(+), 5002 deletions(-)
diff --git a/gcc/config/xtensa/ieee754-df.S b/gcc/config/xtensa/ieee754-df.S
deleted file mode 100644
index 9b46889bdc2..00000000000
--- a/gcc/config/xtensa/ieee754-df.S
+++ /dev/null
@@ -1,2388 +0,0 @@
-/* IEEE-754 double-precision functions for Xtensa
- Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
- Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
-
- This file is part of GCC.
-
- GCC is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- GCC is distributed in the hope that it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
- License for more details.
-
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
-
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-#ifdef __XTENSA_EB__
-#define xh a2
-#define xl a3
-#define yh a4
-#define yl a5
-#else
-#define xh a3
-#define xl a2
-#define yh a5
-#define yl a4
-#endif
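
[The word-order mapping above can be pictured with a short C model; the union and helper below are purely illustrative and assume a 32-bit Xtensa target where a double is passed in two 32-bit registers.]

    #include <stdint.h>

    /* Which word of a double carries the sign/exponent depends on
       byte order, which is what the xh/xl and yh/yl aliases encode. */
    typedef union {
        double   d;
        uint32_t w[2];        /* w[0] is the lower-addressed word */
    } df_words;

    static uint32_t df_high_word(df_words v)
    {
    #ifdef __XTENSA_EB__      /* big-endian: high word comes first */
        return v.w[0];
    #else
        return v.w[1];
    #endif
    }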
-
-/* Warning! The branch displacements for some Xtensa branch instructions
- are quite small, and this code has been carefully laid out to keep
- branch targets in range. If you change anything, be sure to check that
- the assembler is not relaxing anything to branch over a jump. */
-
-#ifdef L_negdf2
-
- .align 4
- .global __negdf2
- .type __negdf2, @function
-__negdf2:
- leaf_entry sp, 16
- movi a4, 0x80000000
- xor xh, xh, a4
- leaf_return
-
-#endif /* L_negdf2 */
-
-#ifdef L_addsubdf3
-
- /* Addition */
-__adddf3_aux:
-
- /* Handle NaNs and Infinities. (This code is placed before the
- start of the function just to keep it in range of the limited
- branch displacements.) */
-
-.Ladd_xnan_or_inf:
- /* If y is neither Infinity nor NaN, return x. */
- bnall yh, a6, 1f
- /* If x is a NaN, return it. Otherwise, return y. */
- slli a7, xh, 12
- or a7, a7, xl
- beqz a7, .Ladd_ynan_or_inf
-1: leaf_return
-
-.Ladd_ynan_or_inf:
- /* Return y. */
- mov xh, yh
- mov xl, yl
- leaf_return
-
-.Ladd_opposite_signs:
- /* Operand signs differ. Do a subtraction. */
- slli a7, a6, 11
- xor yh, yh, a7
- j .Lsub_same_sign
-
- .align 4
- .global __adddf3
- .type __adddf3, @function
-__adddf3:
- leaf_entry sp, 16
- movi a6, 0x7ff00000
-
- /* Check if the two operands have the same sign. */
- xor a7, xh, yh
- bltz a7, .Ladd_opposite_signs
-
-.Ladd_same_sign:
- /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */
- ball xh, a6, .Ladd_xnan_or_inf
- ball yh, a6, .Ladd_ynan_or_inf
-
- /* Compare the exponents. The smaller operand will be shifted
- right by the exponent difference and added to the larger
- one. */
- extui a7, xh, 20, 12
- extui a8, yh, 20, 12
- bltu a7, a8, .Ladd_shiftx
-
-.Ladd_shifty:
- /* Check if the smaller (or equal) exponent is zero. */
- bnone yh, a6, .Ladd_yexpzero
-
- /* Replace yh sign/exponent with 0x001. */
- or yh, yh, a6
- slli yh, yh, 11
- srli yh, yh, 11
-
-.Ladd_yexpdiff:
- /* Compute the exponent difference. Optimize for difference < 32. */
- sub a10, a7, a8
- bgeui a10, 32, .Ladd_bigshifty
-
- /* Shift yh/yl right by the exponent difference. Any bits that are
- shifted out of yl are saved in a9 for rounding the result. */
- ssr a10
- movi a9, 0
- src a9, yl, a9
- src yl, yh, yl
- srl yh, yh
-
-.Ladd_addy:
- /* Do the 64-bit addition. */
- add xl, xl, yl
- add xh, xh, yh
- bgeu xl, yl, 1f
- addi xh, xh, 1
-1:
- /* Check if the add overflowed into the exponent. */
- extui a10, xh, 20, 12
- beq a10, a7, .Ladd_round
- mov a8, a7
- j .Ladd_carry
-
-.Ladd_yexpzero:
- /* y is a subnormal value. Replace its sign/exponent with zero,
- i.e., no implicit "1.0", and increment the apparent exponent
- because subnormals behave as if they had the minimum (nonzero)
- exponent. Test for the case when both exponents are zero. */
- slli yh, yh, 12
- srli yh, yh, 12
- bnone xh, a6, .Ladd_bothexpzero
- addi a8, a8, 1
- j .Ladd_yexpdiff
-
-.Ladd_bothexpzero:
- /* Both exponents are zero. Handle this as a special case. There
- is no need to shift or round, and the normal code for handling
- a carry into the exponent field will not work because it
- assumes there is an implicit "1.0" that needs to be added. */
- add xl, xl, yl
- add xh, xh, yh
- bgeu xl, yl, 1f
- addi xh, xh, 1
-1: leaf_return
-
-.Ladd_bigshifty:
- /* Exponent difference > 64 -- just return the bigger value. */
- bgeui a10, 64, 1b
-
- /* Shift yh/yl right by the exponent difference. Any bits that are
- shifted out are saved in a9 for rounding the result. */
- ssr a10
- sll a11, yl /* lost bits shifted out of yl */
- src a9, yh, yl
- srl yl, yh
- movi yh, 0
- beqz a11, .Ladd_addy
- or a9, a9, a10 /* any positive, nonzero value will work */
- j .Ladd_addy
-
-.Ladd_xexpzero:
- /* Same as "yexpzero" except skip handling the case when both
- exponents are zero. */
- slli xh, xh, 12
- srli xh, xh, 12
- addi a7, a7, 1
- j .Ladd_xexpdiff
-
-.Ladd_shiftx:
- /* Same thing as the "shifty" code, but with x and y swapped. Also,
- because the exponent difference is always nonzero in this version,
- the shift sequence can use SLL and skip loading a constant zero. */
- bnone xh, a6, .Ladd_xexpzero
-
- or xh, xh, a6
- slli xh, xh, 11
- srli xh, xh, 11
-
-.Ladd_xexpdiff:
- sub a10, a8, a7
- bgeui a10, 32, .Ladd_bigshiftx
-
- ssr a10
- sll a9, xl
- src xl, xh, xl
- srl xh, xh
-
-.Ladd_addx:
- add xl, xl, yl
- add xh, xh, yh
- bgeu xl, yl, 1f
- addi xh, xh, 1
-1:
- /* Check if the add overflowed into the exponent. */
- extui a10, xh, 20, 12
- bne a10, a8, .Ladd_carry
-
-.Ladd_round:
- /* Round up if the leftover fraction is >= 1/2. */
- bgez a9, 1f
- addi xl, xl, 1
- beqz xl, .Ladd_roundcarry
-
- /* Check if the leftover fraction is exactly 1/2. */
- slli a9, a9, 1
- beqz a9, .Ladd_exactlyhalf
-1: leaf_return
-
-.Ladd_bigshiftx:
- /* Mostly the same thing as "bigshifty".... */
- bgeui a10, 64, .Ladd_returny
-
- ssr a10
- sll a11, xl
- src a9, xh, xl
- srl xl, xh
- movi xh, 0
- beqz a11, .Ladd_addx
- or a9, a9, a10
- j .Ladd_addx
-
-.Ladd_returny:
- mov xh, yh
- mov xl, yl
- leaf_return
-
-.Ladd_carry:
- /* The addition has overflowed into the exponent field, so the
- value needs to be renormalized. The mantissa of the result
- can be recovered by subtracting the original exponent and
- adding 0x100000 (which is the explicit "1.0" for the
- mantissa of the non-shifted operand -- the "1.0" for the
- shifted operand was already added). The mantissa can then
- be shifted right by one bit. The explicit "1.0" of the
- shifted mantissa then needs to be replaced by the exponent,
- incremented by one to account for the normalizing shift.
- It is faster to combine these operations: do the shift first
- and combine the additions and subtractions. If x is the
- original exponent, the result is:
- shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
- or:
- shifted mantissa + ((x + 1) << 19)
- Note that the exponent is incremented here by leaving the
- explicit "1.0" of the mantissa in the exponent field. */
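
[The identity in the last step of that comment can be spot-checked in C; check_carry_identity is a throwaway, illustrative helper, not part of the original file.]

    #include <assert.h>
    #include <stdint.h>

    /* For any original exponent x:
       -(x << 19) + (1 << 19) + (x << 20) == ((x + 1) << 19),
       because (x << 20) - (x << 19) == (x << 19). */
    static void check_carry_identity(uint32_t x)
    {
        assert((x << 20) - (x << 19) + (1u << 19) == (x + 1u) << 19);
    }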
-
- /* Shift xh/xl right by one bit. Save the lsb of xl. */
- mov a10, xl
- ssai 1
- src xl, xh, xl
- srl xh, xh
-
- /* See explanation above. The original exponent is in a8. */
- addi a8, a8, 1
- slli a8, a8, 19
- add xh, xh, a8
-
- /* Return an Infinity if the exponent overflowed. */
- ball xh, a6, .Ladd_infinity
-
- /* Same thing as the "round" code except the msb of the leftover
- fraction is bit 0 of a10, with the rest of the fraction in a9. */
- bbci.l a10, 0, 1f
- addi xl, xl, 1
- beqz xl, .Ladd_roundcarry
- beqz a9, .Ladd_exactlyhalf
-1: leaf_return
-
-.Ladd_infinity:
- /* Clear the mantissa. */
- movi xl, 0
- srli xh, xh, 20
- slli xh, xh, 20
-
- /* The sign bit may have been lost in a carry-out. Put it back. */
- slli a8, a8, 1
- or xh, xh, a8
- leaf_return
-
-.Ladd_exactlyhalf:
- /* Round down to the nearest even value. */
- srli xl, xl, 1
- slli xl, xl, 1
- leaf_return
-
-.Ladd_roundcarry:
- /* xl is always zero when the rounding increment overflows, so
- there's no need to round it to an even value. */
- addi xh, xh, 1
- /* Overflow to the exponent is OK. */
- leaf_return
-
-
- /* Subtraction */
-__subdf3_aux:
-
- /* Handle NaNs and Infinities. (This code is placed before the
- start of the function just to keep it in range of the limited
- branch displacements.) */
-
-.Lsub_xnan_or_inf:
- /* If y is neither Infinity nor NaN, return x. */
- bnall yh, a6, 1f
- /* Both x and y are either NaN or Inf, so the result is NaN. */
- movi a4, 0x80000 /* make it a quiet NaN */
- or xh, xh, a4
-1: leaf_return
-
-.Lsub_ynan_or_inf:
- /* Negate y and return it. */
- slli a7, a6, 11
- xor xh, yh, a7
- mov xl, yl
- leaf_return
-
-.Lsub_opposite_signs:
- /* Operand signs differ. Do an addition. */
- slli a7, a6, 11
- xor yh, yh, a7
- j .Ladd_same_sign
-
- .align 4
- .global __subdf3
- .type __subdf3, @function
-__subdf3:
- leaf_entry sp, 16
- movi a6, 0x7ff00000
-
- /* Check if the two operands have the same sign. */
- xor a7, xh, yh
- bltz a7, .Lsub_opposite_signs
-
-.Lsub_same_sign:
- /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */
- ball xh, a6, .Lsub_xnan_or_inf
- ball yh, a6, .Lsub_ynan_or_inf
-
- /* Compare the operands. In contrast to addition, the entire
- value matters here. */
- extui a7, xh, 20, 11
- extui a8, yh, 20, 11
- bltu xh, yh, .Lsub_xsmaller
- beq xh, yh, .Lsub_compare_low
-
-.Lsub_ysmaller:
- /* Check if the smaller (or equal) exponent is zero. */
- bnone yh, a6, .Lsub_yexpzero
-
- /* Replace yh sign/exponent with 0x001. */
- or yh, yh, a6
- slli yh, yh, 11
- srli yh, yh, 11
-
-.Lsub_yexpdiff:
- /* Compute the exponent difference. Optimize for difference < 32. */
- sub a10, a7, a8
- bgeui a10, 32, .Lsub_bigshifty
-
- /* Shift yh/yl right by the exponent difference. Any bits that are
- shifted out of yl are saved in a9 for rounding the result. */
- ssr a10
- movi a9, 0
- src a9, yl, a9
- src yl, yh, yl
- srl yh, yh
-
-.Lsub_suby:
- /* Do the 64-bit subtraction. */
- sub xh, xh, yh
- bgeu xl, yl, 1f
- addi xh, xh, -1
-1: sub xl, xl, yl
-
- /* Subtract the leftover bits in a9 from zero and propagate any
- borrow from xh/xl. */
- neg a9, a9
- beqz a9, 1f
- addi a5, xh, -1
- moveqz xh, a5, xl
- addi xl, xl, -1
-1:
- /* Check if the subtract underflowed into the exponent. */
- extui a10, xh, 20, 11
- beq a10, a7, .Lsub_round
- j .Lsub_borrow
-
-.Lsub_compare_low:
- /* The high words are equal. Compare the low words. */
- bltu xl, yl, .Lsub_xsmaller
- bltu yl, xl, .Lsub_ysmaller
- /* The operands are equal. Return 0.0. */
- movi xh, 0
- movi xl, 0
-1: leaf_return
-
-.Lsub_yexpzero:
- /* y is a subnormal value. Replace its sign/exponent with zero,
- i.e., no implicit "1.0". Unless x is also a subnormal, increment
- y's apparent exponent because subnormals behave as if they had
- the minimum (nonzero) exponent. */
- slli yh, yh, 12
- srli yh, yh, 12
- bnone xh, a6, .Lsub_yexpdiff
- addi a8, a8, 1
- j .Lsub_yexpdiff
-
-.Lsub_bigshifty:
- /* Exponent difference > 64 -- just return the bigger value. */
- bgeui a10, 64, 1b
-
- /* Shift yh/yl right by the exponent difference. Any bits that are
- shifted out are saved in a9 for rounding the result. */
- ssr a10
- sll a11, yl /* lost bits shifted out of yl */
- src a9, yh, yl
- srl yl, yh
- movi yh, 0
- beqz a11, .Lsub_suby
- or a9, a9, a10 /* any positive, nonzero value will work */
- j .Lsub_suby
-
-.Lsub_xsmaller:
- /* Same thing as the "ysmaller" code, but with x and y swapped and
- with y negated. */
- bnone xh, a6, .Lsub_xexpzero
-
- or xh, xh, a6
- slli xh, xh, 11
- srli xh, xh, 11
-
-.Lsub_xexpdiff:
- sub a10, a8, a7
- bgeui a10, 32, .Lsub_bigshiftx
-
- ssr a10
- movi a9, 0
- src a9, xl, a9
- src xl, xh, xl
- srl xh, xh
-
- /* Negate y. */
- slli a11, a6, 11
- xor yh, yh, a11
-
-.Lsub_subx:
- sub xl, yl, xl
- sub xh, yh, xh
- bgeu yl, xl, 1f
- addi xh, xh, -1
-1:
- /* Subtract the leftover bits in a9 from zero and propagate any
- borrow from xh/xl. */
- neg a9, a9
- beqz a9, 1f
- addi a5, xh, -1
- moveqz xh, a5, xl
- addi xl, xl, -1
-1:
- /* Check if the subtract underflowed into the exponent. */
- extui a10, xh, 20, 11
- bne a10, a8, .Lsub_borrow
-
-.Lsub_round:
- /* Round up if the leftover fraction is >= 1/2. */
- bgez a9, 1f
- addi xl, xl, 1
- beqz xl, .Lsub_roundcarry
-
- /* Check if the leftover fraction is exactly 1/2. */
- slli a9, a9, 1
- beqz a9, .Lsub_exactlyhalf
-1: leaf_return
-
-.Lsub_xexpzero:
- /* Same as "yexpzero". */
- slli xh, xh, 12
- srli xh, xh, 12
- bnone yh, a6, .Lsub_xexpdiff
- addi a7, a7, 1
- j .Lsub_xexpdiff
-
-.Lsub_bigshiftx:
- /* Mostly the same thing as "bigshifty", but with the sign bit of the
- shifted value set so that the subsequent subtraction flips the
- sign of y. */
- bgeui a10, 64, .Lsub_returny
-
- ssr a10
- sll a11, xl
- src a9, xh, xl
- srl xl, xh
- slli xh, a6, 11 /* set sign bit of xh */
- beqz a11, .Lsub_subx
- or a9, a9, a10
- j .Lsub_subx
-
-.Lsub_returny:
- /* Negate and return y. */
- slli a7, a6, 11
- xor xh, yh, a7
- mov xl, yl
- leaf_return
-
-.Lsub_borrow:
- /* The subtraction has underflowed into the exponent field, so the
- value needs to be renormalized. Shift the mantissa left as
- needed to remove any leading zeros and adjust the exponent
- accordingly. If the exponent is not large enough to remove
- all the leading zeros, the result will be a subnormal value. */
-
- slli a8, xh, 12
- beqz a8, .Lsub_xhzero
- do_nsau a6, a8, a7, a11
- srli a8, a8, 12
- bge a6, a10, .Lsub_subnormal
- addi a6, a6, 1
-
-.Lsub_shift_lt32:
- /* Shift the mantissa (a8/xl/a9) left by a6. */
- ssl a6
- src a8, a8, xl
- src xl, xl, a9
- sll a9, a9
-
- /* Combine the shifted mantissa with the sign and exponent,
- decrementing the exponent by a6. (The exponent has already
- been decremented by one due to the borrow from the subtraction,
- but adding the mantissa will increment the exponent by one.) */
- srli xh, xh, 20
- sub xh, xh, a6
- slli xh, xh, 20
- add xh, xh, a8
- j .Lsub_round
-
-.Lsub_exactlyhalf:
- /* Round down to the nearest even value. */
- srli xl, xl, 1
- slli xl, xl, 1
- leaf_return
-
-.Lsub_roundcarry:
- /* xl is always zero when the rounding increment overflows, so
- there's no need to round it to an even value. */
- addi xh, xh, 1
- /* Overflow to the exponent is OK. */
- leaf_return
-
-.Lsub_xhzero:
- /* When normalizing the result, all the mantissa bits in the high
- word are zero. Shift by "20 + (leading zero count of xl) + 1". */
- do_nsau a6, xl, a7, a11
- addi a6, a6, 21
- blt a10, a6, .Lsub_subnormal
-
-.Lsub_normalize_shift:
- bltui a6, 32, .Lsub_shift_lt32
-
- ssl a6
- src a8, xl, a9
- sll xl, a9
- movi a9, 0
-
- srli xh, xh, 20
- sub xh, xh, a6
- slli xh, xh, 20
- add xh, xh, a8
- j .Lsub_round
-
-.Lsub_subnormal:
- /* The exponent is too small to shift away all the leading zeros.
- Set a6 to the current exponent (which has already been
- decremented by the borrow) so that the exponent of the result
- will be zero. Do not add 1 to a6 in this case, because: (1)
- adding the mantissa will not increment the exponent, so there is
- no need to subtract anything extra from the exponent to
- compensate, and (2) the effective exponent of a subnormal is 1
- not 0 so the shift amount must be 1 smaller than normal. */
- mov a6, a10
- j .Lsub_normalize_shift
-
-#endif /* L_addsubdf3 */
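
[Both the add and subtract paths above converge on the same round-to-nearest-even rule: the msb of the leftover fraction in a9 acts as the guard bit and the remaining bits as the sticky bits. A rough C model, with illustrative names:]

    #include <stdint.h>

    static uint64_t round_nearest_even(uint64_t mant, uint32_t leftover)
    {
        if (leftover & 0x80000000u) {    /* leftover fraction >= 1/2 */
            mant += 1;
            if ((leftover << 1) == 0)    /* exactly 1/2: break the tie */
                mant &= ~(uint64_t)1;    /* ... by rounding to even */
        }
        return mant;
    }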
-
-#ifdef L_muldf3
-
- /* Multiplication */
-#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
-#define XCHAL_NO_MUL 1
-#endif
-
-__muldf3_aux:
-
- /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
- (This code is placed before the start of the function just to
- keep it in range of the limited branch displacements.) */
-
-.Lmul_xexpzero:
- /* Clear the sign bit of x. */
- slli xh, xh, 1
- srli xh, xh, 1
-
- /* If x is zero, return zero. */
- or a10, xh, xl
- beqz a10, .Lmul_return_zero
-
- /* Normalize x. Adjust the exponent in a8. */
- beqz xh, .Lmul_xh_zero
- do_nsau a10, xh, a11, a12
- addi a10, a10, -11
- ssl a10
- src xh, xh, xl
- sll xl, xl
- movi a8, 1
- sub a8, a8, a10
- j .Lmul_xnormalized
-.Lmul_xh_zero:
- do_nsau a10, xl, a11, a12
- addi a10, a10, -11
- movi a8, -31
- sub a8, a8, a10
- ssl a10
- bltz a10, .Lmul_xl_srl
- sll xh, xl
- movi xl, 0
- j .Lmul_xnormalized
-.Lmul_xl_srl:
- srl xh, xl
- sll xl, xl
- j .Lmul_xnormalized
-
-.Lmul_yexpzero:
- /* Clear the sign bit of y. */
- slli yh, yh, 1
- srli yh, yh, 1
-
- /* If y is zero, return zero. */
- or a10, yh, yl
- beqz a10, .Lmul_return_zero
-
- /* Normalize y. Adjust the exponent in a9. */
- beqz yh, .Lmul_yh_zero
- do_nsau a10, yh, a11, a12
- addi a10, a10, -11
- ssl a10
- src yh, yh, yl
- sll yl, yl
- movi a9, 1
- sub a9, a9, a10
- j .Lmul_ynormalized
-.Lmul_yh_zero:
- do_nsau a10, yl, a11, a12
- addi a10, a10, -11
- movi a9, -31
- sub a9, a9, a10
- ssl a10
- bltz a10, .Lmul_yl_srl
- sll yh, yl
- movi yl, 0
- j .Lmul_ynormalized
-.Lmul_yl_srl:
- srl yh, yl
- sll yl, yl
- j .Lmul_ynormalized
-
-.Lmul_return_zero:
- /* Return zero with the appropriate sign bit. */
- srli xh, a7, 31
- slli xh, xh, 31
- movi xl, 0
- j .Lmul_done
-
-.Lmul_xnan_or_inf:
- /* If y is zero, return NaN. */
- bnez yl, 1f
- slli a8, yh, 1
- bnez a8, 1f
- movi a4, 0x80000 /* make it a quiet NaN */
- or xh, xh, a4
- j .Lmul_done
-1:
- /* If y is NaN, return y. */
- bnall yh, a6, .Lmul_returnx
- slli a8, yh, 12
- or a8, a8, yl
- beqz a8, .Lmul_returnx
-
-.Lmul_returny:
- mov xh, yh
- mov xl, yl
-
-.Lmul_returnx:
- /* Set the sign bit and return. */
- extui a7, a7, 31, 1
- slli xh, xh, 1
- ssai 1
- src xh, a7, xh
- j .Lmul_done
-
-.Lmul_ynan_or_inf:
- /* If x is zero, return NaN. */
- bnez xl, .Lmul_returny
- slli a8, xh, 1
- bnez a8, .Lmul_returny
- movi a7, 0x80000 /* make it a quiet NaN */
- or xh, yh, a7
- j .Lmul_done
-
- .align 4
- .global __muldf3
- .type __muldf3, @function
-__muldf3:
-#if __XTENSA_CALL0_ABI__
- leaf_entry sp, 32
- addi sp, sp, -32
- s32i a12, sp, 16
- s32i a13, sp, 20
- s32i a14, sp, 24
- s32i a15, sp, 28
-#elif XCHAL_NO_MUL
- /* This is not really a leaf function; allocate enough stack space
- to allow CALL12s to a helper function. */
- leaf_entry sp, 64
-#else
- leaf_entry sp, 32
-#endif
- movi a6, 0x7ff00000
-
- /* Get the sign of the result. */
- xor a7, xh, yh
-
- /* Check for NaN and infinity. */
- ball xh, a6, .Lmul_xnan_or_inf
- ball yh, a6, .Lmul_ynan_or_inf
-
- /* Extract the exponents. */
- extui a8, xh, 20, 11
- extui a9, yh, 20, 11
-
- beqz a8, .Lmul_xexpzero
-.Lmul_xnormalized:
- beqz a9, .Lmul_yexpzero
-.Lmul_ynormalized:
-
- /* Add the exponents. */
- add a8, a8, a9
-
- /* Replace sign/exponent fields with explicit "1.0". */
- movi a10, 0x1fffff
- or xh, xh, a6
- and xh, xh, a10
- or yh, yh, a6
- and yh, yh, a10
-
- /* Multiply 64x64 to 128 bits. The result ends up in xh/xl/a6.
- The least-significant word of the result is thrown away except
- that if it is nonzero, the lsb of a6 is set to 1. */
-#if XCHAL_HAVE_MUL32_HIGH
-
- /* Compute a6 with any carry-outs in a10. */
- movi a10, 0
- mull a6, xl, yh
- mull a11, xh, yl
- add a6, a6, a11
- bgeu a6, a11, 1f
- addi a10, a10, 1
-1:
- muluh a11, xl, yl
- add a6, a6, a11
- bgeu a6, a11, 1f
- addi a10, a10, 1
-1:
- /* If the low word of the result is nonzero, set the lsb of a6. */
- mull a11, xl, yl
- beqz a11, 1f
- movi a9, 1
- or a6, a6, a9
-1:
- /* Compute xl with any carry-outs in a9. */
- movi a9, 0
- mull a11, xh, yh
- add a10, a10, a11
- bgeu a10, a11, 1f
- addi a9, a9, 1
-1:
- muluh a11, xh, yl
- add a10, a10, a11
- bgeu a10, a11, 1f
- addi a9, a9, 1
-1:
- muluh xl, xl, yh
- add xl, xl, a10
- bgeu xl, a10, 1f
- addi a9, a9, 1
-1:
- /* Compute xh. */
- muluh xh, xh, yh
- add xh, xh, a9
-
-#else /* ! XCHAL_HAVE_MUL32_HIGH */
-
- /* Break the inputs into 16-bit chunks and compute 16 32-bit partial
- products. These partial products are:
-
- 0 xll * yll
-
- 1 xll * ylh
- 2 xlh * yll
-
- 3 xll * yhl
- 4 xlh * ylh
- 5 xhl * yll
-
- 6 xll * yhh
- 7 xlh * yhl
- 8 xhl * ylh
- 9 xhh * yll
-
- 10 xlh * yhh
- 11 xhl * yhl
- 12 xhh * ylh
-
- 13 xhl * yhh
- 14 xhh * yhl
-
- 15 xhh * yhh
-
- where the input chunks are (hh, hl, lh, ll). If using the Mul16
- or Mul32 multiplier options, these input chunks must be stored in
- separate registers. For Mac16, the UMUL.AA.* opcodes can specify
- that the inputs come from either half of the registers, so there
- is no need to shift them out ahead of time. If there is no
- multiply hardware, the 16-bit chunks can be extracted when setting
- up the arguments to the separate multiply function. */
-
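[The accumulation scheme is easier to see in C with 32-bit chunks; the assembly does the same thing with 16-bit chunks, which is why it needs 16 partial products instead of the 4 shown in this sketch.]

    #include <stdint.h>

    static void mul64x64_128(uint64_t x, uint64_t y,
                             uint64_t *hi, uint64_t *lo)
    {
        uint64_t xl = (uint32_t)x, xh = x >> 32;
        uint64_t yl = (uint32_t)y, yh = y >> 32;

        uint64_t ll = xl * yl;           /* contributes to bits  0..63  */
        uint64_t lh = xl * yh;           /* contributes to bits 32..95  */
        uint64_t hl = xh * yl;           /* contributes to bits 32..95  */
        uint64_t hh = xh * yh;           /* contributes to bits 64..127 */

        /* Sum the middle column, collecting carries. */
        uint64_t mid = (ll >> 32) + (uint32_t)lh + (uint32_t)hl;
        *lo = (mid << 32) | (uint32_t)ll;
        *hi = hh + (lh >> 32) + (hl >> 32) + (mid >> 32);
    }
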
- /* Save a7 since it is needed to hold a temporary value. */
- s32i a7, sp, 4
-#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
- /* Calling a separate multiply function will clobber a0 and requires
- use of a8 as a temporary, so save those values now. (The function
- uses a custom ABI so nothing else needs to be saved.) */
- s32i a0, sp, 0
- s32i a8, sp, 8
-#endif
-
-#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
-
-#define xlh a12
-#define ylh a13
-#define xhh a14
-#define yhh a15
-
- /* Get the high halves of the inputs into registers. */
- srli xlh, xl, 16
- srli ylh, yl, 16
- srli xhh, xh, 16
- srli yhh, yh, 16
-
-#define xll xl
-#define yll yl
-#define xhl xh
-#define yhl yh
-
-#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
- /* Clear the high halves of the inputs. This does not matter
- for MUL16 because the high bits are ignored. */
- extui xl, xl, 0, 16
- extui xh, xh, 0, 16
- extui yl, yl, 0, 16
- extui yh, yh, 0, 16
-#endif
-#endif /* MUL16 || MUL32 */
-
-
-#if XCHAL_HAVE_MUL16
-
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
- mul16u dst, xreg ## xhalf, yreg ## yhalf
-
-#elif XCHAL_HAVE_MUL32
-
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
- mull dst, xreg ## xhalf, yreg ## yhalf
-
-#elif XCHAL_HAVE_MAC16
-
-/* The preprocessor insists on inserting a space when concatenating after
- a period in the definition of do_mul below. These macros are a workaround
- using underscores instead of periods when doing the concatenation. */
-#define umul_aa_ll umul.aa.ll
-#define umul_aa_lh umul.aa.lh
-#define umul_aa_hl umul.aa.hl
-#define umul_aa_hh umul.aa.hh
-
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
- umul_aa_ ## xhalf ## yhalf xreg, yreg; \
- rsr dst, ACCLO
-
-#else /* no multiply hardware */
-
-#define set_arg_l(dst, src) \
- extui dst, src, 0, 16
-#define set_arg_h(dst, src) \
- srli dst, src, 16
-
-#if __XTENSA_CALL0_ABI__
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
- set_arg_ ## xhalf (a13, xreg); \
- set_arg_ ## yhalf (a14, yreg); \
- call0 .Lmul_mulsi3; \
- mov dst, a12
-#else
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
- set_arg_ ## xhalf (a14, xreg); \
- set_arg_ ## yhalf (a15, yreg); \
- call12 .Lmul_mulsi3; \
- mov dst, a14
-#endif /* __XTENSA_CALL0_ABI__ */
-
-#endif /* no multiply hardware */
-
- /* Add pp1 and pp2 into a10 with carry-out in a9. */
- do_mul(a10, xl, l, yl, h) /* pp 1 */
- do_mul(a11, xl, h, yl, l) /* pp 2 */
- movi a9, 0
- add a10, a10, a11
- bgeu a10, a11, 1f
- addi a9, a9, 1
-1:
- /* Initialize a6 with a9/a10 shifted into position. Note that
- this value can be safely incremented without any carry-outs. */
- ssai 16
- src a6, a9, a10
-
- /* Compute the low word into a10. */
- do_mul(a11, xl, l, yl, l) /* pp 0 */
- sll a10, a10
- add a10, a10, a11
- bgeu a10, a11, 1f
- addi a6, a6, 1
-1:
- /* Compute the contributions of pp0-5 to a6, with carry-outs in a9.
- This is good enough to determine the low half of a6, so that any
- nonzero bits from the low word of the result can be collapsed
- into a6, freeing up a register. */
- movi a9, 0
- do_mul(a11, xl, l, yh, l) /* pp 3 */
- add a6, a6, a11
- bgeu a6, a11, 1f
- addi a9, a9, 1
-1:
- do_mul(a11, xl, h, yl, h) /* pp 4 */
- add a6, a6, a11
- bgeu a6, a11, 1f
- addi a9, a9, 1
-1:
- do_mul(a11, xh, l, yl, l) /* pp 5 */
- add a6, a6, a11
- bgeu a6, a11, 1f
- addi a9, a9, 1
-1:
- /* Collapse any nonzero bits from the low word into a6. */
- beqz a10, 1f
- movi a11, 1
- or a6, a6, a11
-1:
- /* Add pp6-9 into a11 with carry-outs in a10. */
- do_mul(a7, xl, l, yh, h) /* pp 6 */
- do_mul(a11, xh, h, yl, l) /* pp 9 */
- movi a10, 0
- add a11, a11, a7
- bgeu a11, a7, 1f
- addi a10, a10, 1
-1:
- do_mul(a7, xl, h, yh, l) /* pp 7 */
- add a11, a11, a7
- bgeu a11, a7, 1f
- addi a10, a10, 1
-1:
- do_mul(a7, xh, l, yl, h) /* pp 8 */
- add a11, a11, a7
- bgeu a11, a7, 1f
- addi a10, a10, 1
-1:
- /* Shift a10/a11 into position, and add low half of a11 to a6. */
- src a10, a10, a11
- add a10, a10, a9
- sll a11, a11
- add a6, a6, a11
- bgeu a6, a11, 1f
- addi a10, a10, 1
-1:
- /* Add pp10-12 into xl with carry-outs in a9. */
- movi a9, 0
- do_mul(xl, xl, h, yh, h) /* pp 10 */
- add xl, xl, a10
- bgeu xl, a10, 1f
- addi a9, a9, 1
-1:
- do_mul(a10, xh, l, yh, l) /* pp 11 */
- add xl, xl, a10
- bgeu xl, a10, 1f
- addi a9, a9, 1
-1:
- do_mul(a10, xh, h, yl, h) /* pp 12 */
- add xl, xl, a10
- bgeu xl, a10, 1f
- addi a9, a9, 1
-1:
- /* Add pp13-14 into a11 with carry-outs in a10. */
- do_mul(a11, xh, l, yh, h) /* pp 13 */
- do_mul(a7, xh, h, yh, l) /* pp 14 */
- movi a10, 0
- add a11, a11, a7
- bgeu a11, a7, 1f
- addi a10, a10, 1
-1:
- /* Shift a10/a11 into position, and add low half of a11 to a6. */
- src a10, a10, a11
- add a10, a10, a9
- sll a11, a11
- add xl, xl, a11
- bgeu xl, a11, 1f
- addi a10, a10, 1
-1:
- /* Compute xh. */
- do_mul(xh, xh, h, yh, h) /* pp 15 */
- add xh, xh, a10
-
- /* Restore values saved on the stack during the multiplication. */
- l32i a7, sp, 4
-#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
- l32i a0, sp, 0
- l32i a8, sp, 8
-#endif
-#endif /* ! XCHAL_HAVE_MUL32_HIGH */
-
- /* Shift left by 12 bits, unless there was a carry-out from the
- multiply, in which case, shift by 11 bits and increment the
- exponent. Note: It is convenient to use the constant 0x3ff
- instead of 0x400 when removing the extra exponent bias (so that
- it is easy to construct 0x7fe for the overflow check). Reverse
- the logic here to decrement the exponent sum by one unless there
- was a carry-out. */
- movi a4, 11
- srli a5, xh, 21 - 12
- bnez a5, 1f
- addi a4, a4, 1
- addi a8, a8, -1
-1: ssl a4
- src xh, xh, xl
- src xl, xl, a6
- sll a6, a6
-
- /* Subtract the extra bias from the exponent sum (plus one to account
- for the explicit "1.0" of the mantissa that will be added to the
- exponent in the final result). */
- movi a4, 0x3ff
- sub a8, a8, a4
-
- /* Check for over/underflow. The value in a8 is one less than the
- final exponent, so values in the range 0..7fd are OK here. */
- slli a4, a4, 1 /* 0x7fe */
- bgeu a8, a4, .Lmul_overflow
-
-.Lmul_round:
- /* Round. */
- bgez a6, .Lmul_rounded
- addi xl, xl, 1
- beqz xl, .Lmul_roundcarry
- slli a6, a6, 1
- beqz a6, .Lmul_exactlyhalf
-
-.Lmul_rounded:
- /* Add the exponent to the mantissa. */
- slli a8, a8, 20
- add xh, xh, a8
-
-.Lmul_addsign:
- /* Add the sign bit. */
- srli a7, a7, 31
- slli a7, a7, 31
- or xh, xh, a7
-
-.Lmul_done:
-#if __XTENSA_CALL0_ABI__
- l32i a12, sp, 16
- l32i a13, sp, 20
- l32i a14, sp, 24
- l32i a15, sp, 28
- addi sp, sp, 32
-#endif
- leaf_return
-
-.Lmul_exactlyhalf:
- /* Round down to the nearest even value. */
- srli xl, xl, 1
- slli xl, xl, 1
- j .Lmul_rounded
-
-.Lmul_roundcarry:
- /* xl is always zero when the rounding increment overflows, so
- there's no need to round it to an even value. */
- addi xh, xh, 1
- /* Overflow is OK -- it will be added to the exponent. */
- j .Lmul_rounded
-
-.Lmul_overflow:
- bltz a8, .Lmul_underflow
- /* Return +/- Infinity. */
- addi a8, a4, 1 /* 0x7ff */
- slli xh, a8, 20
- movi xl, 0
- j .Lmul_addsign
-
-.Lmul_underflow:
- /* Create a subnormal value, where the exponent field contains zero,
- but the effective exponent is 1. The value of a8 is one less than
- the actual exponent, so just negate it to get the shift amount. */
- neg a8, a8
- mov a9, a6
- ssr a8
- bgeui a8, 32, .Lmul_bigshift
-
- /* Shift xh/xl right. Any bits that are shifted out of xl are saved
- in a6 (combined with the shifted-out bits currently in a6) for
- rounding the result. */
- sll a6, xl
- src xl, xh, xl
- srl xh, xh
- j 1f
-
-.Lmul_bigshift:
- bgeui a8, 64, .Lmul_flush_to_zero
- sll a10, xl /* lost bits shifted out of xl */
- src a6, xh, xl
- srl xl, xh
- movi xh, 0
- or a9, a9, a10
-
- /* Set the exponent to zero. */
-1: movi a8, 0
-
- /* Pack any nonzero bits shifted out into a6. */
- beqz a9, .Lmul_round
- movi a9, 1
- or a6, a6, a9
- j .Lmul_round
-
-.Lmul_flush_to_zero:
- /* Return zero with the appropriate sign bit. */
- srli xh, a7, 31
- slli xh, xh, 31
- movi xl, 0
- j .Lmul_done
-
-#if XCHAL_NO_MUL
-
- /* For Xtensa processors with no multiply hardware, this simplified
- version of _mulsi3 is used for multiplying 16-bit chunks of
- the floating-point mantissas. When using CALL0, this function
- uses a custom ABI: the inputs are passed in a13 and a14, the
- result is returned in a12, and a8 and a15 are clobbered. */
- .align 4
-.Lmul_mulsi3:
- leaf_entry sp, 16
- .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
- movi \dst, 0
-1: add \tmp1, \src2, \dst
- extui \tmp2, \src1, 0, 1
- movnez \dst, \tmp1, \tmp2
-
- do_addx2 \tmp1, \src2, \dst, \tmp1
- extui \tmp2, \src1, 1, 1
- movnez \dst, \tmp1, \tmp2
-
- do_addx4 \tmp1, \src2, \dst, \tmp1
- extui \tmp2, \src1, 2, 1
- movnez \dst, \tmp1, \tmp2
-
- do_addx8 \tmp1, \src2, \dst, \tmp1
- extui \tmp2, \src1, 3, 1
- movnez \dst, \tmp1, \tmp2
-
- srli \src1, \src1, 4
- slli \src2, \src2, 4
- bnez \src1, 1b
- .endm
-#if __XTENSA_CALL0_ABI__
- mul_mulsi3_body a12, a13, a14, a15, a8
-#else
- /* The result will be written into a2, so save that argument in a4. */
- mov a4, a2
- mul_mulsi3_body a2, a4, a3, a5, a6
-#endif
- leaf_return
-#endif /* XCHAL_NO_MUL */
-#endif /* L_muldf3 */
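
[The .Lmul_mulsi3 loop above retires four multiplier bits per iteration via the addx2/addx4/addx8 steps; the same control flow in C, as an illustrative sketch:]

    #include <stdint.h>

    static uint32_t mulsi3_model(uint32_t a, uint32_t b)
    {
        uint32_t acc = 0;
        while (a) {
            if (a & 1) acc += b;
            if (a & 2) acc += b << 1;    /* do_addx2 */
            if (a & 4) acc += b << 2;    /* do_addx4 */
            if (a & 8) acc += b << 3;    /* do_addx8 */
            a >>= 4;
            b <<= 4;
        }
        return acc;
    }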
-
-#ifdef L_divdf3
-
- /* Division */
-__divdf3_aux:
-
- /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
- (This code is placed before the start of the function just to
- keep it in range of the limited branch displacements.) */
-
-.Ldiv_yexpzero:
- /* Clear the sign bit of y. */
- slli yh, yh, 1
- srli yh, yh, 1
-
- /* Check for division by zero. */
- or a10, yh, yl
- beqz a10, .Ldiv_yzero
-
- /* Normalize y. Adjust the exponent in a9. */
- beqz yh, .Ldiv_yh_zero
- do_nsau a10, yh, a11, a9
- addi a10, a10, -11
- ssl a10
- src yh, yh, yl
- sll yl, yl
- movi a9, 1
- sub a9, a9, a10
- j .Ldiv_ynormalized
-.Ldiv_yh_zero:
- do_nsau a10, yl, a11, a9
- addi a10, a10, -11
- movi a9, -31
- sub a9, a9, a10
- ssl a10
- bltz a10, .Ldiv_yl_srl
- sll yh, yl
- movi yl, 0
- j .Ldiv_ynormalized
-.Ldiv_yl_srl:
- srl yh, yl
- sll yl, yl
- j .Ldiv_ynormalized
-
-.Ldiv_yzero:
- /* y is zero. Return NaN if x is also zero; otherwise, infinity. */
- slli xh, xh, 1
- srli xh, xh, 1
- or xl, xl, xh
- srli xh, a7, 31
- slli xh, xh, 31
- or xh, xh, a6
- bnez xl, 1f
- movi a4, 0x80000 /* make it a quiet NaN */
- or xh, xh, a4
-1: movi xl, 0
- leaf_return
-
-.Ldiv_xexpzero:
- /* Clear the sign bit of x. */
- slli xh, xh, 1
- srli xh, xh, 1
-
- /* If x is zero, return zero. */
- or a10, xh, xl
- beqz a10, .Ldiv_return_zero
-
- /* Normalize x. Adjust the exponent in a8. */
- beqz xh, .Ldiv_xh_zero
- do_nsau a10, xh, a11, a8
- addi a10, a10, -11
- ssl a10
- src xh, xh, xl
- sll xl, xl
- movi a8, 1
- sub a8, a8, a10
- j .Ldiv_xnormalized
-.Ldiv_xh_zero:
- do_nsau a10, xl, a11, a8
- addi a10, a10, -11
- movi a8, -31
- sub a8, a8, a10
- ssl a10
- bltz a10, .Ldiv_xl_srl
- sll xh, xl
- movi xl, 0
- j .Ldiv_xnormalized
-.Ldiv_xl_srl:
- srl xh, xl
- sll xl, xl
- j .Ldiv_xnormalized
-
-.Ldiv_return_zero:
- /* Return zero with the appropriate sign bit. */
- srli xh, a7, 31
- slli xh, xh, 31
- movi xl, 0
- leaf_return
-
-.Ldiv_xnan_or_inf:
- /* Set the sign bit of the result. */
- srli a7, yh, 31
- slli a7, a7, 31
- xor xh, xh, a7
- /* If y is NaN or Inf, return NaN. */
- bnall yh, a6, 1f
- movi a4, 0x80000 /* make it a quiet NaN */
- or xh, xh, a4
-1: leaf_return
-
-.Ldiv_ynan_or_inf:
- /* If y is Infinity, return zero. */
- slli a8, yh, 12
- or a8, a8, yl
- beqz a8, .Ldiv_return_zero
- /* y is NaN; return it. */
- mov xh, yh
- mov xl, yl
- leaf_return
-
-.Ldiv_highequal1:
- bltu xl, yl, 2f
- j 3f
-
- .align 4
- .global __divdf3
- .type __divdf3, @function
-__divdf3:
- leaf_entry sp, 16
- movi a6, 0x7ff00000
-
- /* Get the sign of the result. */
- xor a7, xh, yh
-
- /* Check for NaN and infinity. */
- ball xh, a6, .Ldiv_xnan_or_inf
- ball yh, a6, .Ldiv_ynan_or_inf
-
- /* Extract the exponents. */
- extui a8, xh, 20, 11
- extui a9, yh, 20, 11
-
- beqz a9, .Ldiv_yexpzero
-.Ldiv_ynormalized:
- beqz a8, .Ldiv_xexpzero
-.Ldiv_xnormalized:
-
- /* Subtract the exponents. */
- sub a8, a8, a9
-
- /* Replace sign/exponent fields with explicit "1.0". */
- movi a10, 0x1fffff
- or xh, xh, a6
- and xh, xh, a10
- or yh, yh, a6
- and yh, yh, a10
-
- /* Set SAR for left shift by one. */
- ssai (32 - 1)
-
- /* The first digit of the mantissa division must be a one.
- Shift x (and adjust the exponent) as needed to make this true. */
- bltu yh, xh, 3f
- beq yh, xh, .Ldiv_highequal1
-2: src xh, xh, xl
- sll xl, xl
- addi a8, a8, -1
-3:
- /* Do the first subtraction and shift. */
- sub xh, xh, yh
- bgeu xl, yl, 1f
- addi xh, xh, -1
-1: sub xl, xl, yl
- src xh, xh, xl
- sll xl, xl
-
- /* Put the quotient into a10/a11. */
- movi a10, 0
- movi a11, 1
-
- /* Divide one bit at a time for 52 bits. */
- movi a9, 52
-#if XCHAL_HAVE_LOOPS
- loop a9, .Ldiv_loopend
-#endif
-.Ldiv_loop:
- /* Shift the quotient << 1. */
- src a10, a10, a11
- sll a11, a11
-
- /* Is this digit a 0 or 1? */
- bltu xh, yh, 3f
- beq xh, yh, .Ldiv_highequal2
-
- /* Output a 1 and subtract. */
-2: addi a11, a11, 1
- sub xh, xh, yh
- bgeu xl, yl, 1f
- addi xh, xh, -1
-1: sub xl, xl, yl
-
- /* Shift the dividend << 1. */
-3: src xh, xh, xl
- sll xl, xl
-
-#if !XCHAL_HAVE_LOOPS
- addi a9, a9, -1
- bnez a9, .Ldiv_loop
-#endif
-.Ldiv_loopend:
-
- /* Add the exponent bias (less one to account for the explicit "1.0"
- of the mantissa that will be added to the exponent in the final
- result). */
- movi a9, 0x3fe
- add a8, a8, a9
-
- /* Check for over/underflow. The value in a8 is one less than the
- final exponent, so values in the range 0..7fd are OK here. */
- addmi a9, a9, 0x400 /* 0x7fe */
- bgeu a8, a9, .Ldiv_overflow
-
-.Ldiv_round:
- /* Round. The remainder (<< 1) is in xh/xl. */
- bltu xh, yh, .Ldiv_rounded
- beq xh, yh, .Ldiv_highequal3
-.Ldiv_roundup:
- addi a11, a11, 1
- beqz a11, .Ldiv_roundcarry
-
-.Ldiv_rounded:
- mov xl, a11
- /* Add the exponent to the mantissa. */
- slli a8, a8, 20
- add xh, a10, a8
-
-.Ldiv_addsign:
- /* Add the sign bit. */
- srli a7, a7, 31
- slli a7, a7, 31
- or xh, xh, a7
- leaf_return
-
-.Ldiv_highequal2:
- bgeu xl, yl, 2b
- j 3b
-
-.Ldiv_highequal3:
- bltu xl, yl, .Ldiv_rounded
- bne xl, yl, .Ldiv_roundup
-
- /* Remainder is exactly half the divisor. Round even. */
- addi a11, a11, 1
- beqz a11, .Ldiv_roundcarry
- srli a11, a11, 1
- slli a11, a11, 1
- j .Ldiv_rounded
-
-.Ldiv_overflow:
- bltz a8, .Ldiv_underflow
- /* Return +/- Infinity. */
- addi a8, a9, 1 /* 0x7ff */
- slli xh, a8, 20
- movi xl, 0
- j .Ldiv_addsign
-
-.Ldiv_underflow:
- /* Create a subnormal value, where the exponent field contains zero,
- but the effective exponent is 1. The value of a8 is one less than
- the actual exponent, so just negate it to get the shift amount. */
- neg a8, a8
- ssr a8
- bgeui a8, 32, .Ldiv_bigshift
-
- /* Shift a10/a11 right. Any bits that are shifted out of a11 are
- saved in a6 for rounding the result. */
- sll a6, a11
- src a11, a10, a11
- srl a10, a10
- j 1f
-
-.Ldiv_bigshift:
- bgeui a8, 64, .Ldiv_flush_to_zero
- sll a9, a11 /* lost bits shifted out of a11 */
- src a6, a10, a11
- srl a11, a10
- movi a10, 0
- or xl, xl, a9
-
- /* Set the exponent to zero. */
-1: movi a8, 0
-
- /* Pack any nonzero remainder (in xh/xl) into a6. */
- or xh, xh, xl
- beqz xh, 1f
- movi a9, 1
- or a6, a6, a9
-
- /* Round a10/a11 based on the bits shifted out into a6. */
-1: bgez a6, .Ldiv_rounded
- addi a11, a11, 1
- beqz a11, .Ldiv_roundcarry
- slli a6, a6, 1
- bnez a6, .Ldiv_rounded
- srli a11, a11, 1
- slli a11, a11, 1
- j .Ldiv_rounded
-
-.Ldiv_roundcarry:
- /* a11 is always zero when the rounding increment overflows, so
- there's no need to round it to an even value. */
- addi a10, a10, 1
- /* Overflow to the exponent field is OK. */
- j .Ldiv_rounded
-
-.Ldiv_flush_to_zero:
- /* Return zero with the appropriate sign bit. */
- srli xh, a7, 31
- slli xh, xh, 31
- movi xl, 0
- leaf_return
-
-#endif /* L_divdf3 */
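
[The quotient loop in __divdf3 above is a restoring division producing one bit per iteration. A C model of the same scheme, where rem stands for the xh/xl remainder pair and div for yh/yl (sketch only; both fit in 64 bits here because the mantissas are 53-bit values):]

    #include <stdint.h>

    static uint64_t div_mantissa(uint64_t rem, uint64_t div)
    {
        /* Caller has arranged rem >= div: the first digit is a one. */
        uint64_t quo = 1;
        rem = (rem - div) << 1;
        for (int i = 0; i < 52; i++) {
            quo <<= 1;
            if (rem >= div) {            /* this digit is a 1 */
                quo |= 1;
                rem -= div;
            }
            rem <<= 1;                   /* shift the dividend << 1 */
        }
        return quo;    /* 53-bit quotient; the doubled rem drives rounding */
    }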
-
-#ifdef L_cmpdf2
-
- /* Equal and Not Equal */
-
- .align 4
- .global __eqdf2
- .global __nedf2
- .set __nedf2, __eqdf2
- .type __eqdf2, @function
-__eqdf2:
- leaf_entry sp, 16
- bne xl, yl, 2f
- bne xh, yh, 4f
-
- /* The values are equal but NaN != NaN. Check the exponent. */
- movi a6, 0x7ff00000
- ball xh, a6, 3f
-
- /* Equal. */
- movi a2, 0
- leaf_return
-
- /* Not equal. */
-2: movi a2, 1
- leaf_return
-
- /* Check if the mantissas are nonzero. */
-3: slli a7, xh, 12
- or a7, a7, xl
- j 5f
-
- /* Check if x and y are zero with different signs. */
-4: or a7, xh, yh
- slli a7, a7, 1
- or a7, a7, xl /* xl == yl here */
-
- /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
- of x when exponent(x) = 0x7ff and x == y. */
-5: movi a2, 0
- movi a3, 1
- movnez a2, a3, a7
- leaf_return
-
-
- /* Greater Than */
-
- .align 4
- .global __gtdf2
- .type __gtdf2, @function
-__gtdf2:
- leaf_entry sp, 16
- movi a6, 0x7ff00000
- ball xh, a6, 2f
-1: bnall yh, a6, .Lle_cmp
-
- /* Check if y is a NaN. */
- slli a7, yh, 12
- or a7, a7, yl
- beqz a7, .Lle_cmp
- movi a2, 0
- leaf_return
-
- /* Check if x is a NaN. */
-2: slli a7, xh, 12
- or a7, a7, xl
- beqz a7, 1b
- movi a2, 0
- leaf_return
-
-
- /* Less Than or Equal */
-
- .align 4
- .global __ledf2
- .type __ledf2, @function
-__ledf2:
- leaf_entry sp, 16
- movi a6, 0x7ff00000
- ball xh, a6, 2f
-1: bnall yh, a6, .Lle_cmp
-
- /* Check if y is a NaN. */
- slli a7, yh, 12
- or a7, a7, yl
- beqz a7, .Lle_cmp
- movi a2, 1
- leaf_return
-
- /* Check if x is a NaN. */
-2: slli a7, xh, 12
- or a7, a7, xl
- beqz a7, 1b
- movi a2, 1
- leaf_return
-
-.Lle_cmp:
- /* Check if x and y have different signs. */
- xor a7, xh, yh
- bltz a7, .Lle_diff_signs
-
- /* Check if x is negative. */
- bltz xh, .Lle_xneg
-
- /* Check if x <= y. */
- bltu xh, yh, 4f
- bne xh, yh, 5f
- bltu yl, xl, 5f
-4: movi a2, 0
- leaf_return
-
-.Lle_xneg:
- /* Check if y <= x. */
- bltu yh, xh, 4b
- bne yh, xh, 5f
- bgeu xl, yl, 4b
-5: movi a2, 1
- leaf_return
-
-.Lle_diff_signs:
- bltz xh, 4b
-
- /* Check if both x and y are zero. */
- or a7, xh, yh
- slli a7, a7, 1
- or a7, a7, xl
- or a7, a7, yl
- movi a2, 1
- movi a3, 0
- moveqz a2, a3, a7
- leaf_return
-
-
- /* Greater Than or Equal */
-
- .align 4
- .global __gedf2
- .type __gedf2, @function
-__gedf2:
- leaf_entry sp, 16
- movi a6, 0x7ff00000
- ball xh, a6, 2f
-1: bnall yh, a6, .Llt_cmp
-
- /* Check if y is a NaN. */
- slli a7, yh, 12
- or a7, a7, yl
- beqz a7, .Llt_cmp
- movi a2, -1
- leaf_return
-
- /* Check if x is a NaN. */
-2: slli a7, xh, 12
- or a7, a7, xl
- beqz a7, 1b
- movi a2, -1
- leaf_return
-
-
- /* Less Than */
-
- .align 4
- .global __ltdf2
- .type __ltdf2, @function
-__ltdf2:
- leaf_entry sp, 16
- movi a6, 0x7ff00000
- ball xh, a6, 2f
-1: bnall yh, a6, .Llt_cmp
-
- /* Check if y is a NaN. */
- slli a7, yh, 12
- or a7, a7, yl
- beqz a7, .Llt_cmp
- movi a2, 0
- leaf_return
-
- /* Check if x is a NaN. */
-2: slli a7, xh, 12
- or a7, a7, xl
- beqz a7, 1b
- movi a2, 0
- leaf_return
-
-.Llt_cmp:
- /* Check if x and y have different signs. */
- xor a7, xh, yh
- bltz a7, .Llt_diff_signs
-
- /* Check if x is negative. */
- bltz xh, .Llt_xneg
-
- /* Check if x < y. */
- bltu xh, yh, 4f
- bne xh, yh, 5f
- bgeu xl, yl, 5f
-4: movi a2, -1
- leaf_return
-
-.Llt_xneg:
- /* Check if y < x. */
- bltu yh, xh, 4b
- bne yh, xh, 5f
- bltu yl, xl, 4b
-5: movi a2, 0
- leaf_return
-
-.Llt_diff_signs:
- bgez xh, 5b
-
- /* Check if both x and y are nonzero. */
- or a7, xh, yh
- slli a7, a7, 1
- or a7, a7, xl
- or a7, a7, yl
- movi a2, 0
- movi a3, -1
- movnez a2, a3, a7
- leaf_return
-
-
- /* Unordered */
-
- .align 4
- .global __unorddf2
- .type __unorddf2, @function
-__unorddf2:
- leaf_entry sp, 16
- movi a6, 0x7ff00000
- ball xh, a6, 3f
-1: ball yh, a6, 4f
-2: movi a2, 0
- leaf_return
-
-3: slli a7, xh, 12
- or a7, a7, xl
- beqz a7, 1b
- movi a2, 1
- leaf_return
-
-4: slli a7, yh, 12
- or a7, a7, yl
- beqz a7, 2b
- movi a2, 1
- leaf_return
-
-#endif /* L_cmpdf2 */
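
[The comparison semantics implemented above, restated as a C sketch for the equality case: the result is zero exactly when the operands compare equal, with NaN unequal to everything (including itself) and +0 equal to -0. The helper name is illustrative.]

    #include <stdint.h>

    static int eqdf2_model(uint32_t xh, uint32_t xl,
                           uint32_t yh, uint32_t yl)
    {
        /* NaN: exponent all ones with a nonzero mantissa. */
        int x_nan = ((xh & 0x7ff00000u) == 0x7ff00000u)
                    && (((xh << 12) | xl) != 0);
        int y_nan = ((yh & 0x7ff00000u) == 0x7ff00000u)
                    && (((yh << 12) | yl) != 0);
        if (x_nan || y_nan)
            return 1;                        /* unordered: not equal */
        if ((((xh | yh) << 1) | xl | yl) == 0)
            return 0;                        /* +0 == -0 */
        return !(xh == yh && xl == yl);
    }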
-
-#ifdef L_fixdfsi
-
- .align 4
- .global __fixdfsi
- .type __fixdfsi, @function
-__fixdfsi:
- leaf_entry sp, 16
-
- /* Check for NaN and Infinity. */
- movi a6, 0x7ff00000
- ball xh, a6, .Lfixdfsi_nan_or_inf
-
- /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32. */
- extui a4, xh, 20, 11
- extui a5, a6, 19, 10 /* 0x3fe */
- sub a4, a4, a5
- bgei a4, 32, .Lfixdfsi_maxint
- blti a4, 1, .Lfixdfsi_zero
-
- /* Add explicit "1.0" and shift << 11. */
- or a7, xh, a6
- ssai (32 - 11)
- src a5, a7, xl
-
- /* Shift back to the right, based on the exponent. */
- ssl a4 /* shift by 32 - a4 */
- srl a5, a5
-
- /* Negate the result if sign != 0. */
- neg a2, a5
- movgez a2, a5, a7
- leaf_return
-
-.Lfixdfsi_nan_or_inf:
- /* Handle Infinity and NaN. */
- slli a4, xh, 12
- or a4, a4, xl
- beqz a4, .Lfixdfsi_maxint
-
- /* Translate NaN to +maxint. */
- movi xh, 0
-
-.Lfixdfsi_maxint:
- slli a4, a6, 11 /* 0x80000000 */
- addi a5, a4, -1 /* 0x7fffffff */
- movgez a4, a5, xh
- mov a2, a4
- leaf_return
-
-.Lfixdfsi_zero:
- movi a2, 0
- leaf_return
-
-#endif /* L_fixdfsi */
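
[For reference, the saturating conversion implemented by __fixdfsi above can be modeled in C: NaN maps to +maxint, out-of-range values saturate, and magnitudes below 1.0 truncate to zero. fixdfsi_model is illustrative.]

    #include <stdint.h>

    static int32_t fixdfsi_model(uint32_t xh, uint32_t xl)
    {
        if (((xh & 0x7ff00000u) == 0x7ff00000u)
            && (((xh << 12) | xl) != 0))
            return INT32_MAX;                /* NaN -> +maxint */
        int exp = ((xh >> 20) & 0x7ff) - 0x3fe;
        if (exp >= 32)
            return (int32_t)xh < 0 ? INT32_MIN : INT32_MAX;
        if (exp < 1)
            return 0;
        /* Insert the implicit 1.0, shift left 11, keep the top bits. */
        uint32_t mant = (((xh & 0xfffffu) | 0x100000u) << 11) | (xl >> 21);
        uint32_t val  = mant >> (32 - exp);
        return (int32_t)xh < 0 ? -(int32_t)val : (int32_t)val;
    }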
-
-#ifdef L_fixdfdi
-
- .align 4
- .global __fixdfdi
- .type __fixdfdi, @function
-__fixdfdi:
- leaf_entry sp, 16
-
- /* Check for NaN and Infinity. */
- movi a6, 0x7ff00000
- ball xh, a6, .Lfixdfdi_nan_or_inf
-
- /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64. */
- extui a4, xh, 20, 11
- extui a5, a6, 19, 10 /* 0x3fe */
- sub a4, a4, a5
- bgei a4, 64, .Lfixdfdi_maxint
- blti a4, 1, .Lfixdfdi_zero
-
- /* Add explicit "1.0" and shift << 11. */
- or a7, xh, a6
- ssai (32 - 11)
- src xh, a7, xl
- sll xl, xl
-
- /* Shift back to the right, based on the exponent. */
- ssl a4 /* shift by 64 - a4 */
- bgei a4, 32, .Lfixdfdi_smallshift
- srl xl, xh
- movi xh, 0
-
-.Lfixdfdi_shifted:
- /* Negate the result if sign != 0. */
- bgez a7, 1f
- neg xl, xl
- neg xh, xh
- beqz xl, 1f
- addi xh, xh, -1
-1: leaf_return
-
-.Lfixdfdi_smallshift:
- src xl, xh, xl
- srl xh, xh
- j .Lfixdfdi_shifted
-
-.Lfixdfdi_nan_or_inf:
- /* Handle Infinity and NaN. */
- slli a4, xh, 12
- or a4, a4, xl
- beqz a4, .Lfixdfdi_maxint
-
- /* Translate NaN to +maxint. */
- movi xh, 0
-
-.Lfixdfdi_maxint:
- slli a7, a6, 11 /* 0x80000000 */
- bgez xh, 1f
- mov xh, a7
- movi xl, 0
- leaf_return
-
-1: addi xh, a7, -1 /* 0x7fffffff */
- movi xl, -1
- leaf_return
-
-.Lfixdfdi_zero:
- movi xh, 0
- movi xl, 0
- leaf_return
-
-#endif /* L_fixdfdi */
-
-#ifdef L_fixunsdfsi
-
- .align 4
- .global __fixunsdfsi
- .type __fixunsdfsi, @function
-__fixunsdfsi:
- leaf_entry sp, 16
-
- /* Check for NaN and Infinity. */
- movi a6, 0x7ff00000
- ball xh, a6, .Lfixunsdfsi_nan_or_inf
-
- /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32. */
- extui a4, xh, 20, 11
- extui a5, a6, 20, 10 /* 0x3ff */
- sub a4, a4, a5
- bgei a4, 32, .Lfixunsdfsi_maxint
- bltz a4, .Lfixunsdfsi_zero
-
- /* Add explicit "1.0" and shift << 11. */
- or a7, xh, a6
- ssai (32 - 11)
- src a5, a7, xl
-
- /* Shift back to the right, based on the exponent. */
- addi a4, a4, 1
- beqi a4, 32, .Lfixunsdfsi_bigexp
- ssl a4 /* shift by 32 - a4 */
- srl a5, a5
-
- /* Negate the result if sign != 0. */
- neg a2, a5
- movgez a2, a5, a7
- leaf_return
-
-.Lfixunsdfsi_nan_or_inf:
- /* Handle Infinity and NaN. */
- slli a4, xh, 12
- or a4, a4, xl
- beqz a4, .Lfixunsdfsi_maxint
-
- /* Translate NaN to 0xffffffff. */
- movi a2, -1
- leaf_return
-
-.Lfixunsdfsi_maxint:
- slli a4, a6, 11 /* 0x80000000 */
- movi a5, -1 /* 0xffffffff */
- movgez a4, a5, xh
- mov a2, a4
- leaf_return
-
-.Lfixunsdfsi_zero:
- movi a2, 0
- leaf_return
-
-.Lfixunsdfsi_bigexp:
- /* Handle unsigned maximum exponent case. */
- bltz xh, 1f
- mov a2, a5 /* no shift needed */
- leaf_return
-
- /* Return 0x80000000 if negative. */
-1: slli a2, a6, 11
- leaf_return
-
-#endif /* L_fixunsdfsi */
-
-#ifdef L_fixunsdfdi
-
- .align 4
- .global __fixunsdfdi
- .type __fixunsdfdi, @function
-__fixunsdfdi:
- leaf_entry sp, 16
-
- /* Check for NaN and Infinity. */
- movi a6, 0x7ff00000
- ball xh, a6, .Lfixunsdfdi_nan_or_inf
-
- /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64. */
- extui a4, xh, 20, 11
- extui a5, a6, 20, 10 /* 0x3ff */
- sub a4, a4, a5
- bgei a4, 64, .Lfixunsdfdi_maxint
- bltz a4, .Lfixunsdfdi_zero
-
- /* Add explicit "1.0" and shift << 11. */
- or a7, xh, a6
- ssai (32 - 11)
- src xh, a7, xl
- sll xl, xl
-
- /* Shift back to the right, based on the exponent. */
- addi a4, a4, 1
- beqi a4, 64, .Lfixunsdfdi_bigexp
- ssl a4 /* shift by 64 - a4 */
- bgei a4, 32, .Lfixunsdfdi_smallshift
- srl xl, xh
- movi xh, 0
-
-.Lfixunsdfdi_shifted:
- /* Negate the result if sign != 0. */
- bgez a7, 1f
- neg xl, xl
- neg xh, xh
- beqz xl, 1f
- addi xh, xh, -1
-1: leaf_return
-
-.Lfixunsdfdi_smallshift:
- src xl, xh, xl
- srl xh, xh
- j .Lfixunsdfdi_shifted
-
-.Lfixunsdfdi_nan_or_inf:
- /* Handle Infinity and NaN. */
- slli a4, xh, 12
- or a4, a4, xl
- beqz a4, .Lfixunsdfdi_maxint
-
- /* Translate NaN to 0xffffffff.... */
-1: movi xh, -1
- movi xl, -1
- leaf_return
-
-.Lfixunsdfdi_maxint:
- bgez xh, 1b
-2: slli xh, a6, 11 /* 0x80000000 */
- movi xl, 0
- leaf_return
-
-.Lfixunsdfdi_zero:
- movi xh, 0
- movi xl, 0
- leaf_return
-
-.Lfixunsdfdi_bigexp:
- /* Handle unsigned maximum exponent case. */
- bltz a7, 2b
- leaf_return /* no shift needed */
-
-#endif /* L_fixunsdfdi */
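
The fix* routines above share one out-of-range policy that is easier to see in plain arithmetic than in the shifted assembly: the unsigned variants translate NaN to all-ones and saturate on overflow, while the signed __fixdfsi/__fixdfdi saturate to +/-maxint and translate NaN to +maxint. A behavioral C sketch of __fixunsdfdi, reconstructed from the code above (the function name and the plain-arithmetic style are illustrative, not part of this patch):

    #include <stdint.h>

    /* Behavioral sketch of __fixunsdfdi as implemented above.  */
    uint64_t
    fixunsdfdi_sketch (double x)
    {
      if (x != x)               /* NaN is translated to all-ones.  */
        return ~(uint64_t) 0;
      if (x >= 0x1p64)          /* exponent >= 64: saturate to all-ones.  */
        return ~(uint64_t) 0;
      if (x <= -0x1p64)         /* negative overflow: 0x8000000000000000.  */
        return (uint64_t) 1 << 63;
      if (x > -1.0 && x < 1.0)  /* exponent below the bias: zero.  */
        return 0;
      double m = x < 0 ? -x : x;
      uint64_t r = (uint64_t) m;   /* truncate the magnitude */
      return x < 0 ? 0 - r : r;    /* negate with the same borrow rule */
    }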
-
-#ifdef L_floatsidf
-
- .align 4
- .global __floatunsidf
- .type __floatunsidf, @function
-__floatunsidf:
- leaf_entry sp, 16
- beqz a2, .Lfloatsidf_return_zero
-
- /* Set the sign to zero and jump to the floatsidf code. */
- movi a7, 0
- j .Lfloatsidf_normalize
-
- .align 4
- .global __floatsidf
- .type __floatsidf, @function
-__floatsidf:
- leaf_entry sp, 16
-
- /* Check for zero. */
- beqz a2, .Lfloatsidf_return_zero
-
- /* Save the sign. */
- extui a7, a2, 31, 1
-
- /* Get the absolute value. */
-#if XCHAL_HAVE_ABS
- abs a2, a2
-#else
- neg a4, a2
- movltz a2, a4, a2
-#endif
-
-.Lfloatsidf_normalize:
- /* Normalize with the first 1 bit in the msb. */
- do_nsau a4, a2, a5, a6
- ssl a4
- sll a5, a2
-
- /* Shift the mantissa into position. */
- srli xh, a5, 11
- slli xl, a5, (32 - 11)
-
- /* Set the exponent. */
- movi a5, 0x41d /* 0x3fe + 31 */
- sub a5, a5, a4
- slli a5, a5, 20
- add xh, xh, a5
-
- /* Add the sign and return. */
- slli a7, a7, 31
- or xh, xh, a7
- leaf_return
-
-.Lfloatsidf_return_zero:
- movi a3, 0
- leaf_return
-
-#endif /* L_floatsidf */
-
-#ifdef L_floatdidf
-
- .align 4
- .global __floatundidf
- .type __floatundidf, @function
-__floatundidf:
- leaf_entry sp, 16
-
- /* Check for zero. */
- or a4, xh, xl
- beqz a4, 2f
-
- /* Set the sign to zero and jump to the floatdidf code. */
- movi a7, 0
- j .Lfloatdidf_normalize
-
- .align 4
- .global __floatdidf
- .type __floatdidf, @function
-__floatdidf:
- leaf_entry sp, 16
-
- /* Check for zero. */
- or a4, xh, xl
- beqz a4, 2f
-
- /* Save the sign. */
- extui a7, xh, 31, 1
-
- /* Get the absolute value. */
- bgez xh, .Lfloatdidf_normalize
- neg xl, xl
- neg xh, xh
- beqz xl, .Lfloatdidf_normalize
- addi xh, xh, -1
-
-.Lfloatdidf_normalize:
- /* Normalize with the first 1 bit in the msb of xh. */
- beqz xh, .Lfloatdidf_bigshift
- do_nsau a4, xh, a5, a6
- ssl a4
- src xh, xh, xl
- sll xl, xl
-
-.Lfloatdidf_shifted:
- /* Shift the mantissa into position, with rounding bits in a6. */
- ssai 11
- sll a6, xl
- src xl, xh, xl
- srl xh, xh
-
- /* Set the exponent. */
- movi a5, 0x43d /* 0x3fe + 63 */
- sub a5, a5, a4
- slli a5, a5, 20
- add xh, xh, a5
-
- /* Add the sign. */
- slli a7, a7, 31
- or xh, xh, a7
-
- /* Round up if the leftover fraction is >= 1/2. */
- bgez a6, 2f
- addi xl, xl, 1
- beqz xl, .Lfloatdidf_roundcarry
-
- /* Check if the leftover fraction is exactly 1/2. */
- slli a6, a6, 1
- beqz a6, .Lfloatdidf_exactlyhalf
-2: leaf_return
-
-.Lfloatdidf_bigshift:
- /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */
- do_nsau a4, xl, a5, a6
- ssl a4
- sll xh, xl
- movi xl, 0
- addi a4, a4, 32
- j .Lfloatdidf_shifted
-
-.Lfloatdidf_exactlyhalf:
- /* Round down to the nearest even value. */
- srli xl, xl, 1
- slli xl, xl, 1
- leaf_return
-
-.Lfloatdidf_roundcarry:
- /* xl is always zero when the rounding increment overflows, so
- there's no need to round it to an even value. */
- addi xh, xh, 1
- /* Overflow to the exponent is OK. */
- leaf_return
-
-#endif /* L_floatdidf */
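
The rounding step above implements round-to-nearest-even: a6 collects every bit shifted out of the mantissa, its most significant bit is the rounding bit, and a tie clears the result's least significant bit. A C sketch of just that step (helper name illustrative; because the exponent is packed in the same word, a carry from the increment correctly bumps the exponent, as the roundcarry comment notes):

    #include <stdint.h>

    /* Round-to-nearest-even on a packed mantissa/exponent word; "guard"
       holds the bits shifted out, msb first.  Illustrative sketch.  */
    static uint64_t
    round_nearest_even (uint64_t packed, uint32_t guard)
    {
      if (guard & 0x80000000u)        /* leftover fraction >= 1/2 */
        {
          packed += 1;                /* carry into the exponent is OK */
          if ((guard << 1) == 0)      /* exactly 1/2: round to even */
            packed &= ~(uint64_t) 1;
        }
      return packed;
    }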
-
-#ifdef L_truncdfsf2
-
- .align 4
- .global __truncdfsf2
- .type __truncdfsf2, @function
-__truncdfsf2:
- leaf_entry sp, 16
-
- /* Adjust the exponent bias. */
- movi a4, (0x3ff - 0x7f) << 20
- sub a5, xh, a4
-
- /* Check for underflow. */
- xor a6, xh, a5
- bltz a6, .Ltrunc_underflow
- extui a6, a5, 20, 11
- beqz a6, .Ltrunc_underflow
-
- /* Check for overflow. */
- movi a4, 255
- bge a6, a4, .Ltrunc_overflow
-
- /* Shift a5/xl << 3 into a5/a4. */
- ssai (32 - 3)
- src a5, a5, xl
- sll a4, xl
-
-.Ltrunc_addsign:
- /* Add the sign bit. */
- extui a6, xh, 31, 1
- slli a6, a6, 31
- or a2, a6, a5
-
- /* Round up if the leftover fraction is >= 1/2. */
- bgez a4, 1f
- addi a2, a2, 1
- /* Overflow to the exponent is OK. The answer will be correct. */
-
- /* Check if the leftover fraction is exactly 1/2. */
- slli a4, a4, 1
- beqz a4, .Ltrunc_exactlyhalf
-1: leaf_return
-
-.Ltrunc_exactlyhalf:
- /* Round down to the nearest even value. */
- srli a2, a2, 1
- slli a2, a2, 1
- leaf_return
-
-.Ltrunc_overflow:
- /* Check if exponent == 0x7ff. */
- movi a4, 0x7ff00000
- bnall xh, a4, 1f
-
- /* Check if mantissa is nonzero. */
- slli a5, xh, 12
- or a5, a5, xl
- beqz a5, 1f
-
- /* Shift a4 to set a bit in the mantissa, making a quiet NaN. */
- srli a4, a4, 1
-
-1: slli a4, a4, 4 /* 0xff000000 or 0xff800000 */
- /* Add the sign bit. */
- extui a6, xh, 31, 1
- ssai 1
- src a2, a6, a4
- leaf_return
-
-.Ltrunc_underflow:
- /* Find shift count for a subnormal. Flush to zero if >= 32. */
- extui a6, xh, 20, 11
- movi a5, 0x3ff - 0x7f
- sub a6, a5, a6
- addi a6, a6, 1
- bgeui a6, 32, 1f
-
- /* Replace the exponent with an explicit "1.0". */
- slli a5, a5, 13 /* 0x700000 */
- or a5, a5, xh
- slli a5, a5, 11
- srli a5, a5, 11
-
- /* Shift the mantissa left by 3 bits (into a5/a4). */
- ssai (32 - 3)
- src a5, a5, xl
- sll a4, xl
-
- /* Shift right by a6. */
- ssr a6
- sll a7, a4
- src a4, a5, a4
- srl a5, a5
- beqz a7, .Ltrunc_addsign
- or a4, a4, a6 /* any positive, nonzero value will work */
- j .Ltrunc_addsign
-
- /* Return +/- zero. */
-1: extui a2, xh, 31, 1
- slli a2, a2, 31
- leaf_return
-
-#endif /* L_truncdfsf2 */
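
The overflow path above assembles its result directly from the exponent mask: without the extra shift it produces an infinity, and with it mantissa bit 22 ends up set, so a NaN input stays a quiet NaN after truncation. The resulting single-precision patterns, sketched in C (names illustrative):

    #include <stdint.h>

    /* Single-precision infinity or quiet NaN with the given sign bit.
       Illustrative sketch of the patterns built above.  */
    static uint32_t
    trunc_overflow_result (uint32_t sign_bit, int is_nan)
    {
      uint32_t r = is_nan ? 0x7fc00000u : 0x7f800000u;
      return (sign_bit << 31) | r;
    }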
-
-#ifdef L_extendsfdf2
-
- .align 4
- .global __extendsfdf2
- .type __extendsfdf2, @function
-__extendsfdf2:
- leaf_entry sp, 16
-
- /* Save the sign bit and then shift it off. */
- extui a5, a2, 31, 1
- slli a5, a5, 31
- slli a4, a2, 1
-
- /* Extract and check the exponent. */
- extui a6, a2, 23, 8
- beqz a6, .Lextend_expzero
- addi a6, a6, 1
- beqi a6, 256, .Lextend_nan_or_inf
-
- /* Shift >> 3 into a4/xl. */
- srli a4, a4, 4
- slli xl, a2, (32 - 3)
-
- /* Adjust the exponent bias. */
- movi a6, (0x3ff - 0x7f) << 20
- add a4, a4, a6
-
- /* Add the sign bit. */
- or xh, a4, a5
- leaf_return
-
-.Lextend_nan_or_inf:
- movi a4, 0x7ff00000
-
- /* Check for NaN. */
- slli a7, a2, 9
- beqz a7, 1f
-
- slli a6, a6, 11 /* 0x80000 */
- or a4, a4, a6
-
- /* Add the sign and return. */
-1: or xh, a4, a5
- movi xl, 0
- leaf_return
-
-.Lextend_expzero:
- beqz a4, 1b
-
- /* Normalize it to have 8 zero bits before the first 1 bit. */
- do_nsau a7, a4, a2, a3
- addi a7, a7, -8
- ssl a7
- sll a4, a4
-
- /* Shift >> 3 into a4/xl. */
- slli xl, a4, (32 - 3)
- srli a4, a4, 3
-
- /* Set the exponent. */
- movi a6, 0x3fe - 0x7f
- sub a6, a6, a7
- slli a6, a6, 20
- add a4, a4, a6
-
- /* Add the sign and return. */
- or xh, a4, a5
- leaf_return
-
-#endif /* L_extendsfdf2 */
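
For a normal input the widening above is exact: the 8-bit exponent is rebiased from 127 to 1023 and the 23-bit mantissa is left-justified in the 52-bit field. A sketch of that normal-number path in C (subnormal inputs instead go through the do_nsau renormalization above; the helper name is illustrative):

    #include <stdint.h>

    /* Widen the bits of a normal float to the bits of the equal double.
       Illustrative sketch; does not handle zero, subnormals, or NaN/Inf.  */
    static uint64_t
    extend_normal (uint32_t f)
    {
      uint64_t sign = (uint64_t) (f >> 31) << 63;
      uint64_t exp  = ((f >> 23) & 0xff) + (1023 - 127);
      uint64_t mant = f & 0x7fffff;
      return sign | (exp << 52) | (mant << 29);
    }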
-
-
diff --git a/gcc/config/xtensa/ieee754-sf.S b/gcc/config/xtensa/ieee754-sf.S
deleted file mode 100644
index d75be0e5ae5..00000000000
--- a/gcc/config/xtensa/ieee754-sf.S
+++ /dev/null
@@ -1,1757 +0,0 @@
-/* IEEE-754 single-precision functions for Xtensa
- Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
- Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
-
- This file is part of GCC.
-
- GCC is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- GCC is distributed in the hope that it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
- License for more details.
-
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
-
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-#ifdef __XTENSA_EB__
-#define xh a2
-#define xl a3
-#define yh a4
-#define yl a5
-#else
-#define xh a3
-#define xl a2
-#define yh a5
-#define yl a4
-#endif
-
-/* Warning! The branch displacements for some Xtensa branch instructions
- are quite small, and this code has been carefully laid out to keep
- branch targets in range. If you change anything, be sure to check that
- the assembler is not relaxing anything to branch over a jump. */
-
-#ifdef L_negsf2
-
- .align 4
- .global __negsf2
- .type __negsf2, @function
-__negsf2:
- leaf_entry sp, 16
- movi a4, 0x80000000
- xor a2, a2, a4
- leaf_return
-
-#endif /* L_negsf2 */
-
-#ifdef L_addsubsf3
-
- /* Addition */
-__addsf3_aux:
-
- /* Handle NaNs and Infinities. (This code is placed before the
- start of the function just to keep it in range of the limited
- branch displacements.) */
-
-.Ladd_xnan_or_inf:
- /* If y is neither Infinity nor NaN, return x. */
- bnall a3, a6, 1f
- /* If x is a NaN, return it. Otherwise, return y. */
- slli a7, a2, 9
- beqz a7, .Ladd_ynan_or_inf
-1: leaf_return
-
-.Ladd_ynan_or_inf:
- /* Return y. */
- mov a2, a3
- leaf_return
-
-.Ladd_opposite_signs:
- /* Operand signs differ. Do a subtraction. */
- slli a7, a6, 8
- xor a3, a3, a7
- j .Lsub_same_sign
-
- .align 4
- .global __addsf3
- .type __addsf3, @function
-__addsf3:
- leaf_entry sp, 16
- movi a6, 0x7f800000
-
- /* Check if the two operands have the same sign. */
- xor a7, a2, a3
- bltz a7, .Ladd_opposite_signs
-
-.Ladd_same_sign:
- /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */
- ball a2, a6, .Ladd_xnan_or_inf
- ball a3, a6, .Ladd_ynan_or_inf
-
- /* Compare the exponents. The smaller operand will be shifted
- right by the exponent difference and added to the larger
- one. */
- extui a7, a2, 23, 9
- extui a8, a3, 23, 9
- bltu a7, a8, .Ladd_shiftx
-
-.Ladd_shifty:
- /* Check if the smaller (or equal) exponent is zero. */
- bnone a3, a6, .Ladd_yexpzero
-
- /* Replace y sign/exponent with 0x008. */
- or a3, a3, a6
- slli a3, a3, 8
- srli a3, a3, 8
-
-.Ladd_yexpdiff:
- /* Compute the exponent difference. */
- sub a10, a7, a8
-
- /* Exponent difference > 32 -- just return the bigger value. */
- bgeui a10, 32, 1f
-
- /* Shift y right by the exponent difference. Any bits that are
- shifted out of y are saved in a9 for rounding the result. */
- ssr a10
- movi a9, 0
- src a9, a3, a9
- srl a3, a3
-
- /* Do the addition. */
- add a2, a2, a3
-
- /* Check if the add overflowed into the exponent. */
- extui a10, a2, 23, 9
- beq a10, a7, .Ladd_round
- mov a8, a7
- j .Ladd_carry
-
-.Ladd_yexpzero:
- /* y is a subnormal value. Replace its sign/exponent with zero,
- i.e., no implicit "1.0", and increment the apparent exponent
- because subnormals behave as if they had the minimum (nonzero)
- exponent. Test for the case when both exponents are zero. */
- slli a3, a3, 9
- srli a3, a3, 9
- bnone a2, a6, .Ladd_bothexpzero
- addi a8, a8, 1
- j .Ladd_yexpdiff
-
-.Ladd_bothexpzero:
- /* Both exponents are zero. Handle this as a special case. There
- is no need to shift or round, and the normal code for handling
- a carry into the exponent field will not work because it
- assumes there is an implicit "1.0" that needs to be added. */
- add a2, a2, a3
-1: leaf_return
-
-.Ladd_xexpzero:
- /* Same as "yexpzero" except skip handling the case when both
- exponents are zero. */
- slli a2, a2, 9
- srli a2, a2, 9
- addi a7, a7, 1
- j .Ladd_xexpdiff
-
-.Ladd_shiftx:
- /* Same thing as the "shifty" code, but with x and y swapped. Also,
- because the exponent difference is always nonzero in this version,
- the shift sequence can use SLL and skip loading a constant zero. */
- bnone a2, a6, .Ladd_xexpzero
-
- or a2, a2, a6
- slli a2, a2, 8
- srli a2, a2, 8
-
-.Ladd_xexpdiff:
- sub a10, a8, a7
- bgeui a10, 32, .Ladd_returny
-
- ssr a10
- sll a9, a2
- srl a2, a2
-
- add a2, a2, a3
-
- /* Check if the add overflowed into the exponent. */
- extui a10, a2, 23, 9
- bne a10, a8, .Ladd_carry
-
-.Ladd_round:
- /* Round up if the leftover fraction is >= 1/2. */
- bgez a9, 1f
- addi a2, a2, 1
-
- /* Check if the leftover fraction is exactly 1/2. */
- slli a9, a9, 1
- beqz a9, .Ladd_exactlyhalf
-1: leaf_return
-
-.Ladd_returny:
- mov a2, a3
- leaf_return
-
-.Ladd_carry:
- /* The addition has overflowed into the exponent field, so the
- value needs to be renormalized. The mantissa of the result
- can be recovered by subtracting the original exponent and
- adding 0x800000 (which is the explicit "1.0" for the
- mantissa of the non-shifted operand -- the "1.0" for the
- shifted operand was already added). The mantissa can then
- be shifted right by one bit. The explicit "1.0" of the
- shifted mantissa then needs to be replaced by the exponent,
- incremented by one to account for the normalizing shift.
- It is faster to combine these operations: do the shift first
- and combine the additions and subtractions. If x is the
- original exponent, the result is:
- shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
- or:
- shifted mantissa + ((x + 1) << 22)
- Note that the exponent is incremented here by leaving the
- explicit "1.0" of the mantissa in the exponent field. */
-
- /* Shift x right by one bit. Save the lsb. */
- mov a10, a2
- srli a2, a2, 1
-
- /* See explanation above. The original exponent is in a8. */
- addi a8, a8, 1
- slli a8, a8, 22
- add a2, a2, a8
-
- /* Return an Infinity if the exponent overflowed. */
- ball a2, a6, .Ladd_infinity
-
- /* Same thing as the "round" code except the msb of the leftover
- fraction is bit 0 of a10, with the rest of the fraction in a9. */
- bbci.l a10, 0, 1f
- addi a2, a2, 1
- beqz a9, .Ladd_exactlyhalf
-1: leaf_return
-
-.Ladd_infinity:
- /* Clear the mantissa. */
- srli a2, a2, 23
- slli a2, a2, 23
-
- /* The sign bit may have been lost in a carry-out. Put it back. */
- slli a8, a8, 1
- or a2, a2, a8
- leaf_return
-
-.Ladd_exactlyhalf:
- /* Round down to the nearest even value. */
- srli a2, a2, 1
- slli a2, a2, 1
- leaf_return
-
-
- /* Subtraction */
-__subsf3_aux:
-
- /* Handle NaNs and Infinities. (This code is placed before the
- start of the function just to keep it in range of the limited
- branch displacements.) */
-
-.Lsub_xnan_or_inf:
- /* If y is neither Infinity nor NaN, return x. */
- bnall a3, a6, 1f
- /* Both x and y are either NaN or Inf, so the result is NaN. */
- movi a4, 0x400000 /* make it a quiet NaN */
- or a2, a2, a4
-1: leaf_return
-
-.Lsub_ynan_or_inf:
- /* Negate y and return it. */
- slli a7, a6, 8
- xor a2, a3, a7
- leaf_return
-
-.Lsub_opposite_signs:
- /* Operand signs differ. Do an addition. */
- slli a7, a6, 8
- xor a3, a3, a7
- j .Ladd_same_sign
-
- .align 4
- .global __subsf3
- .type __subsf3, @function
-__subsf3:
- leaf_entry sp, 16
- movi a6, 0x7f800000
-
- /* Check if the two operands have the same sign. */
- xor a7, a2, a3
- bltz a7, .Lsub_opposite_signs
-
-.Lsub_same_sign:
- /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity). */
- ball a2, a6, .Lsub_xnan_or_inf
- ball a3, a6, .Lsub_ynan_or_inf
-
- /* Compare the operands. In contrast to addition, the entire
- value matters here. */
- extui a7, a2, 23, 8
- extui a8, a3, 23, 8
- bltu a2, a3, .Lsub_xsmaller
-
-.Lsub_ysmaller:
- /* Check if the smaller (or equal) exponent is zero. */
- bnone a3, a6, .Lsub_yexpzero
-
- /* Replace y sign/exponent with 0x008. */
- or a3, a3, a6
- slli a3, a3, 8
- srli a3, a3, 8
-
-.Lsub_yexpdiff:
- /* Compute the exponent difference. */
- sub a10, a7, a8
-
- /* Exponent difference > 32 -- just return the bigger value. */
- bgeui a10, 32, 1f
-
- /* Shift y right by the exponent difference. Any bits that are
- shifted out of y are saved in a9 for rounding the result. */
- ssr a10
- movi a9, 0
- src a9, a3, a9
- srl a3, a3
-
- sub a2, a2, a3
-
- /* Subtract the leftover bits in a9 from zero and propagate any
- borrow from a2. */
- neg a9, a9
- addi a10, a2, -1
- movnez a2, a10, a9
-
- /* Check if the subtract underflowed into the exponent. */
- extui a10, a2, 23, 8
- beq a10, a7, .Lsub_round
- j .Lsub_borrow
-
-.Lsub_yexpzero:
- /* Return zero if the inputs are equal. (For the non-subnormal
- case, subtracting the "1.0" will cause a borrow from the exponent
- and this case can be detected when handling the borrow.) */
- beq a2, a3, .Lsub_return_zero
-
- /* y is a subnormal value. Replace its sign/exponent with zero,
- i.e., no implicit "1.0". Unless x is also a subnormal, increment
- y's apparent exponent because subnormals behave as if they had
- the minimum (nonzero) exponent. */
- slli a3, a3, 9
- srli a3, a3, 9
- bnone a2, a6, .Lsub_yexpdiff
- addi a8, a8, 1
- j .Lsub_yexpdiff
-
-.Lsub_returny:
- /* Negate and return y. */
- slli a7, a6, 8
- xor a2, a3, a7
-1: leaf_return
-
-.Lsub_xsmaller:
- /* Same thing as the "ysmaller" code, but with x and y swapped and
- with y negated. */
- bnone a2, a6, .Lsub_xexpzero
-
- or a2, a2, a6
- slli a2, a2, 8
- srli a2, a2, 8
-
-.Lsub_xexpdiff:
- sub a10, a8, a7
- bgeui a10, 32, .Lsub_returny
-
- ssr a10
- movi a9, 0
- src a9, a2, a9
- srl a2, a2
-
- /* Negate y. */
- slli a11, a6, 8
- xor a3, a3, a11
-
- sub a2, a3, a2
-
- neg a9, a9
- addi a10, a2, -1
- movnez a2, a10, a9
-
- /* Check if the subtract underflowed into the exponent. */
- extui a10, a2, 23, 8
- bne a10, a8, .Lsub_borrow
-
-.Lsub_round:
- /* Round up if the leftover fraction is >= 1/2. */
- bgez a9, 1f
- addi a2, a2, 1
-
- /* Check if the leftover fraction is exactly 1/2. */
- slli a9, a9, 1
- beqz a9, .Lsub_exactlyhalf
-1: leaf_return
-
-.Lsub_xexpzero:
- /* Same as "yexpzero". */
- beq a2, a3, .Lsub_return_zero
- slli a2, a2, 9
- srli a2, a2, 9
- bnone a3, a6, .Lsub_xexpdiff
- addi a7, a7, 1
- j .Lsub_xexpdiff
-
-.Lsub_return_zero:
- movi a2, 0
- leaf_return
-
-.Lsub_borrow:
- /* The subtraction has underflowed into the exponent field, so the
- value needs to be renormalized. Shift the mantissa left as
- needed to remove any leading zeros and adjust the exponent
- accordingly. If the exponent is not large enough to remove
- all the leading zeros, the result will be a subnormal value. */
-
- slli a8, a2, 9
- beqz a8, .Lsub_xzero
- do_nsau a6, a8, a7, a11
- srli a8, a8, 9
- bge a6, a10, .Lsub_subnormal
- addi a6, a6, 1
-
-.Lsub_normalize_shift:
- /* Shift the mantissa (a8/a9) left by a6. */
- ssl a6
- src a8, a8, a9
- sll a9, a9
-
- /* Combine the shifted mantissa with the sign and exponent,
- decrementing the exponent by a6. (The exponent has already
- been decremented by one due to the borrow from the subtraction,
- but adding the mantissa will increment the exponent by one.) */
- srli a2, a2, 23
- sub a2, a2, a6
- slli a2, a2, 23
- add a2, a2, a8
- j .Lsub_round
-
-.Lsub_exactlyhalf:
- /* Round down to the nearest even value. */
- srli a2, a2, 1
- slli a2, a2, 1
- leaf_return
-
-.Lsub_xzero:
- /* If there was a borrow from the exponent, and the mantissa and
- guard digits are all zero, then the inputs were equal and the
- result should be zero. */
- beqz a9, .Lsub_return_zero
-
- /* Only the guard digit is nonzero. Shift by min(24, a10). */
- addi a11, a10, -24
- movi a6, 24
- movltz a6, a10, a11
- j .Lsub_normalize_shift
-
-.Lsub_subnormal:
- /* The exponent is too small to shift away all the leading zeros.
- Set a6 to the current exponent (which has already been
- decremented by the borrow) so that the exponent of the result
- will be zero. Do not add 1 to a6 in this case, because: (1)
- adding the mantissa will not increment the exponent, so there is
- no need to subtract anything extra from the exponent to
- compensate, and (2) the effective exponent of a subnormal is 1
- not 0 so the shift amount must be 1 smaller than normal. */
- mov a6, a10
- j .Lsub_normalize_shift
-
-#endif /* L_addsubsf3 */
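
The .Ladd_carry comment above leans on one algebraic step: -(x << 22) + (x << 23) collapses to x << 22, so the three adjustments combine into ((x + 1) << 22). A quick check of that identity in C, where unsigned wraparound makes it exact (illustrative):

    #include <assert.h>
    #include <stdint.h>

    /* m - (x << 22) + (1 << 22) + (x << 23) == m + ((x + 1) << 22),
       modulo 2^32.  */
    static void
    check_carry_identity (uint32_t m, uint32_t x)
    {
      assert (m - (x << 22) + (1u << 22) + (x << 23)
              == m + ((x + 1u) << 22));
    }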
-
-#ifdef L_mulsf3
-
- /* Multiplication */
-#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
-#define XCHAL_NO_MUL 1
-#endif
-
-__mulsf3_aux:
-
- /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
- (This code is placed before the start of the function just to
- keep it in range of the limited branch displacements.) */
-
-.Lmul_xexpzero:
- /* Clear the sign bit of x. */
- slli a2, a2, 1
- srli a2, a2, 1
-
- /* If x is zero, return zero. */
- beqz a2, .Lmul_return_zero
-
- /* Normalize x. Adjust the exponent in a8. */
- do_nsau a10, a2, a11, a12
- addi a10, a10, -8
- ssl a10
- sll a2, a2
- movi a8, 1
- sub a8, a8, a10
- j .Lmul_xnormalized
-
-.Lmul_yexpzero:
- /* Clear the sign bit of y. */
- slli a3, a3, 1
- srli a3, a3, 1
-
- /* If y is zero, return zero. */
- beqz a3, .Lmul_return_zero
-
- /* Normalize y. Adjust the exponent in a9. */
- do_nsau a10, a3, a11, a12
- addi a10, a10, -8
- ssl a10
- sll a3, a3
- movi a9, 1
- sub a9, a9, a10
- j .Lmul_ynormalized
-
-.Lmul_return_zero:
- /* Return zero with the appropriate sign bit. */
- srli a2, a7, 31
- slli a2, a2, 31
- j .Lmul_done
-
-.Lmul_xnan_or_inf:
- /* If y is zero, return NaN. */
- slli a8, a3, 1
- bnez a8, 1f
- movi a4, 0x400000 /* make it a quiet NaN */
- or a2, a2, a4
- j .Lmul_done
-1:
- /* If y is NaN, return y. */
- bnall a3, a6, .Lmul_returnx
- slli a8, a3, 9
- beqz a8, .Lmul_returnx
-
-.Lmul_returny:
- mov a2, a3
-
-.Lmul_returnx:
- /* Set the sign bit and return. */
- extui a7, a7, 31, 1
- slli a2, a2, 1
- ssai 1
- src a2, a7, a2
- j .Lmul_done
-
-.Lmul_ynan_or_inf:
- /* If x is zero, return NaN. */
- slli a8, a2, 1
- bnez a8, .Lmul_returny
- movi a7, 0x400000 /* make it a quiet NaN */
- or a2, a3, a7
- j .Lmul_done
-
- .align 4
- .global __mulsf3
- .type __mulsf3, @function
-__mulsf3:
-#if __XTENSA_CALL0_ABI__
- leaf_entry sp, 32
- addi sp, sp, -32
- s32i a12, sp, 16
- s32i a13, sp, 20
- s32i a14, sp, 24
- s32i a15, sp, 28
-#elif XCHAL_NO_MUL
- /* This is not really a leaf function; allocate enough stack space
- to allow CALL12s to a helper function. */
- leaf_entry sp, 64
-#else
- leaf_entry sp, 32
-#endif
- movi a6, 0x7f800000
-
- /* Get the sign of the result. */
- xor a7, a2, a3
-
- /* Check for NaN and infinity. */
- ball a2, a6, .Lmul_xnan_or_inf
- ball a3, a6, .Lmul_ynan_or_inf
-
- /* Extract the exponents. */
- extui a8, a2, 23, 8
- extui a9, a3, 23, 8
-
- beqz a8, .Lmul_xexpzero
-.Lmul_xnormalized:
- beqz a9, .Lmul_yexpzero
-.Lmul_ynormalized:
-
- /* Add the exponents. */
- add a8, a8, a9
-
- /* Replace sign/exponent fields with explicit "1.0". */
- movi a10, 0xffffff
- or a2, a2, a6
- and a2, a2, a10
- or a3, a3, a6
- and a3, a3, a10
-
- /* Multiply 32x32 to 64 bits. The result ends up in a2/a6. */
-
-#if XCHAL_HAVE_MUL32_HIGH
-
- mull a6, a2, a3
- muluh a2, a2, a3
-
-#else
-
- /* Break the inputs into 16-bit chunks and compute 4 32-bit partial
- products. These partial products are:
-
- 0 xl * yl
-
- 1 xl * yh
- 2 xh * yl
-
- 3 xh * yh
-
- If using the Mul16 or Mul32 multiplier options, these input
- chunks must be stored in separate registers. For Mac16, the
- UMUL.AA.* opcodes can specify that the inputs come from either
- half of the registers, so there is no need to shift them out
- ahead of time. If there is no multiply hardware, the 16-bit
- chunks can be extracted when setting up the arguments to the
- separate multiply function. */
-
-#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
- /* Calling a separate multiply function will clobber a0 and requires
- use of a8 as a temporary, so save those values now. (The function
- uses a custom ABI so nothing else needs to be saved.) */
- s32i a0, sp, 0
- s32i a8, sp, 4
-#endif
-
-#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
-
-#define a2h a4
-#define a3h a5
-
- /* Get the high halves of the inputs into registers. */
- srli a2h, a2, 16
- srli a3h, a3, 16
-
-#define a2l a2
-#define a3l a3
-
-#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
- /* Clear the high halves of the inputs. This does not matter
- for MUL16 because the high bits are ignored. */
- extui a2, a2, 0, 16
- extui a3, a3, 0, 16
-#endif
-#endif /* MUL16 || MUL32 */
-
-
-#if XCHAL_HAVE_MUL16
-
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
- mul16u dst, xreg ## xhalf, yreg ## yhalf
-
-#elif XCHAL_HAVE_MUL32
-
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
- mull dst, xreg ## xhalf, yreg ## yhalf
-
-#elif XCHAL_HAVE_MAC16
-
-/* The preprocessor insists on inserting a space when concatenating after
- a period in the definition of do_mul below. These macros are a workaround
- using underscores instead of periods when doing the concatenation. */
-#define umul_aa_ll umul.aa.ll
-#define umul_aa_lh umul.aa.lh
-#define umul_aa_hl umul.aa.hl
-#define umul_aa_hh umul.aa.hh
-
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
- umul_aa_ ## xhalf ## yhalf xreg, yreg; \
- rsr dst, ACCLO
-
-#else /* no multiply hardware */
-
-#define set_arg_l(dst, src) \
- extui dst, src, 0, 16
-#define set_arg_h(dst, src) \
- srli dst, src, 16
-
-#if __XTENSA_CALL0_ABI__
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
- set_arg_ ## xhalf (a13, xreg); \
- set_arg_ ## yhalf (a14, yreg); \
- call0 .Lmul_mulsi3; \
- mov dst, a12
-#else
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
- set_arg_ ## xhalf (a14, xreg); \
- set_arg_ ## yhalf (a15, yreg); \
- call12 .Lmul_mulsi3; \
- mov dst, a14
-#endif /* __XTENSA_CALL0_ABI__ */
-
-#endif /* no multiply hardware */
-
- /* Add pp1 and pp2 into a6 with carry-out in a9. */
- do_mul(a6, a2, l, a3, h) /* pp 1 */
- do_mul(a11, a2, h, a3, l) /* pp 2 */
- movi a9, 0
- add a6, a6, a11
- bgeu a6, a11, 1f
- addi a9, a9, 1
-1:
- /* Shift the high half of a9/a6 into position in a9. Note that
- this value can be safely incremented without any carry-outs. */
- ssai 16
- src a9, a9, a6
-
- /* Compute the low word into a6. */
- do_mul(a11, a2, l, a3, l) /* pp 0 */
- sll a6, a6
- add a6, a6, a11
- bgeu a6, a11, 1f
- addi a9, a9, 1
-1:
- /* Compute the high word into a2. */
- do_mul(a2, a2, h, a3, h) /* pp 3 */
- add a2, a2, a9
-
-#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
- /* Restore values saved on the stack during the multiplication. */
- l32i a0, sp, 0
- l32i a8, sp, 4
-#endif
-#endif /* ! XCHAL_HAVE_MUL32_HIGH */
-
- /* Shift left by 9 bits, unless there was a carry-out from the
- multiply, in which case, shift by 8 bits and increment the
- exponent. */
- movi a4, 9
- srli a5, a2, 24 - 9
- beqz a5, 1f
- addi a4, a4, -1
- addi a8, a8, 1
-1: ssl a4
- src a2, a2, a6
- sll a6, a6
-
- /* Subtract the extra bias from the exponent sum (plus one to account
- for the explicit "1.0" of the mantissa that will be added to the
- exponent in the final result). */
- movi a4, 0x80
- sub a8, a8, a4
-
- /* Check for over/underflow. The value in a8 is one less than the
- final exponent, so values in the range 0..fd are OK here. */
- movi a4, 0xfe
- bgeu a8, a4, .Lmul_overflow
-
-.Lmul_round:
- /* Round. */
- bgez a6, .Lmul_rounded
- addi a2, a2, 1
- slli a6, a6, 1
- beqz a6, .Lmul_exactlyhalf
-
-.Lmul_rounded:
- /* Add the exponent to the mantissa. */
- slli a8, a8, 23
- add a2, a2, a8
-
-.Lmul_addsign:
- /* Add the sign bit. */
- srli a7, a7, 31
- slli a7, a7, 31
- or a2, a2, a7
-
-.Lmul_done:
-#if __XTENSA_CALL0_ABI__
- l32i a12, sp, 16
- l32i a13, sp, 20
- l32i a14, sp, 24
- l32i a15, sp, 28
- addi sp, sp, 32
-#endif
- leaf_return
-
-.Lmul_exactlyhalf:
- /* Round down to the nearest even value. */
- srli a2, a2, 1
- slli a2, a2, 1
- j .Lmul_rounded
-
-.Lmul_overflow:
- bltz a8, .Lmul_underflow
- /* Return +/- Infinity. */
- movi a8, 0xff
- slli a2, a8, 23
- j .Lmul_addsign
-
-.Lmul_underflow:
- /* Create a subnormal value, where the exponent field contains zero,
- but the effective exponent is 1. The value of a8 is one less than
- the actual exponent, so just negate it to get the shift amount. */
- neg a8, a8
- mov a9, a6
- ssr a8
- bgeui a8, 32, .Lmul_flush_to_zero
-
- /* Shift a2 right. Any bits that are shifted out of a2 are saved
- in a6 (combined with the shifted-out bits currently in a6) for
- rounding the result. */
- sll a6, a2
- srl a2, a2
-
- /* Set the exponent to zero. */
- movi a8, 0
-
- /* Pack any nonzero bits shifted out into a6. */
- beqz a9, .Lmul_round
- movi a9, 1
- or a6, a6, a9
- j .Lmul_round
-
-.Lmul_flush_to_zero:
- /* Return zero with the appropriate sign bit. */
- srli a2, a7, 31
- slli a2, a2, 31
- j .Lmul_done
-
-#if XCHAL_NO_MUL
-
- /* For Xtensa processors with no multiply hardware, this simplified
- version of _mulsi3 is used for multiplying 16-bit chunks of
- the floating-point mantissas. When using CALL0, this function
- uses a custom ABI: the inputs are passed in a13 and a14, the
- result is returned in a12, and a8 and a15 are clobbered. */
- .align 4
-.Lmul_mulsi3:
- leaf_entry sp, 16
- .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
- movi \dst, 0
-1: add \tmp1, \src2, \dst
- extui \tmp2, \src1, 0, 1
- movnez \dst, \tmp1, \tmp2
-
- do_addx2 \tmp1, \src2, \dst, \tmp1
- extui \tmp2, \src1, 1, 1
- movnez \dst, \tmp1, \tmp2
-
- do_addx4 \tmp1, \src2, \dst, \tmp1
- extui \tmp2, \src1, 2, 1
- movnez \dst, \tmp1, \tmp2
-
- do_addx8 \tmp1, \src2, \dst, \tmp1
- extui \tmp2, \src1, 3, 1
- movnez \dst, \tmp1, \tmp2
-
- srli \src1, \src1, 4
- slli \src2, \src2, 4
- bnez \src1, 1b
- .endm
-#if __XTENSA_CALL0_ABI__
- mul_mulsi3_body a12, a13, a14, a15, a8
-#else
- /* The result will be written into a2, so save that argument in a4. */
- mov a4, a2
- mul_mulsi3_body a2, a4, a3, a5, a6
-#endif
- leaf_return
-#endif /* XCHAL_NO_MUL */
-#endif /* L_mulsf3 */
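
The partial products described in the comment block above combine as hi:lo = (pp3 << 32) + ((pp1 + pp2) << 16) + pp0, and pp1 + pp2 can itself overflow 32 bits, which is why the assembly tracks a carry in a9. The same combination in C (names illustrative):

    #include <stdint.h>

    /* 32x32 -> 64 multiply from four 16x16 -> 32 partial products,
       mirroring the carry handling above.  Illustrative sketch.  */
    static void
    umul32x32 (uint32_t x, uint32_t y, uint32_t *hi, uint32_t *lo)
    {
      uint32_t xl = x & 0xffff, xh = x >> 16;
      uint32_t yl = y & 0xffff, yh = y >> 16;
      uint32_t pp0 = xl * yl, pp1 = xl * yh;
      uint32_t pp2 = xh * yl, pp3 = xh * yh;

      uint32_t mid = pp1 + pp2;
      uint32_t midcarry = mid < pp1;      /* carry out of pp1 + pp2 */
      uint32_t lo32 = pp0 + (mid << 16);
      uint32_t locarry = lo32 < pp0;      /* carry out of the low word */

      *lo = lo32;
      *hi = pp3 + (mid >> 16) + (midcarry << 16) + locarry;
    }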
-
-#ifdef L_divsf3
-
- /* Division */
-__divsf3_aux:
-
- /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
- (This code is placed before the start of the function just to
- keep it in range of the limited branch displacements.) */
-
-.Ldiv_yexpzero:
- /* Clear the sign bit of y. */
- slli a3, a3, 1
- srli a3, a3, 1
-
- /* Check for division by zero. */
- beqz a3, .Ldiv_yzero
-
- /* Normalize y. Adjust the exponent in a9. */
- do_nsau a10, a3, a4, a5
- addi a10, a10, -8
- ssl a10
- sll a3, a3
- movi a9, 1
- sub a9, a9, a10
- j .Ldiv_ynormalized
-
-.Ldiv_yzero:
- /* y is zero. Return NaN if x is also zero; otherwise, infinity. */
- slli a4, a2, 1
- srli a4, a4, 1
- srli a2, a7, 31
- slli a2, a2, 31
- or a2, a2, a6
- bnez a4, 1f
- movi a4, 0x400000 /* make it a quiet NaN */
- or a2, a2, a4
-1: leaf_return
-
-.Ldiv_xexpzero:
- /* Clear the sign bit of x. */
- slli a2, a2, 1
- srli a2, a2, 1
-
- /* If x is zero, return zero. */
- beqz a2, .Ldiv_return_zero
-
- /* Normalize x. Adjust the exponent in a8. */
- do_nsau a10, a2, a4, a5
- addi a10, a10, -8
- ssl a10
- sll a2, a2
- movi a8, 1
- sub a8, a8, a10
- j .Ldiv_xnormalized
-
-.Ldiv_return_zero:
- /* Return zero with the appropriate sign bit. */
- srli a2, a7, 31
- slli a2, a2, 31
- leaf_return
-
-.Ldiv_xnan_or_inf:
- /* Set the sign bit of the result. */
- srli a7, a3, 31
- slli a7, a7, 31
- xor a2, a2, a7
- /* If y is NaN or Inf, return NaN. */
- bnall a3, a6, 1f
- movi a4, 0x400000 /* make it a quiet NaN */
- or a2, a2, a4
-1: leaf_return
-
-.Ldiv_ynan_or_inf:
- /* If y is Infinity, return zero. */
- slli a8, a3, 9
- beqz a8, .Ldiv_return_zero
- /* y is NaN; return it. */
- mov a2, a3
- leaf_return
-
- .align 4
- .global __divsf3
- .type __divsf3, @function
-__divsf3:
- leaf_entry sp, 16
- movi a6, 0x7f800000
-
- /* Get the sign of the result. */
- xor a7, a2, a3
-
- /* Check for NaN and infinity. */
- ball a2, a6, .Ldiv_xnan_or_inf
- ball a3, a6, .Ldiv_ynan_or_inf
-
- /* Extract the exponents. */
- extui a8, a2, 23, 8
- extui a9, a3, 23, 8
-
- beqz a9, .Ldiv_yexpzero
-.Ldiv_ynormalized:
- beqz a8, .Ldiv_xexpzero
-.Ldiv_xnormalized:
-
- /* Subtract the exponents. */
- sub a8, a8, a9
-
- /* Replace sign/exponent fields with explicit "1.0". */
- movi a10, 0xffffff
- or a2, a2, a6
- and a2, a2, a10
- or a3, a3, a6
- and a3, a3, a10
-
- /* The first digit of the mantissa division must be a one.
- Shift x (and adjust the exponent) as needed to make this true. */
- bltu a3, a2, 1f
- slli a2, a2, 1
- addi a8, a8, -1
-1:
- /* Do the first subtraction and shift. */
- sub a2, a2, a3
- slli a2, a2, 1
-
- /* Put the quotient into a10. */
- movi a10, 1
-
- /* Divide one bit at a time for 23 bits. */
- movi a9, 23
-#if XCHAL_HAVE_LOOPS
- loop a9, .Ldiv_loopend
-#endif
-.Ldiv_loop:
- /* Shift the quotient << 1. */
- slli a10, a10, 1
-
- /* Is this digit a 0 or 1? */
- bltu a2, a3, 1f
-
- /* Output a 1 and subtract. */
- addi a10, a10, 1
- sub a2, a2, a3
-
- /* Shift the dividend << 1. */
-1: slli a2, a2, 1
-
-#if !XCHAL_HAVE_LOOPS
- addi a9, a9, -1
- bnez a9, .Ldiv_loop
-#endif
-.Ldiv_loopend:
-
- /* Add the exponent bias (less one to account for the explicit "1.0"
- of the mantissa that will be added to the exponent in the final
- result). */
- addi a8, a8, 0x7e
-
- /* Check for over/underflow. The value in a8 is one less than the
- final exponent, so values in the range 0..fd are OK here. */
- movi a4, 0xfe
- bgeu a8, a4, .Ldiv_overflow
-
-.Ldiv_round:
- /* Round. The remainder (<< 1) is in a2. */
- bltu a2, a3, .Ldiv_rounded
- addi a10, a10, 1
- beq a2, a3, .Ldiv_exactlyhalf
-
-.Ldiv_rounded:
- /* Add the exponent to the mantissa. */
- slli a8, a8, 23
- add a2, a10, a8
-
-.Ldiv_addsign:
- /* Add the sign bit. */
- srli a7, a7, 31
- slli a7, a7, 31
- or a2, a2, a7
- leaf_return
-
-.Ldiv_overflow:
- bltz a8, .Ldiv_underflow
- /* Return +/- Infinity. */
- addi a8, a4, 1 /* 0xff */
- slli a2, a8, 23
- j .Ldiv_addsign
-
-.Ldiv_exactlyhalf:
- /* Remainder is exactly half the divisor. Round even. */
- srli a10, a10, 1
- slli a10, a10, 1
- j .Ldiv_rounded
-
-.Ldiv_underflow:
- /* Create a subnormal value, where the exponent field contains zero,
- but the effective exponent is 1. The value of a8 is one less than
- the actual exponent, so just negate it to get the shift amount. */
- neg a8, a8
- ssr a8
- bgeui a8, 32, .Ldiv_flush_to_zero
-
- /* Shift a10 right. Any bits that are shifted out of a10 are
- saved in a6 for rounding the result. */
- sll a6, a10
- srl a10, a10
-
- /* Set the exponent to zero. */
- movi a8, 0
-
- /* Pack any nonzero remainder (in a2) into a6. */
- beqz a2, 1f
- movi a9, 1
- or a6, a6, a9
-
- /* Round a10 based on the bits shifted out into a6. */
-1: bgez a6, .Ldiv_rounded
- addi a10, a10, 1
- slli a6, a6, 1
- bnez a6, .Ldiv_rounded
- srli a10, a10, 1
- slli a10, a10, 1
- j .Ldiv_rounded
-
-.Ldiv_flush_to_zero:
- /* Return zero with the appropriate sign bit. */
- srli a2, a7, 31
- slli a2, a2, 31
- leaf_return
-
-#endif /* L_divsf3 */
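
The quotient loop above is restoring division, one bit per iteration, entered after x has been shifted so the first quotient digit is 1. A C sketch of the mantissa loop (assumes den <= num < 2*den on entry, with both in the 24-bit mantissa range; names illustrative):

    #include <stdint.h>

    /* Bit-at-a-time mantissa division as in the loop above; returns the
       24-bit quotient.  The assembly keeps 2*remainder in a2 for the
       rounding step that follows.  */
    static uint32_t
    div_mantissa (uint32_t num, uint32_t den)
    {
      uint32_t q = 1;
      num = (num - den) << 1;     /* first digit and shift */
      for (int i = 0; i < 23; i++)
        {
          q <<= 1;
          if (num >= den)
            {
              q += 1;
              num -= den;
            }
          num <<= 1;
        }
      return q;
    }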
-
-#ifdef L_cmpsf2
-
- /* Equal and Not Equal */
-
- .align 4
- .global __eqsf2
- .global __nesf2
- .set __nesf2, __eqsf2
- .type __eqsf2, @function
-__eqsf2:
- leaf_entry sp, 16
- bne a2, a3, 4f
-
-	/* The bit patterns are equal, but NaN != NaN, so check the exponent. */
- movi a6, 0x7f800000
- ball a2, a6, 3f
-
- /* Equal. */
- movi a2, 0
- leaf_return
-
- /* Not equal. */
-2: movi a2, 1
- leaf_return
-
- /* Check if the mantissas are nonzero. */
-3: slli a7, a2, 9
- j 5f
-
- /* Check if x and y are zero with different signs. */
-4: or a7, a2, a3
- slli a7, a7, 1
-
-	/* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
-	   of x when exponent(x) = 0x7f8 and x == y. */
-5: movi a2, 0
- movi a3, 1
- movnez a2, a3, a7
- leaf_return
-
-
- /* Greater Than */
-
- .align 4
- .global __gtsf2
- .type __gtsf2, @function
-__gtsf2:
- leaf_entry sp, 16
- movi a6, 0x7f800000
- ball a2, a6, 2f
-1: bnall a3, a6, .Lle_cmp
-
- /* Check if y is a NaN. */
- slli a7, a3, 9
- beqz a7, .Lle_cmp
- movi a2, 0
- leaf_return
-
- /* Check if x is a NaN. */
-2: slli a7, a2, 9
- beqz a7, 1b
- movi a2, 0
- leaf_return
-
-
- /* Less Than or Equal */
-
- .align 4
- .global __lesf2
- .type __lesf2, @function
-__lesf2:
- leaf_entry sp, 16
- movi a6, 0x7f800000
- ball a2, a6, 2f
-1: bnall a3, a6, .Lle_cmp
-
- /* Check if y is a NaN. */
- slli a7, a3, 9
- beqz a7, .Lle_cmp
- movi a2, 1
- leaf_return
-
- /* Check if x is a NaN. */
-2: slli a7, a2, 9
- beqz a7, 1b
- movi a2, 1
- leaf_return
-
-.Lle_cmp:
- /* Check if x and y have different signs. */
- xor a7, a2, a3
- bltz a7, .Lle_diff_signs
-
- /* Check if x is negative. */
- bltz a2, .Lle_xneg
-
- /* Check if x <= y. */
- bltu a3, a2, 5f
-4: movi a2, 0
- leaf_return
-
-.Lle_xneg:
- /* Check if y <= x. */
- bgeu a2, a3, 4b
-5: movi a2, 1
- leaf_return
-
-.Lle_diff_signs:
- bltz a2, 4b
-
- /* Check if both x and y are zero. */
- or a7, a2, a3
- slli a7, a7, 1
- movi a2, 1
- movi a3, 0
- moveqz a2, a3, a7
- leaf_return
-
-
- /* Greater Than or Equal */
-
- .align 4
- .global __gesf2
- .type __gesf2, @function
-__gesf2:
- leaf_entry sp, 16
- movi a6, 0x7f800000
- ball a2, a6, 2f
-1: bnall a3, a6, .Llt_cmp
-
- /* Check if y is a NaN. */
- slli a7, a3, 9
- beqz a7, .Llt_cmp
- movi a2, -1
- leaf_return
-
- /* Check if x is a NaN. */
-2: slli a7, a2, 9
- beqz a7, 1b
- movi a2, -1
- leaf_return
-
-
- /* Less Than */
-
- .align 4
- .global __ltsf2
- .type __ltsf2, @function
-__ltsf2:
- leaf_entry sp, 16
- movi a6, 0x7f800000
- ball a2, a6, 2f
-1: bnall a3, a6, .Llt_cmp
-
- /* Check if y is a NaN. */
- slli a7, a3, 9
- beqz a7, .Llt_cmp
- movi a2, 0
- leaf_return
-
- /* Check if x is a NaN. */
-2: slli a7, a2, 9
- beqz a7, 1b
- movi a2, 0
- leaf_return
-
-.Llt_cmp:
- /* Check if x and y have different signs. */
- xor a7, a2, a3
- bltz a7, .Llt_diff_signs
-
- /* Check if x is negative. */
- bltz a2, .Llt_xneg
-
- /* Check if x < y. */
- bgeu a2, a3, 5f
-4: movi a2, -1
- leaf_return
-
-.Llt_xneg:
- /* Check if y < x. */
- bltu a3, a2, 4b
-5: movi a2, 0
- leaf_return
-
-.Llt_diff_signs:
- bgez a2, 5b
-
- /* Check if both x and y are nonzero. */
- or a7, a2, a3
- slli a7, a7, 1
- movi a2, 0
- movi a3, -1
- movnez a2, a3, a7
- leaf_return
-
-
- /* Unordered */
-
- .align 4
- .global __unordsf2
- .type __unordsf2, @function
-__unordsf2:
- leaf_entry sp, 16
- movi a6, 0x7f800000
- ball a2, a6, 3f
-1: ball a3, a6, 4f
-2: movi a2, 0
- leaf_return
-
-3: slli a7, a2, 9
- beqz a7, 1b
- movi a2, 1
- leaf_return
-
-4: slli a7, a3, 9
- beqz a7, 2b
- movi a2, 1
- leaf_return
-
-#endif /* L_cmpsf2 */
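
These comparison helpers follow the libgcc soft-float convention: each returns an int that the caller compares against zero, and the NaN paths above return the value that makes that comparison come out false. Summarized from the code above (declarations shown for illustration):

    extern int __eqsf2 (float, float);  /* == 0 iff x == y; NaN gives 1  */
    extern int __ltsf2 (float, float);  /*  < 0 iff x <  y; NaN gives 0  */
    extern int __lesf2 (float, float);  /* <= 0 iff x <= y; NaN gives 1  */
    extern int __gtsf2 (float, float);  /*  > 0 iff x >  y; NaN gives 0  */
    extern int __gesf2 (float, float);  /* >= 0 iff x >= y; NaN gives -1 */

    /* Typical use by compiled code (illustrative).  */
    static int
    float_less (float a, float b)
    {
      return __ltsf2 (a, b) < 0;
    }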
-
-#ifdef L_fixsfsi
-
- .align 4
- .global __fixsfsi
- .type __fixsfsi, @function
-__fixsfsi:
- leaf_entry sp, 16
-
- /* Check for NaN and Infinity. */
- movi a6, 0x7f800000
- ball a2, a6, .Lfixsfsi_nan_or_inf
-
- /* Extract the exponent and check if 0 < (exp - 0x7e) < 32. */
- extui a4, a2, 23, 8
- addi a4, a4, -0x7e
- bgei a4, 32, .Lfixsfsi_maxint
- blti a4, 1, .Lfixsfsi_zero
-
- /* Add explicit "1.0" and shift << 8. */
- or a7, a2, a6
- slli a5, a7, 8
-
- /* Shift back to the right, based on the exponent. */
- ssl a4 /* shift by 32 - a4 */
- srl a5, a5
-
- /* Negate the result if sign != 0. */
- neg a2, a5
- movgez a2, a5, a7
- leaf_return
-
-.Lfixsfsi_nan_or_inf:
- /* Handle Infinity and NaN. */
- slli a4, a2, 9
- beqz a4, .Lfixsfsi_maxint
-
- /* Translate NaN to +maxint. */
- movi a2, 0
-
-.Lfixsfsi_maxint:
- slli a4, a6, 8 /* 0x80000000 */
- addi a5, a4, -1 /* 0x7fffffff */
- movgez a4, a5, a2
- mov a2, a4
- leaf_return
-
-.Lfixsfsi_zero:
- movi a2, 0
- leaf_return
-
-#endif /* L_fixsfsi */
-
-#ifdef L_fixsfdi
-
- .align 4
- .global __fixsfdi
- .type __fixsfdi, @function
-__fixsfdi:
- leaf_entry sp, 16
-
- /* Check for NaN and Infinity. */
- movi a6, 0x7f800000
- ball a2, a6, .Lfixsfdi_nan_or_inf
-
- /* Extract the exponent and check if 0 < (exp - 0x7e) < 64. */
- extui a4, a2, 23, 8
- addi a4, a4, -0x7e
- bgei a4, 64, .Lfixsfdi_maxint
- blti a4, 1, .Lfixsfdi_zero
-
- /* Add explicit "1.0" and shift << 8. */
- or a7, a2, a6
- slli xh, a7, 8
-
- /* Shift back to the right, based on the exponent. */
- ssl a4 /* shift by 64 - a4 */
- bgei a4, 32, .Lfixsfdi_smallshift
- srl xl, xh
- movi xh, 0
-
-.Lfixsfdi_shifted:
- /* Negate the result if sign != 0. */
- bgez a7, 1f
- neg xl, xl
- neg xh, xh
- beqz xl, 1f
- addi xh, xh, -1
-1: leaf_return
-
-.Lfixsfdi_smallshift:
- movi xl, 0
- sll xl, xh
- srl xh, xh
- j .Lfixsfdi_shifted
-
-.Lfixsfdi_nan_or_inf:
- /* Handle Infinity and NaN. */
- slli a4, a2, 9
- beqz a4, .Lfixsfdi_maxint
-
- /* Translate NaN to +maxint. */
- movi a2, 0
-
-.Lfixsfdi_maxint:
- slli a7, a6, 8 /* 0x80000000 */
- bgez a2, 1f
- mov xh, a7
- movi xl, 0
- leaf_return
-
-1: addi xh, a7, -1 /* 0x7fffffff */
- movi xl, -1
- leaf_return
-
-.Lfixsfdi_zero:
- movi xh, 0
- movi xl, 0
- leaf_return
-
-#endif /* L_fixsfdi */
-
-#ifdef L_fixunssfsi
-
- .align 4
- .global __fixunssfsi
- .type __fixunssfsi, @function
-__fixunssfsi:
- leaf_entry sp, 16
-
- /* Check for NaN and Infinity. */
- movi a6, 0x7f800000
- ball a2, a6, .Lfixunssfsi_nan_or_inf
-
- /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32. */
- extui a4, a2, 23, 8
- addi a4, a4, -0x7f
- bgei a4, 32, .Lfixunssfsi_maxint
- bltz a4, .Lfixunssfsi_zero
-
- /* Add explicit "1.0" and shift << 8. */
- or a7, a2, a6
- slli a5, a7, 8
-
- /* Shift back to the right, based on the exponent. */
- addi a4, a4, 1
- beqi a4, 32, .Lfixunssfsi_bigexp
- ssl a4 /* shift by 32 - a4 */
- srl a5, a5
-
- /* Negate the result if sign != 0. */
- neg a2, a5
- movgez a2, a5, a7
- leaf_return
-
-.Lfixunssfsi_nan_or_inf:
- /* Handle Infinity and NaN. */
- slli a4, a2, 9
- beqz a4, .Lfixunssfsi_maxint
-
- /* Translate NaN to 0xffffffff. */
- movi a2, -1
- leaf_return
-
-.Lfixunssfsi_maxint:
- slli a4, a6, 8 /* 0x80000000 */
- movi a5, -1 /* 0xffffffff */
- movgez a4, a5, a2
- mov a2, a4
- leaf_return
-
-.Lfixunssfsi_zero:
- movi a2, 0
- leaf_return
-
-.Lfixunssfsi_bigexp:
- /* Handle unsigned maximum exponent case. */
- bltz a2, 1f
- mov a2, a5 /* no shift needed */
- leaf_return
-
- /* Return 0x80000000 if negative. */
-1: slli a2, a6, 8
- leaf_return
-
-#endif /* L_fixunssfsi */
-
-#ifdef L_fixunssfdi
-
- .align 4
- .global __fixunssfdi
- .type __fixunssfdi, @function
-__fixunssfdi:
- leaf_entry sp, 16
-
- /* Check for NaN and Infinity. */
- movi a6, 0x7f800000
- ball a2, a6, .Lfixunssfdi_nan_or_inf
-
- /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64. */
- extui a4, a2, 23, 8
- addi a4, a4, -0x7f
- bgei a4, 64, .Lfixunssfdi_maxint
- bltz a4, .Lfixunssfdi_zero
-
- /* Add explicit "1.0" and shift << 8. */
- or a7, a2, a6
- slli xh, a7, 8
-
- /* Shift back to the right, based on the exponent. */
- addi a4, a4, 1
- beqi a4, 64, .Lfixunssfdi_bigexp
- ssl a4 /* shift by 64 - a4 */
- bgei a4, 32, .Lfixunssfdi_smallshift
- srl xl, xh
- movi xh, 0
-
-.Lfixunssfdi_shifted:
- /* Negate the result if sign != 0. */
- bgez a7, 1f
- neg xl, xl
- neg xh, xh
- beqz xl, 1f
- addi xh, xh, -1
-1: leaf_return
-
-.Lfixunssfdi_smallshift:
- movi xl, 0
- src xl, xh, xl
- srl xh, xh
- j .Lfixunssfdi_shifted
-
-.Lfixunssfdi_nan_or_inf:
- /* Handle Infinity and NaN. */
- slli a4, a2, 9
- beqz a4, .Lfixunssfdi_maxint
-
- /* Translate NaN to 0xffffffff.... */
-1: movi xh, -1
- movi xl, -1
- leaf_return
-
-.Lfixunssfdi_maxint:
- bgez a2, 1b
-2: slli xh, a6, 8 /* 0x80000000 */
- movi xl, 0
- leaf_return
-
-.Lfixunssfdi_zero:
- movi xh, 0
- movi xl, 0
- leaf_return
-
-.Lfixunssfdi_bigexp:
- /* Handle unsigned maximum exponent case. */
- bltz a7, 2b
- movi xl, 0
- leaf_return /* no shift needed */
-
-#endif /* L_fixunssfdi */
-
-#ifdef L_floatsisf
-
- .align 4
- .global __floatunsisf
- .type __floatunsisf, @function
-__floatunsisf:
- leaf_entry sp, 16
- beqz a2, .Lfloatsisf_return
-
- /* Set the sign to zero and jump to the floatsisf code. */
- movi a7, 0
- j .Lfloatsisf_normalize
-
- .align 4
- .global __floatsisf
- .type __floatsisf, @function
-__floatsisf:
- leaf_entry sp, 16
-
- /* Check for zero. */
- beqz a2, .Lfloatsisf_return
-
- /* Save the sign. */
- extui a7, a2, 31, 1
-
- /* Get the absolute value. */
-#if XCHAL_HAVE_ABS
- abs a2, a2
-#else
- neg a4, a2
- movltz a2, a4, a2
-#endif
-
-.Lfloatsisf_normalize:
- /* Normalize with the first 1 bit in the msb. */
- do_nsau a4, a2, a5, a6
- ssl a4
- sll a5, a2
-
- /* Shift the mantissa into position, with rounding bits in a6. */
- srli a2, a5, 8
- slli a6, a5, (32 - 8)
-
- /* Set the exponent. */
- movi a5, 0x9d /* 0x7e + 31 */
- sub a5, a5, a4
- slli a5, a5, 23
- add a2, a2, a5
-
- /* Add the sign. */
- slli a7, a7, 31
- or a2, a2, a7
-
- /* Round up if the leftover fraction is >= 1/2. */
- bgez a6, .Lfloatsisf_return
- addi a2, a2, 1 /* Overflow to the exponent is OK. */
-
- /* Check if the leftover fraction is exactly 1/2. */
- slli a6, a6, 1
- beqz a6, .Lfloatsisf_exactlyhalf
-
-.Lfloatsisf_return:
- leaf_return
-
-.Lfloatsisf_exactlyhalf:
- /* Round down to the nearest even value. */
- srli a2, a2, 1
- slli a2, a2, 1
- leaf_return
-
-#endif /* L_floatsisf */
-
-#ifdef L_floatdisf
-
- .align 4
- .global __floatundisf
- .type __floatundisf, @function
-__floatundisf:
- leaf_entry sp, 16
-
- /* Check for zero. */
- or a4, xh, xl
- beqz a4, 2f
-
- /* Set the sign to zero and jump to the floatdisf code. */
- movi a7, 0
- j .Lfloatdisf_normalize
-
- .align 4
- .global __floatdisf
- .type __floatdisf, @function
-__floatdisf:
- leaf_entry sp, 16
-
- /* Check for zero. */
- or a4, xh, xl
- beqz a4, 2f
-
- /* Save the sign. */
- extui a7, xh, 31, 1
-
- /* Get the absolute value. */
- bgez xh, .Lfloatdisf_normalize
- neg xl, xl
- neg xh, xh
- beqz xl, .Lfloatdisf_normalize
- addi xh, xh, -1
-
-.Lfloatdisf_normalize:
- /* Normalize with the first 1 bit in the msb of xh. */
- beqz xh, .Lfloatdisf_bigshift
- do_nsau a4, xh, a5, a6
- ssl a4
- src xh, xh, xl
- sll xl, xl
-
-.Lfloatdisf_shifted:
- /* Shift the mantissa into position, with rounding bits in a6. */
- ssai 8
- sll a5, xl
- src a6, xh, xl
- srl xh, xh
- beqz a5, 1f
- movi a5, 1
- or a6, a6, a5
-1:
- /* Set the exponent. */
- movi a5, 0xbd /* 0x7e + 63 */
- sub a5, a5, a4
- slli a5, a5, 23
- add a2, xh, a5
-
- /* Add the sign. */
- slli a7, a7, 31
- or a2, a2, a7
-
- /* Round up if the leftover fraction is >= 1/2. */
- bgez a6, 2f
- addi a2, a2, 1 /* Overflow to the exponent is OK. */
-
- /* Check if the leftover fraction is exactly 1/2. */
- slli a6, a6, 1
- beqz a6, .Lfloatdisf_exactlyhalf
-2: leaf_return
-
-.Lfloatdisf_bigshift:
- /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */
- do_nsau a4, xl, a5, a6
- ssl a4
- sll xh, xl
- movi xl, 0
- addi a4, a4, 32
- j .Lfloatdisf_shifted
-
-.Lfloatdisf_exactlyhalf:
- /* Round down to the nearest even value. */
- srli a2, a2, 1
- slli a2, a2, 1
- leaf_return
-
-#endif /* L_floatdisf */
diff --git a/gcc/config/xtensa/lib1funcs.asm b/gcc/config/xtensa/lib1funcs.asm
deleted file mode 100644
index 071b9171177..00000000000
--- a/gcc/config/xtensa/lib1funcs.asm
+++ /dev/null
@@ -1,845 +0,0 @@
-/* Assembly functions for the Xtensa version of libgcc1.
- Copyright (C) 2001, 2002, 2003, 2005, 2006, 2007, 2009
- Free Software Foundation, Inc.
- Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 3, or (at your option) any later
-version.
-
-GCC is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
-<http://www.gnu.org/licenses/>. */
-
-#include "xtensa-config.h"
-
-/* Define macros for the ABS and ADDX* instructions to handle cases
- where they are not included in the Xtensa processor configuration. */
-
- .macro do_abs dst, src, tmp
-#if XCHAL_HAVE_ABS
- abs \dst, \src
-#else
- neg \tmp, \src
- movgez \tmp, \src, \src
- mov \dst, \tmp
-#endif
- .endm
-
- .macro do_addx2 dst, as, at, tmp
-#if XCHAL_HAVE_ADDX
- addx2 \dst, \as, \at
-#else
- slli \tmp, \as, 1
- add \dst, \tmp, \at
-#endif
- .endm
-
- .macro do_addx4 dst, as, at, tmp
-#if XCHAL_HAVE_ADDX
- addx4 \dst, \as, \at
-#else
- slli \tmp, \as, 2
- add \dst, \tmp, \at
-#endif
- .endm
-
- .macro do_addx8 dst, as, at, tmp
-#if XCHAL_HAVE_ADDX
- addx8 \dst, \as, \at
-#else
- slli \tmp, \as, 3
- add \dst, \tmp, \at
-#endif
- .endm
-
-/* Define macros for leaf function entry and return, supporting either the
- standard register windowed ABI or the non-windowed call0 ABI. These
- macros do not allocate any extra stack space, so they only work for
- leaf functions that do not need to spill anything to the stack. */
-
- .macro leaf_entry reg, size
-#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
- entry \reg, \size
-#else
- /* do nothing */
-#endif
- .endm
-
- .macro leaf_return
-#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
- retw
-#else
- ret
-#endif
- .endm
-
-
-#ifdef L_mulsi3
- .align 4
- .global __mulsi3
- .type __mulsi3, @function
-__mulsi3:
- leaf_entry sp, 16
-
-#if XCHAL_HAVE_MUL32
- mull a2, a2, a3
-
-#elif XCHAL_HAVE_MUL16
- or a4, a2, a3
- srai a4, a4, 16
- bnez a4, .LMUL16
- mul16u a2, a2, a3
- leaf_return
-.LMUL16:
- srai a4, a2, 16
- srai a5, a3, 16
- mul16u a7, a4, a3
- mul16u a6, a5, a2
- mul16u a4, a2, a3
- add a7, a7, a6
- slli a7, a7, 16
- add a2, a7, a4
-
-#elif XCHAL_HAVE_MAC16
- mul.aa.hl a2, a3
- mula.aa.lh a2, a3
- rsr a5, ACCLO
- umul.aa.ll a2, a3
- rsr a4, ACCLO
- slli a5, a5, 16
- add a2, a4, a5
-
-#else /* !MUL32 && !MUL16 && !MAC16 */
-
- /* Multiply one bit at a time, but unroll the loop 4x to better
- exploit the addx instructions and avoid overhead.
- Peel the first iteration to save a cycle on init. */
-
- /* Avoid negative numbers. */
- xor a5, a2, a3 /* Top bit is 1 if one input is negative. */
- do_abs a3, a3, a6
- do_abs a2, a2, a6
-
- /* Swap so the second argument is smaller. */
- sub a7, a2, a3
- mov a4, a3
- movgez a4, a2, a7 /* a4 = max (a2, a3) */
- movltz a3, a2, a7 /* a3 = min (a2, a3) */
-
- movi a2, 0
- extui a6, a3, 0, 1
- movnez a2, a4, a6
-
- do_addx2 a7, a4, a2, a7
- extui a6, a3, 1, 1
- movnez a2, a7, a6
-
- do_addx4 a7, a4, a2, a7
- extui a6, a3, 2, 1
- movnez a2, a7, a6
-
- do_addx8 a7, a4, a2, a7
- extui a6, a3, 3, 1
- movnez a2, a7, a6
-
- bgeui a3, 16, .Lmult_main_loop
- neg a3, a2
- movltz a2, a3, a5
- leaf_return
-
- .align 4
-.Lmult_main_loop:
- srli a3, a3, 4
- slli a4, a4, 4
-
- add a7, a4, a2
- extui a6, a3, 0, 1
- movnez a2, a7, a6
-
- do_addx2 a7, a4, a2, a7
- extui a6, a3, 1, 1
- movnez a2, a7, a6
-
- do_addx4 a7, a4, a2, a7
- extui a6, a3, 2, 1
- movnez a2, a7, a6
-
- do_addx8 a7, a4, a2, a7
- extui a6, a3, 3, 1
- movnez a2, a7, a6
-
- bgeui a3, 16, .Lmult_main_loop
-
- neg a3, a2
- movltz a2, a3, a5
-
-#endif /* !MUL32 && !MUL16 && !MAC16 */
-
- leaf_return
- .size __mulsi3, . - __mulsi3
-
-#endif /* L_mulsi3 */
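
The software path above consumes four multiplier bits per iteration with conditional adds of b, 2b, 4b and 8b, which is exactly what the addx2/addx4/addx8 steps compute. The unsigned core in C (the assembly additionally takes absolute values up front, swaps so the smaller operand drives the loop, peels the first iteration, and negates at the end; name illustrative):

    #include <stdint.h>

    /* Shift-and-add multiply, four multiplier bits per iteration.
       Illustrative sketch of the loop structure above.  */
    static uint32_t
    mulsi3_sketch (uint32_t a, uint32_t b)
    {
      uint32_t acc = 0;
      while (a != 0)
        {
          if (a & 1) acc += b;
          if (a & 2) acc += b << 1;
          if (a & 4) acc += b << 2;
          if (a & 8) acc += b << 3;
          a >>= 4;
          b <<= 4;
        }
      return acc;
    }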
-
-
-#ifdef L_umulsidi3
-
-#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
-#define XCHAL_NO_MUL 1
-#endif
-
- .align 4
- .global __umulsidi3
- .type __umulsidi3, @function
-__umulsidi3:
-#if __XTENSA_CALL0_ABI__
- leaf_entry sp, 32
- addi sp, sp, -32
- s32i a12, sp, 16
- s32i a13, sp, 20
- s32i a14, sp, 24
- s32i a15, sp, 28
-#elif XCHAL_NO_MUL
- /* This is not really a leaf function; allocate enough stack space
- to allow CALL12s to a helper function. */
- leaf_entry sp, 48
-#else
- leaf_entry sp, 16
-#endif
-
-#ifdef __XTENSA_EB__
-#define wh a2
-#define wl a3
-#else
-#define wh a3
-#define wl a2
-#endif /* __XTENSA_EB__ */
-
- /* This code is taken from the mulsf3 routine in ieee754-sf.S.
- See more comments there. */
-
-#if XCHAL_HAVE_MUL32_HIGH
- mull a6, a2, a3
- muluh wh, a2, a3
- mov wl, a6
-
-#else /* ! MUL32_HIGH */
-
-#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
- /* a0 and a8 will be clobbered by calling the multiply function
- but a8 is not used here and need not be saved. */
- s32i a0, sp, 0
-#endif
-
-#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
-
-#define a2h a4
-#define a3h a5
-
- /* Get the high halves of the inputs into registers. */
- srli a2h, a2, 16
- srli a3h, a3, 16
-
-#define a2l a2
-#define a3l a3
-
-#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
- /* Clear the high halves of the inputs. This does not matter
- for MUL16 because the high bits are ignored. */
- extui a2, a2, 0, 16
- extui a3, a3, 0, 16
-#endif
-#endif /* MUL16 || MUL32 */
-
-
-#if XCHAL_HAVE_MUL16
-
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
- mul16u dst, xreg ## xhalf, yreg ## yhalf
-
-#elif XCHAL_HAVE_MUL32
-
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
- mull dst, xreg ## xhalf, yreg ## yhalf
-
-#elif XCHAL_HAVE_MAC16
-
-/* The preprocessor insists on inserting a space when concatenating after
- a period in the definition of do_mul below. These macros are a workaround
- using underscores instead of periods when doing the concatenation. */
-#define umul_aa_ll umul.aa.ll
-#define umul_aa_lh umul.aa.lh
-#define umul_aa_hl umul.aa.hl
-#define umul_aa_hh umul.aa.hh
-
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
- umul_aa_ ## xhalf ## yhalf xreg, yreg; \
- rsr dst, ACCLO
-
-#else /* no multiply hardware */
-
-#define set_arg_l(dst, src) \
- extui dst, src, 0, 16
-#define set_arg_h(dst, src) \
- srli dst, src, 16
-
-#if __XTENSA_CALL0_ABI__
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
- set_arg_ ## xhalf (a13, xreg); \
- set_arg_ ## yhalf (a14, yreg); \
- call0 .Lmul_mulsi3; \
- mov dst, a12
-#else
-#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
- set_arg_ ## xhalf (a14, xreg); \
- set_arg_ ## yhalf (a15, yreg); \
- call12 .Lmul_mulsi3; \
- mov dst, a14
-#endif /* __XTENSA_CALL0_ABI__ */
-
-#endif /* no multiply hardware */
-
- /* Add pp1 and pp2 into a6 with carry-out in a9. */
- do_mul(a6, a2, l, a3, h) /* pp 1 */
- do_mul(a11, a2, h, a3, l) /* pp 2 */
- movi a9, 0
- add a6, a6, a11
- bgeu a6, a11, 1f
- addi a9, a9, 1
-1:
- /* Shift the high half of a9/a6 into position in a9. Note that
- this value can be safely incremented without any carry-outs. */
- ssai 16
- src a9, a9, a6
-
- /* Compute the low word into a6. */
- do_mul(a11, a2, l, a3, l) /* pp 0 */
- sll a6, a6
- add a6, a6, a11
- bgeu a6, a11, 1f
- addi a9, a9, 1
-1:
- /* Compute the high word into wh. */
- do_mul(wh, a2, h, a3, h) /* pp 3 */
- add wh, wh, a9
- mov wl, a6
-
-#endif /* !MUL32_HIGH */
-
-#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
- /* Restore the original return address. */
- l32i a0, sp, 0
-#endif
-#if __XTENSA_CALL0_ABI__
- l32i a12, sp, 16
- l32i a13, sp, 20
- l32i a14, sp, 24
- l32i a15, sp, 28
- addi sp, sp, 32
-#endif
- leaf_return
-
-#if XCHAL_NO_MUL
-
- /* For Xtensa processors with no multiply hardware, this simplified
- version of _mulsi3 is used for multiplying 16-bit chunks of
- the floating-point mantissas. When using CALL0, this function
- uses a custom ABI: the inputs are passed in a13 and a14, the
- result is returned in a12, and a8 and a15 are clobbered. */
- .align 4
-.Lmul_mulsi3:
- leaf_entry sp, 16
- .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
- movi \dst, 0
-1: add \tmp1, \src2, \dst
- extui \tmp2, \src1, 0, 1
- movnez \dst, \tmp1, \tmp2
-
- do_addx2 \tmp1, \src2, \dst, \tmp1
- extui \tmp2, \src1, 1, 1
- movnez \dst, \tmp1, \tmp2
-
- do_addx4 \tmp1, \src2, \dst, \tmp1
- extui \tmp2, \src1, 2, 1
- movnez \dst, \tmp1, \tmp2
-
- do_addx8 \tmp1, \src2, \dst, \tmp1
- extui \tmp2, \src1, 3, 1
- movnez \dst, \tmp1, \tmp2
-
- srli \src1, \src1, 4
- slli \src2, \src2, 4
- bnez \src1, 1b
- .endm
-#if __XTENSA_CALL0_ABI__
- mul_mulsi3_body a12, a13, a14, a15, a8
-#else
- /* The result will be written into a2, so save that argument in a4. */
- mov a4, a2
- mul_mulsi3_body a2, a4, a3, a5, a6
-#endif
- leaf_return
-#endif /* XCHAL_NO_MUL */
-
- .size __umulsidi3, . - __umulsidi3
-
-#endif /* L_umulsidi3 */
-
-
-/* Define a macro for the NSAU (unsigned normalize shift amount)
- instruction, which computes the number of leading zero bits,
- to handle cases where it is not included in the Xtensa processor
- configuration. */
-
- .macro do_nsau cnt, val, tmp, a
-#if XCHAL_HAVE_NSA
- nsau \cnt, \val
-#else
- mov \a, \val
- movi \cnt, 0
- extui \tmp, \a, 16, 16
- bnez \tmp, 0f
- movi \cnt, 16
- slli \a, \a, 16
-0:
- extui \tmp, \a, 24, 8
- bnez \tmp, 1f
- addi \cnt, \cnt, 8
- slli \a, \a, 8
-1:
- movi \tmp, __nsau_data
- extui \a, \a, 24, 8
- add \tmp, \tmp, \a
- l8ui \tmp, \tmp, 0
- add \cnt, \cnt, \tmp
-#endif /* !XCHAL_HAVE_NSA */
- .endm
-
-#ifdef L_clz
- .section .rodata
- .align 4
- .global __nsau_data
- .type __nsau_data, @object
-__nsau_data:
-#if !XCHAL_HAVE_NSA
- .byte 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
- .byte 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
- .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
- .byte 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
- .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
- .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
- .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
- .byte 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
- .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
- .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
- .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
- .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
- .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
- .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
- .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
- .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-#endif /* !XCHAL_HAVE_NSA */
- .size __nsau_data, . - __nsau_data
- .hidden __nsau_data
-#endif /* L_clz */
-
-
-#ifdef L_clzsi2
- .align 4
- .global __clzsi2
- .type __clzsi2, @function
-__clzsi2:
- leaf_entry sp, 16
- do_nsau a2, a2, a3, a4
- leaf_return
- .size __clzsi2, . - __clzsi2
-
-#endif /* L_clzsi2 */
-
-
-#ifdef L_ctzsi2
- .align 4
- .global __ctzsi2
- .type __ctzsi2, @function
-__ctzsi2:
- leaf_entry sp, 16
- neg a3, a2
- and a3, a3, a2
- do_nsau a2, a3, a4, a5
- neg a2, a2
- addi a2, a2, 31
- leaf_return
- .size __ctzsi2, . - __ctzsi2
-
-#endif /* L_ctzsi2 */
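
The neg/and pair isolates the lowest set bit (x & -x), whose leading-zero count
then gives the trailing-zero count. Sketched in C, reusing the hypothetical
nsau_sketch helper from above; like __ctzsi2, the result is undefined for a
zero argument:

    /* Sketch of __ctzsi2: ctz (x) = 31 - nsau (x & -x).  */
    static int
    ctz_sketch (unsigned int x)
    {
      return 31 - nsau_sketch (x & -x);
    }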
-
-
-#ifdef L_ffssi2
- .align 4
- .global __ffssi2
- .type __ffssi2, @function
-__ffssi2:
- leaf_entry sp, 16
- neg a3, a2
- and a3, a3, a2
- do_nsau a2, a3, a4, a5
- neg a2, a2
- addi a2, a2, 32
- leaf_return
- .size __ffssi2, . - __ffssi2
-
-#endif /* L_ffssi2 */
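
__ffssi2 differs from __ctzsi2 only in the final offset: adding 32 instead of
31 makes the result 1-based and, since nsau of 0 is 32, returns 0 when no bit
is set. Sketch, again reusing the hypothetical nsau_sketch:

    /* Sketch of __ffssi2: 1-based index of the lowest set bit,
       0 when the argument is 0.  */
    static int
    ffs_sketch (unsigned int x)
    {
      return 32 - nsau_sketch (x & -x);
    }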
-
-
-#ifdef L_udivsi3
- .align 4
- .global __udivsi3
- .type __udivsi3, @function
-__udivsi3:
- leaf_entry sp, 16
-#if XCHAL_HAVE_DIV32
- quou a2, a2, a3
-#else
- bltui a3, 2, .Lle_one /* check if the divisor <= 1 */
-
- mov a6, a2 /* keep dividend in a6 */
- do_nsau a5, a6, a2, a7 /* dividend_shift = nsau (dividend) */
- do_nsau a4, a3, a2, a7 /* divisor_shift = nsau (divisor) */
- bgeu a5, a4, .Lspecial
-
- sub a4, a4, a5 /* count = divisor_shift - dividend_shift */
- ssl a4
- sll a3, a3 /* divisor <<= count */
- movi a2, 0 /* quotient = 0 */
-
- /* test-subtract-and-shift loop; one quotient bit on each iteration */
-#if XCHAL_HAVE_LOOPS
- loopnez a4, .Lloopend
-#endif /* XCHAL_HAVE_LOOPS */
-.Lloop:
- bltu a6, a3, .Lzerobit
- sub a6, a6, a3
- addi a2, a2, 1
-.Lzerobit:
- slli a2, a2, 1
- srli a3, a3, 1
-#if !XCHAL_HAVE_LOOPS
- addi a4, a4, -1
- bnez a4, .Lloop
-#endif /* !XCHAL_HAVE_LOOPS */
-.Lloopend:
-
- bltu a6, a3, .Lreturn
- addi a2, a2, 1 /* increment quotient if dividend >= divisor */
-.Lreturn:
- leaf_return
-
-.Lle_one:
- beqz a3, .Lerror /* divisor == 0 raises an exception; otherwise divisor == 1, so return the dividend unchanged */
- leaf_return
-
-.Lspecial:
- /* return 1 if dividend >= divisor, else 0 */
- bltu a6, a3, .Lreturn0
- movi a2, 1
- leaf_return
-
-.Lerror:
- /* Divide by zero: Use an illegal instruction to force an exception.
- The subsequent "DIV0" string can be recognized by the exception
- handler to identify the real cause of the exception. */
- ill
- .ascii "DIV0"
-
-.Lreturn0:
- movi a2, 0
-#endif /* XCHAL_HAVE_DIV32 */
- leaf_return
- .size __udivsi3, . - __udivsi3
-
-#endif /* L_udivsi3 */
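
The software path first left-aligns the divisor with the dividend and then
runs a restoring shift-subtract loop that retires one quotient bit per
iteration. A rough C rendering under the same structure (hypothetical names,
reusing nsau_sketch; the ill/"DIV0" trap is modelled as returning 0):

    /* Sketch of the __udivsi3 software path.  */
    static unsigned int
    udiv_sketch (unsigned int dividend, unsigned int divisor)
    {
      unsigned int quotient = 0;
      int count;

      if (divisor <= 1)
        return divisor ? dividend : 0;   /* real code traps on 0 */

      count = nsau_sketch (divisor) - nsau_sketch (dividend);
      if (count <= 0)
        return dividend >= divisor;      /* .Lspecial: 0 or 1 */

      divisor <<= count;                 /* align the leading bits */
      while (count-- > 0)
        {
          if (dividend >= divisor)
            {
              dividend -= divisor;
              quotient += 1;
            }
          quotient <<= 1;
          divisor >>= 1;
        }
      if (dividend >= divisor)
        quotient += 1;                   /* final quotient bit */
      return quotient;
    }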
-
-
-#ifdef L_divsi3
- .align 4
- .global __divsi3
- .type __divsi3, @function
-__divsi3:
- leaf_entry sp, 16
-#if XCHAL_HAVE_DIV32
- quos a2, a2, a3
-#else
- xor a7, a2, a3 /* sign = dividend ^ divisor */
- do_abs a6, a2, a4 /* udividend = abs (dividend) */
- do_abs a3, a3, a4 /* udivisor = abs (divisor) */
- bltui a3, 2, .Lle_one /* check if udivisor <= 1 */
- do_nsau a5, a6, a2, a8 /* udividend_shift = nsau (udividend) */
- do_nsau a4, a3, a2, a8 /* udivisor_shift = nsau (udivisor) */
- bgeu a5, a4, .Lspecial
-
- sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */
- ssl a4
- sll a3, a3 /* udivisor <<= count */
- movi a2, 0 /* quotient = 0 */
-
- /* test-subtract-and-shift loop; one quotient bit on each iteration */
-#if XCHAL_HAVE_LOOPS
- loopnez a4, .Lloopend
-#endif /* XCHAL_HAVE_LOOPS */
-.Lloop:
- bltu a6, a3, .Lzerobit
- sub a6, a6, a3
- addi a2, a2, 1
-.Lzerobit:
- slli a2, a2, 1
- srli a3, a3, 1
-#if !XCHAL_HAVE_LOOPS
- addi a4, a4, -1
- bnez a4, .Lloop
-#endif /* !XCHAL_HAVE_LOOPS */
-.Lloopend:
-
- bltu a6, a3, .Lreturn
- addi a2, a2, 1 /* increment if udividend >= udivisor */
-.Lreturn:
- neg a5, a2
- movltz a2, a5, a7 /* return (sign < 0) ? -quotient : quotient */
- leaf_return
-
-.Lle_one:
- beqz a3, .Lerror
- neg a2, a6 /* if udivisor == 1, then return... */
- movgez a2, a6, a7 /* (sign < 0) ? -udividend : udividend */
- leaf_return
-
-.Lspecial:
- bltu a6, a3, .Lreturn0 /* if udividend < udivisor, return 0 */
- movi a2, 1
- movi a4, -1
- movltz a2, a4, a7 /* else return (sign < 0) ? -1 : 1 */
- leaf_return
-
-.Lerror:
- /* Divide by zero: Use an illegal instruction to force an exception.
- The subsequent "DIV0" string can be recognized by the exception
- handler to identify the real cause of the exception. */
- ill
- .ascii "DIV0"
-
-.Lreturn0:
- movi a2, 0
-#endif /* XCHAL_HAVE_DIV32 */
- leaf_return
- .size __divsi3, . - __divsi3
-
-#endif /* L_divsi3 */
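
The signed variant records the XOR of the operand signs, divides the
magnitudes with the same loop, and negates the quotient when the signs differ.
Sketched in C on top of the hypothetical udiv_sketch helper above:

    /* Sketch of the __divsi3 sign handling.  */
    static int
    div_sketch (int dividend, int divisor)
    {
      int sign = dividend ^ divisor;     /* sign bit set iff signs differ */
      unsigned int ud = dividend < 0 ? -(unsigned int) dividend
                                     : (unsigned int) dividend;
      unsigned int uv = divisor < 0 ? -(unsigned int) divisor
                                    : (unsigned int) divisor;
      unsigned int q = udiv_sketch (ud, uv);
      return sign < 0 ? -(int) q : (int) q;
    }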
-
-
-#ifdef L_umodsi3
- .align 4
- .global __umodsi3
- .type __umodsi3, @function
-__umodsi3:
- leaf_entry sp, 16
-#if XCHAL_HAVE_DIV32
- remu a2, a2, a3
-#else
- bltui a3, 2, .Lle_one /* check if the divisor is <= 1 */
-
- do_nsau a5, a2, a6, a7 /* dividend_shift = nsau (dividend) */
- do_nsau a4, a3, a6, a7 /* divisor_shift = nsau (divisor) */
- bgeu a5, a4, .Lspecial
-
- sub a4, a4, a5 /* count = divisor_shift - dividend_shift */
- ssl a4
- sll a3, a3 /* divisor <<= count */
-
- /* test-subtract-and-shift loop */
-#if XCHAL_HAVE_LOOPS
- loopnez a4, .Lloopend
-#endif /* XCHAL_HAVE_LOOPS */
-.Lloop:
- bltu a2, a3, .Lzerobit
- sub a2, a2, a3
-.Lzerobit:
- srli a3, a3, 1
-#if !XCHAL_HAVE_LOOPS
- addi a4, a4, -1
- bnez a4, .Lloop
-#endif /* !XCHAL_HAVE_LOOPS */
-.Lloopend:
-
-.Lspecial:
- bltu a2, a3, .Lreturn
- sub a2, a2, a3 /* subtract once more if dividend >= divisor */
-.Lreturn:
- leaf_return
-
-.Lle_one:
- bnez a3, .Lreturn0 /* if divisor == 1, the remainder is 0 */
-
- /* Divide by zero: Use an illegal instruction to force an exception.
- The subsequent "DIV0" string can be recognized by the exception
- handler to identify the real cause of the exception. */
- ill
- .ascii "DIV0"
-
-.Lreturn0:
- movi a2, 0
-#endif /* XCHAL_HAVE_DIV32 */
- leaf_return
- .size __umodsi3, . - __umodsi3
-
-#endif /* L_umodsi3 */
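
__umodsi3 runs the same aligned shift-subtract loop but keeps only the
shrinking dividend, which is the remainder once the loop and the one trailing
compare finish. Sketch (zero divisor traps in the real code; returning 0 here):

    /* Sketch of the __umodsi3 software path.  */
    static unsigned int
    umod_sketch (unsigned int dividend, unsigned int divisor)
    {
      int count;

      if (divisor <= 1)
        return 0;                        /* x % 1 == 0; x % 0 traps */

      count = nsau_sketch (divisor) - nsau_sketch (dividend);
      if (count > 0)
        {
          divisor <<= count;             /* align the leading bits */
          while (count-- > 0)
            {
              if (dividend >= divisor)
                dividend -= divisor;
              divisor >>= 1;
            }
        }
      if (dividend >= divisor)
        dividend -= divisor;             /* one final subtraction */
      return dividend;
    }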
-
-
-#ifdef L_modsi3
- .align 4
- .global __modsi3
- .type __modsi3, @function
-__modsi3:
- leaf_entry sp, 16
-#if XCHAL_HAVE_DIV32
- rems a2, a2, a3
-#else
- mov a7, a2 /* save original (signed) dividend */
- do_abs a2, a2, a4 /* udividend = abs (dividend) */
- do_abs a3, a3, a4 /* udivisor = abs (divisor) */
- bltui a3, 2, .Lle_one /* check if udivisor <= 1 */
- do_nsau a5, a2, a6, a8 /* udividend_shift = nsau (udividend) */
- do_nsau a4, a3, a6, a8 /* udivisor_shift = nsau (udivisor) */
- bgeu a5, a4, .Lspecial
-
- sub a4, a4, a5 /* count = udivisor_shift - udividend_shift */
- ssl a4
- sll a3, a3 /* udivisor <<= count */
-
- /* test-subtract-and-shift loop */
-#if XCHAL_HAVE_LOOPS
- loopnez a4, .Lloopend
-#endif /* XCHAL_HAVE_LOOPS */
-.Lloop:
- bltu a2, a3, .Lzerobit
- sub a2, a2, a3
-.Lzerobit:
- srli a3, a3, 1
-#if !XCHAL_HAVE_LOOPS
- addi a4, a4, -1
- bnez a4, .Lloop
-#endif /* !XCHAL_HAVE_LOOPS */
-.Lloopend:
-
-.Lspecial:
- bltu a2, a3, .Lreturn
- sub a2, a2, a3 /* subtract again if udividend >= udivisor */
-.Lreturn:
- bgez a7, .Lpositive
- neg a2, a2 /* if (dividend < 0), negate the remainder */
-.Lpositive:
- leaf_return
-
-.Lle_one:
- bnez a3, .Lreturn0 /* if divisor == 1, the remainder is 0 */
-
- /* Divide by zero: Use an illegal instruction to force an exception.
- The subsequent "DIV0" string can be recognized by the exception
- handler to identify the real cause of the exception. */
- ill
- .ascii "DIV0"
-
-.Lreturn0:
- movi a2, 0
-#endif /* XCHAL_HAVE_DIV32 */
- leaf_return
- .size __modsi3, . - __modsi3
-
-#endif /* L_modsi3 */
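
As in C, the remainder takes the sign of the dividend. Sketched on top of the
hypothetical umod_sketch helper above:

    /* Sketch of the __modsi3 sign handling.  */
    static int
    mod_sketch (int dividend, int divisor)
    {
      unsigned int ud = dividend < 0 ? -(unsigned int) dividend
                                     : (unsigned int) dividend;
      unsigned int uv = divisor < 0 ? -(unsigned int) divisor
                                    : (unsigned int) divisor;
      unsigned int r = umod_sketch (ud, uv);
      return dividend < 0 ? -(int) r : (int) r;
    }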
-
-
-#ifdef __XTENSA_EB__
-#define uh a2
-#define ul a3
-#else
-#define uh a3
-#define ul a2
-#endif /* __XTENSA_EB__ */
-
-
-#ifdef L_ashldi3
- .align 4
- .global __ashldi3
- .type __ashldi3, @function
-__ashldi3:
- leaf_entry sp, 16
- ssl a4
- bgei a4, 32, .Llow_only
- src uh, uh, ul
- sll ul, ul
- leaf_return
-
-.Llow_only:
- sll uh, ul
- movi ul, 0
- leaf_return
- .size __ashldi3, . - __ashldi3
-
-#endif /* L_ashldi3 */
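
For shift counts below 32, src funnels low-word bits into the high word
through the SAR register set up by ssl; for 32 and above, only a single
cross-word shift survives. A rough C sketch of the left shift (hypothetical
name; the uh/ul register choice by endianness is handled by the #defines
above):

    #include <stdint.h>

    /* Sketch of __ashldi3.  */
    static uint64_t
    ashl_sketch (uint64_t u, unsigned int n)
    {
      uint32_t ul = (uint32_t) u, uh = (uint32_t) (u >> 32);

      if (n >= 32)
        {
          uh = ul << (n - 32);   /* only low-word bits reach the high word */
          ul = 0;
        }
      else if (n != 0)
        {
          uh = (uh << n) | (ul >> (32 - n));   /* funnel shift, like src */
          ul <<= n;
        }
      return ((uint64_t) uh << 32) | ul;
    }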
-
-
-#ifdef L_ashrdi3
- .align 4
- .global __ashrdi3
- .type __ashrdi3, @function
-__ashrdi3:
- leaf_entry sp, 16
- ssr a4
- bgei a4, 32, .Lhigh_only
- src ul, uh, ul
- sra uh, uh
- leaf_return
-
-.Lhigh_only:
- sra ul, uh
- srai uh, uh, 31
- leaf_return
- .size __ashrdi3, . - __ashrdi3
-
-#endif /* L_ashrdi3 */
-
-
-#ifdef L_lshrdi3
- .align 4
- .global __lshrdi3
- .type __lshrdi3, @function
-__lshrdi3:
- leaf_entry sp, 16
- ssr a4
- bgei a4, 32, .Lhigh_only1
- src ul, uh, ul
- srl uh, uh
- leaf_return
-
-.Lhigh_only1:
- srl ul, uh
- movi uh, 0
- leaf_return
- .size __lshrdi3, . - __lshrdi3
-
-#endif /* L_lshrdi3 */
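
The two right shifts mirror the left shift with ssr and differ only in whether
the vacated high bits replicate the sign (sra/srai) or are zero-filled (srl).
Sketches in the same style (note that >> on a signed int is
implementation-defined in ISO C but arithmetic with GCC):

    #include <stdint.h>

    /* Sketch of __ashrdi3.  */
    static int64_t
    ashr_sketch (int64_t u, unsigned int n)
    {
      uint32_t ul = (uint32_t) u;
      int32_t uh = (int32_t) (u >> 32);

      if (n >= 32)
        {
          ul = (uint32_t) (uh >> (n - 32));   /* sra */
          uh >>= 31;                          /* replicate the sign bit */
        }
      else if (n != 0)
        {
          ul = (ul >> n) | ((uint32_t) uh << (32 - n));  /* src */
          uh >>= n;                           /* sra keeps the sign */
        }
      return (int64_t) (((uint64_t) (uint32_t) uh << 32) | ul);
    }

    /* Sketch of __lshrdi3.  */
    static uint64_t
    lshr_sketch (uint64_t u, unsigned int n)
    {
      uint32_t ul = (uint32_t) u, uh = (uint32_t) (u >> 32);

      if (n >= 32)
        {
          ul = uh >> (n - 32);                /* srl */
          uh = 0;
        }
      else if (n != 0)
        {
          ul = (ul >> n) | (uh << (32 - n));  /* src */
          uh >>= n;
        }
      return ((uint64_t) uh << 32) | ul;
    }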
-
-
-#include "ieee754-df.S"
-#include "ieee754-sf.S"
diff --git a/gcc/config/xtensa/t-xtensa b/gcc/config/xtensa/t-xtensa
index c0a7cb5202f..31ac2ad2452 100644
--- a/gcc/config/xtensa/t-xtensa
+++ b/gcc/config/xtensa/t-xtensa
@@ -17,18 +17,6 @@
# along with GCC; see the file COPYING3. If not see
# <http://www.gnu.org/licenses/>.
-LIB1ASMSRC = xtensa/lib1funcs.asm
-LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \
- _umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \
- _ashldi3 _ashrdi3 _lshrdi3 \
- _negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \
- _fixunssfsi _fixunssfdi _floatsisf _floatunsisf \
- _floatdisf _floatundisf \
- _negdf2 _addsubdf3 _muldf3 _divdf3 _cmpdf2 _fixdfsi _fixdfdi \
- _fixunsdfsi _fixunsdfdi _floatsidf _floatunsidf \
- _floatdidf _floatundidf \
- _truncdfsf2 _extendsfdf2
-
LIB2FUNCS_EXTRA = $(srcdir)/config/xtensa/lib2funcs.S
$(out_object_file): gt-xtensa.h