diff options
Diffstat (limited to 'REORG.TODO/sysdeps/sparc/sparc64')
184 files changed, 14413 insertions, 0 deletions
diff --git a/REORG.TODO/sysdeps/sparc/sparc64/Implies b/REORG.TODO/sysdeps/sparc/sparc64/Implies new file mode 100644 index 0000000000..7abc50efcc --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/Implies @@ -0,0 +1,7 @@ +wordsize-64 +# SPARC uses IEEE 754 floating point. +ieee754/ldbl-128 +ieee754/dbl-64/wordsize-64 +ieee754/dbl-64 +ieee754/flt-32 +sparc/sparc64/soft-fp diff --git a/REORG.TODO/sysdeps/sparc/sparc64/Makefile b/REORG.TODO/sysdeps/sparc/sparc64/Makefile new file mode 100644 index 0000000000..a5e403630e --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/Makefile @@ -0,0 +1,21 @@ +sysdep-CFLAGS += -Wa,-Av9a -mvis + +ifeq ($(subdir),string) +sysdep_routines += align-cpy +endif + +ifeq ($(have-as-vis3),yes) +ASFLAGS-.o += -Wa,-Av9d +ASFLAGS-.os += -Wa,-Av9d +ASFLAGS-.op += -Wa,-Av9d +ASFLAGS-.oS += -Wa,-Av9d +endif + +# nscd uses atomic_spin_nop which in turn requires cpu_relax +ifeq ($(subdir),nscd) +routines += cpu_relax +endif + +ifeq ($(subdir),nptl) +libpthread-routines += cpu_relax +endif diff --git a/REORG.TODO/sysdeps/sparc/sparc64/Versions b/REORG.TODO/sysdeps/sparc/sparc64/Versions new file mode 100644 index 0000000000..4cef7bcc71 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/Versions @@ -0,0 +1,14 @@ +libc { + GLIBC_2.1.1 { + # SPARC v9 SYSV ABI helper functions + __align_cpy_1; __align_cpy_2; __align_cpy_4; + __align_cpy_8; __align_cpy_16; + } +} +libm { + GLIBC_2.1 { + # A generic bug got this omitted from other configurations' version + # sets, but we always had it. + exp2l; + } +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/add_n.S b/REORG.TODO/sysdeps/sparc/sparc64/add_n.S new file mode 100644 index 0000000000..e7f7c46c42 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/add_n.S @@ -0,0 +1,57 @@ +/* SPARC v9 __mpn_add_n -- Add two limb vectors of the same length > 0 and + store sum in a third limb vector. + + Copyright (C) 1995-2017 Free Software Foundation, Inc. + + This file is part of the GNU MP Library. 
+ + The GNU MP Library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or (at your + option) any later version. + + The GNU MP Library is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with the GNU MP Library; see the file COPYING.LIB. If not, + see <http://www.gnu.org/licenses/>. */ + + +#include <sysdep.h> + + +/* INPUT PARAMETERS + res_ptr %o0 + s1_ptr %o1 + s2_ptr %o2 + size %o3 */ + + +ENTRY(__mpn_add_n) + + sub %g0,%o3,%g5 + sllx %o3,3,%g1 + add %o1,%g1,%o1 ! make s1_ptr point at end + add %o2,%g1,%o2 ! make s2_ptr point at end + add %o0,%g1,%o0 ! make res_ptr point at end + mov 0,%o4 ! clear carry variable + sllx %g5,3,%o5 ! compute initial address index + +1: ldx [%o2+%o5],%g1 ! load s2 limb + add %g5,1,%g5 ! increment loop count + ldx [%o1+%o5],%o3 ! load s1 limb + addcc %g1,%o4,%g1 ! add s2 limb and carry variable + movcc %xcc,0,%o4 ! if carry-out, o4 was 1; clear it + addcc %g1,%o3,%g1 ! add s1 limb to sum + stx %g1,[%o0+%o5] ! store result + add %o5,8,%o5 ! increment address index + brnz,pt %g5,1b + movcs %xcc,1,%o4 ! if s1 add gave carry, record it + + retl + mov %o4,%o0 + +END(__mpn_add_n) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/addmul_1.S b/REORG.TODO/sysdeps/sparc/sparc64/addmul_1.S new file mode 100644 index 0000000000..96450cc4c8 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/addmul_1.S @@ -0,0 +1,83 @@ +/* SPARC v9 __mpn_addmul_1 -- Multiply a limb vector with a single limb and + add the product to a second limb vector. + + Copyright (C) 1996-2017 Free Software Foundation, Inc. + + This file is part of the GNU MP Library. 
+ + The GNU MP Library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or (at your + option) any later version. + + The GNU MP Library is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with the GNU MP Library; see the file COPYING.LIB. If not, + see <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + +/* INPUT PARAMETERS + res_ptr o0 + s1_ptr o1 + size o2 + s2_limb o3 */ + + +ENTRY(__mpn_addmul_1) + save %sp,-192,%sp + + sub %g0,%i2,%o7 + mov 0,%o0 ! zero cy_limb + sllx %o7,3,%o7 + sethi %hi(0x80000000),%o2 + srl %i3,0,%o1 ! extract low 32 bits of s2_limb + sub %i1,%o7,%o3 + srlx %i3,32,%i3 ! extract high 32 bits of s2_limb + sub %i0,%o7,%o4 + add %o2,%o2,%o2 ! o2 = 0x100000000 + + ! hi ! + ! mid-1 ! + ! mid-2 ! + ! lo ! +1: + ldx [%o3+%o7],%g5 + srl %g5,0,%i0 ! zero hi bits + ldx [%o4+%o7],%l1 + srlx %g5,32,%g5 + mulx %o1,%i0,%i4 ! lo product + mulx %i3,%i0,%i1 ! mid-1 product + mulx %o1,%g5,%l2 ! mid-2 product + mulx %i3,%g5,%i5 ! hi product + srlx %i4,32,%i0 ! extract high 32 bits of lo product... + add %i1,%i0,%i1 ! ...and add it to the mid-1 product + addcc %i1,%l2,%i1 ! add mid products + mov 0,%l0 ! we need the carry from that add... + movcs %xcc,%o2,%l0 ! ...compute it and... + sllx %i1,32,%i0 ! align low bits of mid product + add %i5,%l0,%i5 ! ...add to bit 32 of the hi product + srl %i4,0,%g5 ! zero high 32 bits of lo product + add %i0,%g5,%i0 ! combine into low 64 bits of result + srlx %i1,32,%i1 ! extract high bits of mid product... + addcc %i0,%o0,%i0 ! add cy_limb to low 64 bits of result + add %i5,%i1,%i1 ! 
...and add them to the high result + mov 0,%g5 + movcs %xcc,1,%g5 + addcc %l1,%i0,%i0 + stx %i0,[%o4+%o7] + add %g5,1,%l1 + movcs %xcc,%l1,%g5 + addcc %o7,8,%o7 + bne,pt %xcc,1b + add %i1,%g5,%o0 ! compute new cy_limb + + jmpl %i7+8, %g0 + restore %o0,%g0,%o0 + +END(__mpn_addmul_1) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/align-cpy.S b/REORG.TODO/sysdeps/sparc/sparc64/align-cpy.S new file mode 100644 index 0000000000..e36799a405 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/align-cpy.S @@ -0,0 +1,84 @@ +/* Aligned copy routines specified by Sparc V9 ABI. + For 64-bit sparc. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller (davem@davemloft.net) + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + + .text + .align 8 +/* __align_cpy_8: copy %o2 bytes from %o1 to %o0 using 8-byte loads and + stores. Returns at once when both pointers are equal. Local label 10 + is also the target __align_cpy_2 branches to when all of its arguments + are multiples of 8. */ +ENTRY(__align_cpy_8) +10: cmp %o0, %o1 + be,pn %xcc, 9f + mov %o0, %o3 + subcc %o2, 0x08, %o2 + be,pn %xcc, 8f +1: ldx [%o1 + 0x00], %o5 + ldx [%o1 + 0x08], %o4 + subcc %o2, 0x10, %o2 + add %o1, 0x10, %o1 + stx %o5, [%o3 + 0x00] + stx %o4, [%o3 + 0x08] + bg,pt %xcc, 1b + add %o3, 0x10, %o3 + bne,pn %xcc, 9f + nop + ldx [%o1 + 0x00], %o5 +8: stx %o5, [%o3 + 0x00] +9: retl + nop +END(__align_cpy_8) + + .align 8 +/* __align_cpy_4: same structure as __align_cpy_8 but with 4-byte loads + and stores. Local label 20 is the target __align_cpy_2 branches to + when all of its arguments are multiples of 4. */ +ENTRY(__align_cpy_4) +20: cmp %o0, %o1 + be,pn %xcc, 9f + mov %o0, %o3 + subcc %o2, 0x04, %o2 + be,pn %xcc, 8f +1: lduw [%o1 + 0x00], %o5 + lduw [%o1 + 0x04], %o4 + subcc %o2, 0x08, %o2 + add %o1, 0x08, %o1 + stw %o5, [%o3 + 0x00] + stw %o4, [%o3 + 0x04] + bg,pt %xcc, 1b + add %o3, 0x08, %o3 + bne,pn %xcc, 9f + nop + lduw [%o1 + 0x00], %o5 +8: stw %o5, [%o3 + 0x00] +9: retl + nop +END(__align_cpy_4) + + .align 8 +/* __align_cpy_2: OR destination, source and length together and test the + low bits of the result. If the low three bits are clear, continue in + the 8-byte routine (label 10); if the low two bits are clear, continue + in the 4-byte routine (label 20); otherwise tail-call memcpy. + + Tail call: the return address in %o7 is saved in %g1 before the call + (the save sits in the delay slot of the preceding branch) and is put + back into %o7 in the delay slot of the call itself, because `call' + overwrites %o7 with its own address. memcpy therefore returns + straight to our caller instead of to the instruction after the call. */ +ENTRY(__align_cpy_2) + or %o0, %o1, %o3 + or %o2, %o3, %o3 + andcc %o3, 0x7, %g0 + be,pt %xcc, 10b + andcc %o3, 0x3, %g0 + be,pt %xcc, 20b + mov %o7, %g1 + call HIDDEN_JUMPTARGET(memcpy) + mov %g1, %o7 +END(__align_cpy_2) + +weak_alias (__align_cpy_8, __align_cpy_16) +weak_alias (__align_cpy_2, __align_cpy_1) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/atomic-machine.h b/REORG.TODO/sysdeps/sparc/sparc64/atomic-machine.h new file mode 100644 index 0000000000..72009329c3 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/atomic-machine.h @@ -0,0 +1,129 @@ +/* Atomic operations. sparc64 version. + Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jakub@redhat.com>, 2003. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version.
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <stdint.h> + +typedef int8_t atomic8_t; +typedef uint8_t uatomic8_t; +typedef int_fast8_t atomic_fast8_t; +typedef uint_fast8_t uatomic_fast8_t; + +typedef int16_t atomic16_t; +typedef uint16_t uatomic16_t; +typedef int_fast16_t atomic_fast16_t; +typedef uint_fast16_t uatomic_fast16_t; + +typedef int32_t atomic32_t; +typedef uint32_t uatomic32_t; +typedef int_fast32_t atomic_fast32_t; +typedef uint_fast32_t uatomic_fast32_t; + +typedef int64_t atomic64_t; +typedef uint64_t uatomic64_t; +typedef int_fast64_t atomic_fast64_t; +typedef uint_fast64_t uatomic_fast64_t; + +typedef intptr_t atomicptr_t; +typedef uintptr_t uatomicptr_t; +typedef intmax_t atomic_max_t; +typedef uintmax_t uatomic_max_t; + +#define __HAVE_64B_ATOMICS 1 +#define USE_ATOMIC_COMPILER_BUILTINS 0 + +/* XXX Is this actually correct? 
*/ +#define ATOMIC_EXCHANGE_USES_CAS 1 + + +/* 8- and 16-bit compare-and-exchange are not implemented here: both + macros abort and only yield a zero of the pointed-to type so that the + expression still type-checks. */ +#define __arch_compare_and_exchange_val_8_acq(mem, newval, oldval) \ + (abort (), (__typeof (*(mem))) 0) + +#define __arch_compare_and_exchange_val_16_acq(mem, newval, oldval) \ + (abort (), (__typeof (*(mem))) 0) + +/* 32-bit compare-and-exchange using `cas'. Yields the value the + instruction read from *MEM. When OLDVAL is a compile-time constant + zero, %g0 is used as the comparison register instead of loading the + constant into a register. */ +#define __arch_compare_and_exchange_val_32_acq(mem, newval, oldval) \ +({ \ + __typeof (*(mem)) __acev_tmp; \ + __typeof (mem) __acev_mem = (mem); \ + if (__builtin_constant_p (oldval) && (oldval) == 0) \ + __asm __volatile ("cas [%3], %%g0, %0" \ + : "=r" (__acev_tmp), "=m" (*__acev_mem) \ + : "m" (*__acev_mem), "r" (__acev_mem), \ + "0" (newval) : "memory"); \ + else \ + __asm __volatile ("cas [%4], %2, %0" \ + : "=r" (__acev_tmp), "=m" (*__acev_mem) \ + : "r" (oldval), "m" (*__acev_mem), "r" (__acev_mem), \ + "0" (newval) : "memory"); \ + __acev_tmp; }) + +/* 64-bit counterpart of the macro above, using `casx'. NEWVAL and + OLDVAL are cast to long before being handed to the asm. */ +#define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \ +({ \ + __typeof (*(mem)) __acev_tmp; \ + __typeof (mem) __acev_mem = (mem); \ + if (__builtin_constant_p (oldval) && (oldval) == 0) \ + __asm __volatile ("casx [%3], %%g0, %0" \ + : "=r" (__acev_tmp), "=m" (*__acev_mem) \ + : "m" (*__acev_mem), "r" (__acev_mem), \ + "0" ((long) (newval)) : "memory"); \ + else \ + __asm __volatile ("casx [%4], %2, %0" \ + : "=r" (__acev_tmp), "=m" (*__acev_mem) \ + : "r" ((long) (oldval)), "m" (*__acev_mem), \ + "r" (__acev_mem), "0" ((long) (newval)) : "memory"); \ + __acev_tmp; }) + +/* Store NEWVALUE in *MEM and yield the previous contents. Objects of + size 4 use a single `swap' instruction; every other size loops on + atomic_compare_and_exchange_val_acq until the value it returns equals + the value that was expected. The swap asm is volatile, like the + cas/casx asms above, so the compiler may not discard it when the + caller ignores the result. */ +#define atomic_exchange_acq(mem, newvalue) \ + ({ __typeof (*(mem)) __oldval, __val; \ + __typeof (mem) __memp = (mem); \ + __typeof (*(mem)) __value = (newvalue); \ + \ + if (sizeof (*(mem)) == 4) \ + __asm __volatile ("swap %0, %1" \ + : "=m" (*__memp), "=r" (__oldval) \ + : "m" (*__memp), "1" (__value) : "memory"); \ + else \ + { \ + __val = *__memp; \ + do \ + { \ + __oldval = __val; \ + __val = atomic_compare_and_exchange_val_acq (__memp, __value, \ + __oldval); \ + } \ + while (__builtin_expect (__val != __oldval, 0)); \ + } \ + __oldval; }) + +#define
atomic_compare_and_exchange_val_24_acq(mem, newval, oldval) \ + atomic_compare_and_exchange_val_acq (mem, newval, oldval) + +#define atomic_exchange_24_rel(mem, newval) \ + atomic_exchange_rel (mem, newval) + +#define atomic_full_barrier() \ + __asm __volatile ("membar #LoadLoad | #LoadStore" \ + " | #StoreLoad | #StoreStore" : : : "memory") +#define atomic_read_barrier() \ + __asm __volatile ("membar #LoadLoad | #LoadStore" : : : "memory") +#define atomic_write_barrier() \ + __asm __volatile ("membar #LoadStore | #StoreStore" : : : "memory") + +extern void __cpu_relax (void); +#define atomic_spin_nop() __cpu_relax () diff --git a/REORG.TODO/sysdeps/sparc/sparc64/backtrace.h b/REORG.TODO/sysdeps/sparc/sparc64/backtrace.h new file mode 100644 index 0000000000..b9c95c51cf --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/backtrace.h @@ -0,0 +1,7 @@ +/* Private macros for guiding the backtrace implementation, sparc64 + version. */ + +#define backtrace_flush_register_windows() \ + asm volatile ("flushw") + +#define BACKTRACE_STACK_BIAS STACK_BIAS diff --git a/REORG.TODO/sysdeps/sparc/sparc64/bits/wordsize.h b/REORG.TODO/sysdeps/sparc/sparc64/bits/wordsize.h new file mode 100644 index 0000000000..2f66f10d72 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/bits/wordsize.h @@ -0,0 +1,11 @@ +/* Determine the wordsize from the preprocessor defines. 
*/ + +#if defined __arch64__ || defined __sparcv9 +# define __WORDSIZE 64 +# define __WORDSIZE_TIME64_COMPAT32 1 +#else +# define __WORDSIZE 32 +# define __WORDSIZE_TIME64_COMPAT32 0 +# define __WORDSIZE32_SIZE_ULONG 0 +# define __WORDSIZE32_PTRDIFF_LONG 0 +#endif diff --git a/REORG.TODO/sysdeps/sparc/sparc64/bzero.c b/REORG.TODO/sysdeps/sparc/sparc64/bzero.c new file mode 100644 index 0000000000..37f0f6f993 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/bzero.c @@ -0,0 +1 @@ +/* bzero is in memset.S */ diff --git a/REORG.TODO/sysdeps/sparc/sparc64/cpu_relax.S b/REORG.TODO/sysdeps/sparc/sparc64/cpu_relax.S new file mode 100644 index 0000000000..5271164607 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/cpu_relax.S @@ -0,0 +1,67 @@ +/* CPU strand yielding for busy loops. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + Contributed by David S. Miller (davem@davemloft.net) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + + .text +/* Fallback implementation: read %ccr into %g0 three times (the results + are discarded) and return. */ +__cpu_relax_generic: + rd %ccr, %g0 + rd %ccr, %g0 + rd %ccr, %g0 + retl + nop + .size __cpu_relax_generic,.-__cpu_relax_generic + +/* Implementation chosen when the HWCAP_SPARC_PAUSE bit is set: write 128 + to %asr27 and return. NOTE(review): presumably %asr27 is the pause + register and 128 the pause length -- confirm against the CPU manual. */ +__cpu_relax_pause: + wr %g0, 128, %asr27 + retl + nop + .size __cpu_relax_pause,.-__cpu_relax_pause + +/* __cpu_relax is declared as a GNU indirect function, so this body is the + resolver: it tests %o0 against HWCAP_SPARC_PAUSE and returns in %o0 the + address of __cpu_relax_pause when the bit is set, otherwise the address + of __cpu_relax_generic. In SHARED builds the address is formed from + the %gdop_hix22/%gdop_lox10 value plus the PIC register set up in %o3. + NOTE(review): %o0 is assumed to carry the hardware capability word on + entry -- confirm against the ifunc caller. */ +ENTRY(__cpu_relax) + .type __cpu_relax, @gnu_indirect_function +# ifdef SHARED + SETUP_PIC_REG_LEAF(o3, o5) +# endif + set HWCAP_SPARC_PAUSE, %o1 + andcc %o0, %o1, %g0 + be 1f + nop +# ifdef SHARED + sethi %gdop_hix22(__cpu_relax_pause), %o1 + xor %o1, %gdop_lox10(__cpu_relax_pause), %o1 +# else + set __cpu_relax_pause, %o1 +# endif + ba 10f + nop +1: +# ifdef SHARED + sethi %gdop_hix22(__cpu_relax_generic), %o1 + xor %o1, %gdop_lox10(__cpu_relax_generic), %o1 +# else + set __cpu_relax_generic, %o1 +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 +# endif + retl + mov %o1, %o0 +END(__cpu_relax) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/dl-irel.h b/REORG.TODO/sysdeps/sparc/sparc64/dl-irel.h new file mode 100644 index 0000000000..dfd4637557 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/dl-irel.h @@ -0,0 +1,65 @@ +/* Machine-dependent ELF indirect relocation inline functions. + SPARC 64-bit version. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#ifndef _DL_IREL_H +#define _DL_IREL_H + +#include <stdio.h> +#include <unistd.h> +#include <dl-plt.h> +#include <ldsodefs.h> + +#define ELF_MACHINE_IRELA 1 + +/* Call the ifunc resolver located at ADDR, passing GLRO(dl_hwcap) as its + only argument, and return the address the resolver yields. */ +static inline Elf64_Addr +__attribute ((always_inline)) +elf_ifunc_invoke (Elf64_Addr addr) +{ + return ((Elf64_Addr (*) (int)) (addr)) (GLRO(dl_hwcap)); +} + +/* Apply one indirect relocation RELOC. The relocation type is the low + eight bits of r_info; r_addend holds the resolver address and r_offset + the location to patch. + R_SPARC_IRELATIVE: store the resolved address at r_offset. + R_SPARC_JMP_IREL: patch the PLT entry at r_offset via + sparc64_fixup_plt, using a dummy link map with l_addr == 0. + R_SPARC_NONE: nothing to do. + Any other type is fatal. */ +static inline void +__attribute ((always_inline)) +elf_irela (const Elf64_Rela *reloc) +{ + unsigned int r_type = (reloc->r_info & 0xff); + + if (__glibc_likely (r_type == R_SPARC_IRELATIVE)) + { + Elf64_Addr *const reloc_addr = (void *) reloc->r_offset; + Elf64_Addr value = elf_ifunc_invoke(reloc->r_addend); + *reloc_addr = value; + } + else if (__glibc_likely (r_type == R_SPARC_JMP_IREL)) + { + Elf64_Addr *const reloc_addr = (void *) reloc->r_offset; + Elf64_Addr value = elf_ifunc_invoke(reloc->r_addend); + struct link_map map = { .l_addr = 0 }; + + /* 'high' is always zero, for large PLT entries the linker + emits an R_SPARC_IRELATIVE. */ + sparc64_fixup_plt (&map, reloc, reloc_addr, value, 0, 0); + } + else if (r_type == R_SPARC_NONE) + ; + else + /* Terminate the message with a newline so the diagnostic is a complete + line on the terminal. */ + __libc_fatal ("unexpected reloc type in static binary\n"); +} + +#endif /* dl-irel.h */ diff --git a/REORG.TODO/sysdeps/sparc/sparc64/dl-machine.h b/REORG.TODO/sysdeps/sparc/sparc64/dl-machine.h new file mode 100644 index 0000000000..1b59d78a25 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/dl-machine.h @@ -0,0 +1,685 @@ +/* Machine-dependent ELF dynamic relocation inline functions. Sparc64 version. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version.
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef dl_machine_h +#define dl_machine_h + +#define ELF_MACHINE_NAME "sparc64" + +#include <string.h> +#include <sys/param.h> +#include <ldsodefs.h> +#include <sysdep.h> +#include <dl-plt.h> + +#define ELF64_R_TYPE_ID(info) ((info) & 0xff) +#define ELF64_R_TYPE_DATA(info) ((info) >> 8) + +/* Return nonzero iff ELF header is compatible with the running host. */ +static inline int +elf_machine_matches_host (const Elf64_Ehdr *ehdr) +{ + return ehdr->e_machine == EM_SPARCV9; +} + +/* We have to do this because elf_machine_{dynamic,load_address} can be + invoked from functions that have no GOT references, and thus the compiler + has no obligation to load the PIC register. */ +#define LOAD_PIC_REG(PIC_REG) \ +do { Elf64_Addr tmp; \ + __asm("sethi %%hi(_GLOBAL_OFFSET_TABLE_-4), %1\n\t" \ + "rd %%pc, %0\n\t" \ + "add %1, %%lo(_GLOBAL_OFFSET_TABLE_+4), %1\n\t" \ + "add %0, %1, %0" \ + : "=r" (PIC_REG), "=r" (tmp)); \ +} while (0) + +/* Return the link-time address of _DYNAMIC. Conveniently, this is the + first element of the GOT. This must be inlined in a function which + uses global data. */ +static inline Elf64_Addr +elf_machine_dynamic (void) +{ + register Elf64_Addr *elf_pic_register __asm__("%l7"); + + LOAD_PIC_REG (elf_pic_register); + + return *elf_pic_register; +} + +/* Return the run-time load address of the shared object. 
*/ +static inline Elf64_Addr +elf_machine_load_address (void) +{ + /* pc is bound to %o7, which the `call 1f' below sets to the address of + that call instruction. It is read as an array of 32-bit words: + pc[2] and pc[3] are the `call _DYNAMIC' and + `call _GLOBAL_OFFSET_TABLE_' instructions, which are jumped over and + exist only so that their encoded displacements can be read back. */ + register Elf32_Addr *pc __asm ("%o7"); + register Elf64_Addr *got __asm ("%l7"); + + __asm ("sethi %%hi(_GLOBAL_OFFSET_TABLE_-4), %1\n\t" + "call 1f\n\t" + " add %1, %%lo(_GLOBAL_OFFSET_TABLE_+4), %1\n\t" + "call _DYNAMIC\n\t" + "call _GLOBAL_OFFSET_TABLE_\n" + "1:\tadd %1, %0, %1\n\t" : "=r" (pc), "=r" (got)); + + /* got is now l_addr + _GLOBAL_OFFSET_TABLE_ + *got is _DYNAMIC + pc[2]*4 is l_addr + _DYNAMIC - (long)pc - 8 + pc[3]*4 is l_addr + _GLOBAL_OFFSET_TABLE_ - (long)pc - 12 */ + return (Elf64_Addr) got - *got + (Elf32_Sword) ((pc[2] - pc[3]) * 4) - 4; +} + +/* Patch the PLT entry at RELOC_ADDR through sparc64_fixup_plt, handing it + VALUE plus the relocation addend as the target and the addend itself as + the next argument. Returns VALUE unchanged, i.e. without the addend. + The lookup result T is not used. */ +static inline Elf64_Addr __attribute__ ((always_inline)) +elf_machine_fixup_plt (struct link_map *map, lookup_t t, + const Elf64_Rela *reloc, + Elf64_Addr *reloc_addr, Elf64_Addr value) +{ + sparc64_fixup_plt (map, reloc, reloc_addr, value + reloc->r_addend, + reloc->r_addend, 1); + return value; +} + +/* Return the final value of a plt relocation. */ +static inline Elf64_Addr +elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc, + Elf64_Addr value) +{ + /* Don't add addend here, but in elf_machine_fixup_plt instead. + value + reloc->r_addend is the value which should actually be + stored into .plt data slot. */ + return value; +} + +/* ELF_RTYPE_CLASS_PLT iff TYPE describes relocation of a PLT entry, so + PLT entries should not be allowed to define the value. + ELF_RTYPE_CLASS_COPY iff TYPE should not be allowed to resolve to one + of the main executable's symbols, as for a COPY reloc. + As written, the PLT class is also given to every type in the range + R_SPARC_TLS_GD_HI22 .. R_SPARC_TLS_TPOFF64. Each comparison yields + 0 or 1 and is multiplied by its class bit, so the result is the OR of + the applicable class bits. */ +#define elf_machine_type_class(type) \ + ((((type) == R_SPARC_JMP_SLOT \ + || ((type) >= R_SPARC_TLS_GD_HI22 && (type) <= R_SPARC_TLS_TPOFF64)) \ + * ELF_RTYPE_CLASS_PLT) \ + | (((type) == R_SPARC_COPY) * ELF_RTYPE_CLASS_COPY)) + +/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */ +#define ELF_MACHINE_JMP_SLOT R_SPARC_JMP_SLOT + +/* The SPARC never uses Elf64_Rel relocations.
*/ +#define ELF_MACHINE_NO_REL 1 +#define ELF_MACHINE_NO_RELA 0 + +/* Set up the loaded object described by L so its unrelocated PLT + entries will jump to the on-demand fixup code in dl-runtime.c. */ + +static inline int +elf_machine_runtime_setup (struct link_map *l, int lazy, int profile) +{ + if (l->l_info[DT_JMPREL] && lazy) + { + extern void _dl_runtime_resolve_0 (void); + extern void _dl_runtime_resolve_1 (void); + extern void _dl_runtime_profile_0 (void); + extern void _dl_runtime_profile_1 (void); + Elf64_Addr res0_addr, res1_addr; + unsigned int *plt = (void *) D_PTR (l, l_info[DT_PLTGOT]); + + if (__builtin_expect(profile, 0)) + { + res0_addr = (Elf64_Addr) &_dl_runtime_profile_0; + res1_addr = (Elf64_Addr) &_dl_runtime_profile_1; + + if (GLRO(dl_profile) != NULL + && _dl_name_match_p (GLRO(dl_profile), l)) + GL(dl_profile_map) = l; + } + else + { + res0_addr = (Elf64_Addr) &_dl_runtime_resolve_0; + res1_addr = (Elf64_Addr) &_dl_runtime_resolve_1; + } + + /* PLT0 looks like: + + sethi %uhi(_dl_runtime_{resolve,profile}_0), %g4 + sethi %hi(_dl_runtime_{resolve,profile}_0), %g5 + or %g4, %ulo(_dl_runtime_{resolve,profile}_0), %g4 + or %g5, %lo(_dl_runtime_{resolve,profile}_0), %g5 + sllx %g4, 32, %g4 + add %g4, %g5, %g5 + jmpl %g5, %g4 + nop + */ + + plt[0] = 0x09000000 | (res0_addr >> (64 - 22)); + plt[1] = 0x0b000000 | ((res0_addr >> 10) & 0x003fffff); + plt[2] = 0x88112000 | ((res0_addr >> 32) & 0x3ff); + plt[3] = 0x8a116000 | (res0_addr & 0x3ff); + plt[4] = 0x89293020; + plt[5] = 0x8a010005; + plt[6] = 0x89c14000; + plt[7] = 0x01000000; + + /* PLT1 looks like: + + sethi %uhi(_dl_runtime_{resolve,profile}_1), %g4 + sethi %hi(_dl_runtime_{resolve,profile}_1), %g5 + or %g4, %ulo(_dl_runtime_{resolve,profile}_1), %g4 + or %g5, %lo(_dl_runtime_{resolve,profile}_1), %g5 + sllx %g4, 32, %g4 + add %g4, %g5, %g5 + jmpl %g5, %g4 + nop + */ + + plt[8] = 0x09000000 | (res1_addr >> (64 - 22)); + plt[9] = 0x0b000000 | ((res1_addr >> 10) & 0x003fffff); + plt[10] = 
0x88112000 | ((res1_addr >> 32) & 0x3ff); + plt[11] = 0x8a116000 | (res1_addr & 0x3ff); + plt[12] = 0x89293020; + plt[13] = 0x8a010005; + plt[14] = 0x89c14000; + plt[15] = 0x01000000; + + /* Now put the magic cookie at the beginning of .PLT2 + Entry .PLT3 is unused by this implementation. */ + *((struct link_map **)(&plt[16])) = l; + + if (__builtin_expect (l->l_info[VALIDX(DT_GNU_PRELINKED)] != NULL, 0) + || __builtin_expect (l->l_info [VALIDX (DT_GNU_LIBLISTSZ)] != NULL, 0)) + { + /* Need to reinitialize .plt to undo prelinking. */ + Elf64_Rela *rela = (Elf64_Rela *) D_PTR (l, l_info[DT_JMPREL]); + Elf64_Rela *relaend + = (Elf64_Rela *) ((char *) rela + + l->l_info[DT_PLTRELSZ]->d_un.d_val); + + /* prelink must ensure there are no R_SPARC_NONE relocs left + in .rela.plt. */ + while (rela < relaend) + { + if (__builtin_expect (rela->r_addend, 0) != 0) + { + Elf64_Addr slot = ((rela->r_offset + l->l_addr + 0x400 + - (Elf64_Addr) plt) + / 0x1400) * 0x1400 + + (Elf64_Addr) plt - 0x400; + /* ldx [%o7 + X], %g1 */ + unsigned int first_ldx = *(unsigned int *)(slot + 12); + Elf64_Addr ptr = slot + (first_ldx & 0xfff) + 4; + + *(Elf64_Addr *) (rela->r_offset + l->l_addr) + = (Elf64_Addr) plt + - (slot + ((rela->r_offset + l->l_addr - ptr) / 8) * 24 + + 4); + ++rela; + continue; + } + + *(unsigned int *) (rela->r_offset + l->l_addr) + = 0x03000000 | (rela->r_offset + l->l_addr - (Elf64_Addr) plt); + *(unsigned int *) (rela->r_offset + l->l_addr + 4) + = 0x30680000 | ((((Elf64_Addr) plt + 32 - rela->r_offset + - l->l_addr - 4) >> 2) & 0x7ffff); + __asm __volatile ("flush %0" : : "r" (rela->r_offset + + l->l_addr)); + __asm __volatile ("flush %0+4" : : "r" (rela->r_offset + + l->l_addr)); + ++rela; + } + } + } + + return lazy; +} + +/* The PLT uses Elf64_Rela relocs. */ +#define elf_machine_relplt elf_machine_rela + +/* Undo the sub %sp, 6*8, %sp; add %sp, STACK_BIAS + 22*8, %o0 below + (but w/o STACK_BIAS) to get at the value we want in __libc_stack_end. 
*/ +#define DL_STACK_END(cookie) \ + ((void *) (((long) (cookie)) - (22 - 6) * 8)) + +/* Initial entry point code for the dynamic linker. + The C function `_dl_start' is the real entry point; + its return value is the user program's entry point. */ + +#define RTLD_GOT_ADDRESS(pic_reg, reg, symbol) \ + "sethi %gdop_hix22(" #symbol "), " #reg "\n\t" \ + "xor " #reg ", %gdop_lox10(" #symbol "), " #reg "\n\t" \ + "ldx [" #pic_reg " + " #reg "], " #reg ", %gdop(" #symbol ")\n" + +#define __S1(x) #x +#define __S(x) __S1(x) + +#define RTLD_START __asm__ ( "\n" \ +" .text\n" \ +" .global _start\n" \ +" .type _start, @function\n" \ +" .align 32\n" \ +"_start:\n" \ +" /* Make room for functions to drop their arguments on the stack. */\n" \ +" sub %sp, 6*8, %sp\n" \ +" /* Pass pointer to argument block to _dl_start. */\n" \ +" call _dl_start\n" \ +" add %sp," __S(STACK_BIAS) "+22*8,%o0\n" \ +" /* FALLTHRU */\n" \ +" .size _start, .-_start\n" \ +"\n" \ +" .global _dl_start_user\n" \ +" .type _dl_start_user, @function\n" \ +"_dl_start_user:\n" \ +" /* Load the GOT register. */\n" \ +"1: call 11f\n" \ +" sethi %hi(_GLOBAL_OFFSET_TABLE_-(1b-.)), %l7\n" \ +"11: or %l7, %lo(_GLOBAL_OFFSET_TABLE_-(1b-.)), %l7\n" \ +" add %l7, %o7, %l7\n" \ +" /* Save the user entry point address in %l0. */\n" \ +" mov %o0, %l0\n" \ +" /* See if we were run as a command with the executable file name as an\n" \ +" extra leading argument. If so, we must shift things around since we\n" \ +" must keep the stack doubleword aligned. */\n" \ + RTLD_GOT_ADDRESS(%l7, %g5, _dl_skip_args) \ +" ld [%g5], %i0\n" \ +" brz,pt %i0, 2f\n" \ +" ldx [%sp + " __S(STACK_BIAS) " + 22*8], %i5\n" \ +" /* Find out how far to shift. */\n" \ +" sub %i5, %i0, %i5\n" \ +" sllx %i0, 3, %l6\n" \ + RTLD_GOT_ADDRESS(%l7, %l4, _dl_argv) \ +" stx %i5, [%sp + " __S(STACK_BIAS) " + 22*8]\n" \ +" add %sp, " __S(STACK_BIAS) " + 23*8, %i1\n" \ +" add %i1, %l6, %i2\n" \ +" ldx [%l4], %l5\n" \ +" /* Copy down argv. 
*/\n" \ +"12: ldx [%i2], %i3\n" \ +" add %i2, 8, %i2\n" \ +" stx %i3, [%i1]\n" \ +" brnz,pt %i3, 12b\n" \ +" add %i1, 8, %i1\n" \ +" sub %l5, %l6, %l5\n" \ +" /* Copy down envp. */\n" \ +"13: ldx [%i2], %i3\n" \ +" add %i2, 8, %i2\n" \ +" stx %i3, [%i1]\n" \ +" brnz,pt %i3, 13b\n" \ +" add %i1, 8, %i1\n" \ +" /* Copy down auxiliary table. */\n" \ +"14: ldx [%i2], %i3\n" \ +" ldx [%i2 + 8], %i4\n" \ +" add %i2, 16, %i2\n" \ +" stx %i3, [%i1]\n" \ +" stx %i4, [%i1 + 8]\n" \ +" brnz,pt %i3, 14b\n" \ +" add %i1, 16, %i1\n" \ +" stx %l5, [%l4]\n" \ +" /* %o0 = _dl_loaded, %o1 = argc, %o2 = argv, %o3 = envp. */\n" \ +"2:\t" RTLD_GOT_ADDRESS(%l7, %o0, _rtld_local) \ +" sllx %i5, 3, %o3\n" \ +" add %sp, " __S(STACK_BIAS) " + 23*8, %o2\n" \ +" add %o3, 8, %o3\n" \ +" mov %i5, %o1\n" \ +" add %o2, %o3, %o3\n" \ +" call _dl_init\n" \ +" ldx [%o0], %o0\n" \ +" /* Pass our finalizer function to the user in %g1. */\n" \ + RTLD_GOT_ADDRESS(%l7, %g1, _dl_fini) \ +" /* Jump to the user's entry point and deallocate the extra stack we got. */\n" \ +" jmp %l0\n" \ +" add %sp, 6*8, %sp\n" \ +" .size _dl_start_user, . - _dl_start_user\n" \ +" .previous\n"); + +#endif /* dl_machine_h */ + +#define ARCH_LA_PLTENTER sparc64_gnu_pltenter +#define ARCH_LA_PLTEXIT sparc64_gnu_pltexit + +#ifdef RESOLVE_MAP + +/* Perform the relocation specified by RELOC and SYM (which is fully resolved). + MAP is the object containing the reloc. 
*/ + +auto inline void +__attribute__ ((always_inline)) +elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc, + const Elf64_Sym *sym, const struct r_found_version *version, + void *const reloc_addr_arg, int skip_ifunc) +{ + Elf64_Addr *const reloc_addr = reloc_addr_arg; +#if !defined RTLD_BOOTSTRAP && !defined RESOLVE_CONFLICT_FIND_MAP + const Elf64_Sym *const refsym = sym; +#endif + Elf64_Addr value; + const unsigned long int r_type = ELF64_R_TYPE_ID (reloc->r_info); +#if !defined RESOLVE_CONFLICT_FIND_MAP + struct link_map *sym_map = NULL; +#endif + +#if !defined RTLD_BOOTSTRAP && !defined HAVE_Z_COMBRELOC + /* This is defined in rtld.c, but nowhere in the static libc.a; make the + reference weak so static programs can still link. This declaration + cannot be done when compiling rtld.c (i.e. #ifdef RTLD_BOOTSTRAP) + because rtld.c contains the common defn for _dl_rtld_map, which is + incompatible with a weak decl in the same file. */ + weak_extern (_dl_rtld_map); +#endif + + if (__glibc_unlikely (r_type == R_SPARC_NONE)) + return; + + if (__glibc_unlikely (r_type == R_SPARC_SIZE64)) + { + *reloc_addr = sym->st_size + reloc->r_addend; + return; + } + +#if !defined RTLD_BOOTSTRAP || !defined HAVE_Z_COMBRELOC + if (__glibc_unlikely (r_type == R_SPARC_RELATIVE)) + { +# if !defined RTLD_BOOTSTRAP && !defined HAVE_Z_COMBRELOC + if (map != &_dl_rtld_map) /* Already done in rtld itself. */ +# endif + *reloc_addr += map->l_addr + reloc->r_addend; + return; + } +#endif + +#ifndef RESOLVE_CONFLICT_FIND_MAP + if (__builtin_expect (ELF64_ST_BIND (sym->st_info) == STB_LOCAL, 0) + && sym->st_shndx != SHN_UNDEF) + { + value = map->l_addr; + } + else + { + sym_map = RESOLVE_MAP (&sym, version, r_type); + value = sym_map == NULL ? 0 : sym_map->l_addr + sym->st_value; + } +#else + value = 0; +#endif + + value += reloc->r_addend; /* Assume copy relocs have zero addend. 
*/ + + if (sym != NULL + && __builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC, 0) + && __builtin_expect (sym->st_shndx != SHN_UNDEF, 1) + && __builtin_expect (!skip_ifunc, 1)) + value = ((Elf64_Addr (*) (int)) value) (GLRO(dl_hwcap)); + + switch (r_type) + { +#if !defined RTLD_BOOTSTRAP && !defined RESOLVE_CONFLICT_FIND_MAP + case R_SPARC_COPY: + if (sym == NULL) + /* This can happen in trace mode if an object could not be + found. */ + break; + if (sym->st_size > refsym->st_size + || (GLRO(dl_verbose) && sym->st_size < refsym->st_size)) + { + const char *strtab; + + strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]); + _dl_error_printf ("\ +%s: Symbol `%s' has different size in shared object, consider re-linking\n", + RTLD_PROGNAME, strtab + refsym->st_name); + } + memcpy (reloc_addr_arg, (void *) value, + MIN (sym->st_size, refsym->st_size)); + break; +#endif + case R_SPARC_64: + case R_SPARC_GLOB_DAT: + *reloc_addr = value; + break; + case R_SPARC_IRELATIVE: + value = ((Elf64_Addr (*) (int)) value) (GLRO(dl_hwcap)); + *reloc_addr = value; + break; + case R_SPARC_JMP_IREL: + value = ((Elf64_Addr (*) (int)) value) (GLRO(dl_hwcap)); + /* 'high' is always zero, for large PLT entries the linker + emits an R_SPARC_IRELATIVE. */ +#ifdef RESOLVE_CONFLICT_FIND_MAP + sparc64_fixup_plt (NULL, reloc, reloc_addr, value, 0, 0); +#else + sparc64_fixup_plt (map, reloc, reloc_addr, value, 0, 0); +#endif + break; + case R_SPARC_JMP_SLOT: +#ifdef RESOLVE_CONFLICT_FIND_MAP + /* R_SPARC_JMP_SLOT conflicts against .plt[32768+] + relocs should be turned into R_SPARC_64 relocs + in .gnu.conflict section. + r_addend non-zero does not mean it is a .plt[32768+] + reloc, instead it is the actual address of the function + to call. 
*/ + sparc64_fixup_plt (NULL, reloc, reloc_addr, value, 0, 0); +#else + sparc64_fixup_plt (map, reloc, reloc_addr, value, reloc->r_addend, 0); +#endif + break; +#ifndef RESOLVE_CONFLICT_FIND_MAP + case R_SPARC_TLS_DTPMOD64: + /* Get the information from the link map returned by the + resolv function. */ + if (sym_map != NULL) + *reloc_addr = sym_map->l_tls_modid; + break; + case R_SPARC_TLS_DTPOFF64: + /* During relocation all TLS symbols are defined and used. + Therefore the offset is already correct. */ + *reloc_addr = (sym == NULL ? 0 : sym->st_value) + reloc->r_addend; + break; + case R_SPARC_TLS_TPOFF64: + /* The offset is negative, forward from the thread pointer. */ + /* We know the offset of object the symbol is contained in. + It is a negative value which will be added to the + thread pointer. */ + if (sym != NULL) + { + CHECK_STATIC_TLS (map, sym_map); + *reloc_addr = sym->st_value - sym_map->l_tls_offset + + reloc->r_addend; + } + break; +# ifndef RTLD_BOOTSTRAP + case R_SPARC_TLS_LE_HIX22: + case R_SPARC_TLS_LE_LOX10: + if (sym != NULL) + { + CHECK_STATIC_TLS (map, sym_map); + value = sym->st_value - sym_map->l_tls_offset + + reloc->r_addend; + if (r_type == R_SPARC_TLS_LE_HIX22) + *(unsigned int *)reloc_addr = + ((*(unsigned int *)reloc_addr & 0xffc00000) + | (((~value) >> 10) & 0x3fffff)); + else + *(unsigned int *)reloc_addr = + ((*(unsigned int *)reloc_addr & 0xffffe000) | (value & 0x3ff) + | 0x1c00); + } + break; +# endif +#endif +#ifndef RTLD_BOOTSTRAP + case R_SPARC_8: + *(char *) reloc_addr = value; + break; + case R_SPARC_16: + *(short *) reloc_addr = value; + break; + case R_SPARC_32: + *(unsigned int *) reloc_addr = value; + break; + case R_SPARC_DISP8: + *(char *) reloc_addr = (value - (Elf64_Addr) reloc_addr); + break; + case R_SPARC_DISP16: + *(short *) reloc_addr = (value - (Elf64_Addr) reloc_addr); + break; + case R_SPARC_DISP32: + *(unsigned int *) reloc_addr = (value - (Elf64_Addr) reloc_addr); + break; + case R_SPARC_DISP64: + 
*reloc_addr = (value - (Elf64_Addr) reloc_addr); + break; + case R_SPARC_REGISTER: + *reloc_addr = value; + break; + case R_SPARC_WDISP30: + *(unsigned int *) reloc_addr = + ((*(unsigned int *)reloc_addr & 0xc0000000) | + (((value - (Elf64_Addr) reloc_addr) >> 2) & 0x3fffffff)); + break; + + /* MEDLOW code model relocs */ + case R_SPARC_LO10: + *(unsigned int *) reloc_addr = + ((*(unsigned int *)reloc_addr & ~0x3ff) | + (value & 0x3ff)); + break; + case R_SPARC_HI22: + *(unsigned int *) reloc_addr = + ((*(unsigned int *)reloc_addr & 0xffc00000) | + ((value >> 10) & 0x3fffff)); + break; + case R_SPARC_OLO10: + *(unsigned int *) reloc_addr = + ((*(unsigned int *)reloc_addr & ~0x1fff) | + (((value & 0x3ff) + ELF64_R_TYPE_DATA (reloc->r_info)) & 0x1fff)); + break; + + /* ABS34 code model reloc */ + case R_SPARC_H34: + *(unsigned int *) reloc_addr = + ((*(unsigned int *)reloc_addr & 0xffc00000) | + ((value >> 12) & 0x3fffff)); + break; + + /* MEDMID code model relocs */ + case R_SPARC_H44: + *(unsigned int *) reloc_addr = + ((*(unsigned int *)reloc_addr & 0xffc00000) | + ((value >> 22) & 0x3fffff)); + break; + case R_SPARC_M44: + *(unsigned int *) reloc_addr = + ((*(unsigned int *)reloc_addr & ~0x3ff) | + ((value >> 12) & 0x3ff)); + break; + case R_SPARC_L44: + *(unsigned int *) reloc_addr = + ((*(unsigned int *)reloc_addr & ~0xfff) | + (value & 0xfff)); + break; + + /* MEDANY code model relocs */ + case R_SPARC_HH22: + *(unsigned int *) reloc_addr = + ((*(unsigned int *)reloc_addr & 0xffc00000) | + (value >> 42)); + break; + case R_SPARC_HM10: + *(unsigned int *) reloc_addr = + ((*(unsigned int *)reloc_addr & ~0x3ff) | + ((value >> 32) & 0x3ff)); + break; + case R_SPARC_LM22: + *(unsigned int *) reloc_addr = + ((*(unsigned int *)reloc_addr & 0xffc00000) | + ((value >> 10) & 0x003fffff)); + break; + case R_SPARC_UA16: + ((unsigned char *) reloc_addr_arg) [0] = value >> 8; + ((unsigned char *) reloc_addr_arg) [1] = value; + break; + case R_SPARC_UA32: + ((unsigned char *) 
reloc_addr_arg) [0] = value >> 24; + ((unsigned char *) reloc_addr_arg) [1] = value >> 16; + ((unsigned char *) reloc_addr_arg) [2] = value >> 8; + ((unsigned char *) reloc_addr_arg) [3] = value; + break; + case R_SPARC_UA64: + if (! ((long) reloc_addr_arg & 3)) + { + /* Common in .eh_frame */ + ((unsigned int *) reloc_addr_arg) [0] = value >> 32; + ((unsigned int *) reloc_addr_arg) [1] = value; + break; + } + ((unsigned char *) reloc_addr_arg) [0] = value >> 56; + ((unsigned char *) reloc_addr_arg) [1] = value >> 48; + ((unsigned char *) reloc_addr_arg) [2] = value >> 40; + ((unsigned char *) reloc_addr_arg) [3] = value >> 32; + ((unsigned char *) reloc_addr_arg) [4] = value >> 24; + ((unsigned char *) reloc_addr_arg) [5] = value >> 16; + ((unsigned char *) reloc_addr_arg) [6] = value >> 8; + ((unsigned char *) reloc_addr_arg) [7] = value; + break; +#endif +#if !defined RTLD_BOOTSTRAP || defined _NDEBUG + default: + _dl_reloc_bad_type (map, r_type, 0); + break; +#endif + } +} + +auto inline void +__attribute__ ((always_inline)) +elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc, + void *const reloc_addr_arg) +{ + Elf64_Addr *const reloc_addr = reloc_addr_arg; + *reloc_addr = l_addr + reloc->r_addend; +} + +auto inline void +__attribute__ ((always_inline)) +elf_machine_lazy_rel (struct link_map *map, + Elf64_Addr l_addr, const Elf64_Rela *reloc, + int skip_ifunc) +{ + Elf64_Addr *const reloc_addr = (void *) (l_addr + reloc->r_offset); + const unsigned int r_type = ELF64_R_TYPE (reloc->r_info); + + if (__glibc_likely (r_type == R_SPARC_JMP_SLOT)) + ; + else if (r_type == R_SPARC_JMP_IREL + || r_type == R_SPARC_IRELATIVE) + { + Elf64_Addr value = map->l_addr + reloc->r_addend; + if (__glibc_likely (!skip_ifunc)) + value = ((Elf64_Addr (*) (int)) value) (GLRO(dl_hwcap)); + if (r_type == R_SPARC_JMP_IREL) + { + /* 'high' is always zero, for large PLT entries the linker + emits an R_SPARC_IRELATIVE. 
*/ + sparc64_fixup_plt (map, reloc, reloc_addr, value, 0, 1); + } + else + *reloc_addr = value; + } + else if (r_type == R_SPARC_NONE) + ; + else + _dl_reloc_bad_type (map, r_type, 1); +} + +#endif /* RESOLVE_MAP */ diff --git a/REORG.TODO/sysdeps/sparc/sparc64/dl-plt.h b/REORG.TODO/sysdeps/sparc/sparc64/dl-plt.h new file mode 100644 index 0000000000..daad1063b3 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/dl-plt.h @@ -0,0 +1,167 @@ +/* PLT fixups. Sparc 64-bit version. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _DL_PLT_H +#define _DL_PLT_H + +/* We have 4 cases to handle. And we code different code sequences + for each one. I love V9 code models... 
*/ +static inline void __attribute__ ((always_inline)) +sparc64_fixup_plt (struct link_map *map, const Elf64_Rela *reloc, + Elf64_Addr *reloc_addr, Elf64_Addr value, + Elf64_Addr high, int t) +{ + unsigned int *insns = (unsigned int *) reloc_addr; + Elf64_Addr plt_vaddr = (Elf64_Addr) reloc_addr; + Elf64_Sxword disp = value - plt_vaddr; + + /* 't' is '0' if we are resolving this PLT entry for RTLD bootstrap, + in which case we'll be resolving all PLT entries and thus can + optimize by overwriting instructions starting at the first PLT entry + instruction and we need not be mindful of thread safety. + + Otherwise, 't' is '1'. + + Now move plt_vaddr up to the call instruction. */ + plt_vaddr += ((t + 1) * 4); + + /* PLT entries .PLT32768 and above look always the same. */ + if (__builtin_expect (high, 0) != 0) + { + *reloc_addr = value - map->l_addr; + } + /* Near destination. */ + else if (disp >= -0x800000 && disp < 0x800000) + { + unsigned int insn; + + /* ba,a */ + insn = 0x30800000 | ((disp >> 2) & 0x3fffff); + + if (disp >= -0x100000 && disp < 0x100000) + { + /* ba,a,pt %icc */ + insn = 0x30480000 | ((disp >> 2) & 0x07ffff); + } + + /* As this is just one instruction, it is thread safe and so we + can avoid the unnecessary sethi FOO, %g1. Each 64-bit PLT + entry is 8 instructions long, so we can't run into the 'jmp' + delay slot problems 32-bit PLTs can. */ + insns[0] = insn; + __asm __volatile ("flush %0" : : "r" (insns)); + } + /* 32-bit Sparc style, the target is in the lower 32-bits of + address space. */ + else if (insns += t, (value >> 32) == 0) + { + /* sethi %hi(target), %g1 + jmpl %g1 + %lo(target), %g0 */ + + insns[1] = 0x81c06000 | (value & 0x3ff); + __asm __volatile ("flush %0 + 4" : : "r" (insns)); + + insns[0] = 0x03000000 | ((unsigned int)(value >> 10)); + __asm __volatile ("flush %0" : : "r" (insns)); + } + /* We can also get somewhat simple sequences if the distance between + the target and the PLT entry is within +/- 2GB. 
*/ + else if ((plt_vaddr > value + && ((plt_vaddr - value) >> 31) == 0) + || (value > plt_vaddr + && ((value - plt_vaddr) >> 31) == 0)) + { + unsigned int displacement; + + if (plt_vaddr > value) + displacement = (0 - (plt_vaddr - value)); + else + displacement = value - plt_vaddr; + + /* mov %o7, %g1 + call displacement + mov %g1, %o7 */ + + insns[2] = 0x9e100001; + __asm __volatile ("flush %0 + 8" : : "r" (insns)); + + insns[1] = 0x40000000 | (displacement >> 2); + __asm __volatile ("flush %0 + 4" : : "r" (insns)); + + insns[0] = 0x8210000f; + __asm __volatile ("flush %0" : : "r" (insns)); + } + /* Worst case, ho hum... */ + else + { + unsigned int high32 = (value >> 32); + unsigned int low32 = (unsigned int) value; + + /* ??? Some tricks can be stolen from the sparc64 egcs backend + constant formation code I wrote. -DaveM */ + + if (__glibc_unlikely (high32 & 0x3ff)) + { + /* sethi %hh(value), %g1 + sethi %lm(value), %g5 + or %g1, %hm(value), %g1 + or %g5, %lo(value), %g5 + sllx %g1, 32, %g1 + jmpl %g1 + %g5, %g0 + nop */ + + insns[5] = 0x81c04005; + __asm __volatile ("flush %0 + 20" : : "r" (insns)); + + insns[4] = 0x83287020; + __asm __volatile ("flush %0 + 16" : : "r" (insns)); + + insns[3] = 0x8a116000 | (low32 & 0x3ff); + __asm __volatile ("flush %0 + 12" : : "r" (insns)); + + insns[2] = 0x82106000 | (high32 & 0x3ff); + } + else + { + /* sethi %hh(value), %g1 + sethi %lm(value), %g5 + sllx %g1, 32, %g1 + or %g5, %lo(value), %g5 + jmpl %g1 + %g5, %g0 + nop */ + + insns[4] = 0x81c04005; + __asm __volatile ("flush %0 + 16" : : "r" (insns)); + + insns[3] = 0x8a116000 | (low32 & 0x3ff); + __asm __volatile ("flush %0 + 12" : : "r" (insns)); + + insns[2] = 0x83287020; + } + + __asm __volatile ("flush %0 + 8" : : "r" (insns)); + + insns[1] = 0x0b000000 | (low32 >> 10); + __asm __volatile ("flush %0 + 4" : : "r" (insns)); + + insns[0] = 0x03000000 | (high32 >> 10); + __asm __volatile ("flush %0" : : "r" (insns)); + } +} + +#endif /* dl-plt.h */ diff --git 
a/REORG.TODO/sysdeps/sparc/sparc64/dl-trampoline.S b/REORG.TODO/sysdeps/sparc/sparc64/dl-trampoline.S new file mode 100644 index 0000000000..8e1a6efb13 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/dl-trampoline.S @@ -0,0 +1,325 @@ +/* PLT trampolines. Sparc 64-bit version. + Copyright (C) 2005-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + + .text + .align 32 + + /* %g1: PLT offset loaded by PLT entry + * %g4: callers PC, which is PLT0 + 24, therefore we + * add (32 + 8) to get the address of PLT2 which + * is where the magic cookie is stored + */ + .globl _dl_runtime_resolve_0 + .type _dl_runtime_resolve_0, @function +_dl_runtime_resolve_0: + cfi_startproc + + save %sp, -192, %sp + cfi_def_cfa_register(%fp) + cfi_window_save + cfi_register(%o7, %i7) + + sethi %hi(1047552), %l2 + ldx [%g4 + 32 + 8], %o0 + sub %g1, %g4, %l0 + xor %l2, -1016, %l2 + sethi %hi(5120), %l3 /* 160 * 32 */ + add %l0, %l2, %l0 + sethi %hi(32768), %l4 + udivx %l0, %l3, %l3 + sllx %l3, 2, %l1 + add %l1, %l3, %l1 + sllx %l1, 10, %l2 + sub %l4, 4, %l4 + sllx %l1, 5, %l1 + sub %l0, %l2, %l0 + udivx %l0, 24, %l0 + add %l0, %l4, %l0 + add %l1, %l0, %l1 + add %l1, %l1, %l0 + add %l0, %l1, %l0 + call _dl_fixup + sllx %l0, 3, %o1 + jmp %o0 + restore + + cfi_endproc + + .size _dl_runtime_resolve_0, .-_dl_runtime_resolve_0 + + /* %g1: PLT offset loaded by PLT entry + * %g4: callers PC, which is PLT1 + 24, therefore we + * add 8 to get the address of PLT2 which + * is where the magic cookie is stored + */ + .globl _dl_runtime_resolve_1 + .type _dl_runtime_resolve_1, @function +_dl_runtime_resolve_1: + cfi_startproc + + save %sp, -192, %sp + cfi_def_cfa_register(%fp) + cfi_window_save + cfi_register(%o7, %i7) + + srlx %g1, 12, %o1 + ldx [%g4 + 8], %o0 + add %o1, %o1, %o3 + sub %o1, 96, %o1 + call _dl_fixup + add %o1, %o3, %o1 + jmp %o0 + restore + + cfi_endproc + + .size _dl_runtime_resolve_1, .-_dl_runtime_resolve_1 + + /* For the profiling cases we pass in our stack frame + * as the base of the La_sparc64_regs, so it looks + * like: + * %l0 %sp + * ... + * %l7 %sp + (7 * 8) + * %i0 %sp + (8 * 8) + * ... 
+ * %i7 %sp + (15 * 8) + * %f0 %sp + (16 * 8) + * %f16 %sp + (31 * 8) + * framesize %sp + (32 * 8) + */ + + .globl _dl_profile_save_regs + .type _dl_profile_save_regs, @function +_dl_profile_save_regs: + cfi_startproc + + stx %l0, [%sp + STACK_BIAS + ( 0 * 8)] + stx %l1, [%sp + STACK_BIAS + ( 1 * 8)] + stx %l2, [%sp + STACK_BIAS + ( 2 * 8)] + stx %l3, [%sp + STACK_BIAS + ( 3 * 8)] + stx %l4, [%sp + STACK_BIAS + ( 4 * 8)] + stx %l5, [%sp + STACK_BIAS + ( 5 * 8)] + stx %l6, [%sp + STACK_BIAS + ( 6 * 8)] + stx %l7, [%sp + STACK_BIAS + ( 7 * 8)] + stx %i0, [%sp + STACK_BIAS + ( 8 * 8)] + stx %i1, [%sp + STACK_BIAS + ( 9 * 8)] + stx %i2, [%sp + STACK_BIAS + (10 * 8)] + stx %i3, [%sp + STACK_BIAS + (11 * 8)] + stx %i4, [%sp + STACK_BIAS + (12 * 8)] + stx %i5, [%sp + STACK_BIAS + (13 * 8)] + stx %i6, [%sp + STACK_BIAS + (14 * 8)] + stx %i7, [%sp + STACK_BIAS + (15 * 8)] + std %f0, [%sp + STACK_BIAS + (16 * 8)] + std %f2, [%sp + STACK_BIAS + (17 * 8)] + std %f4, [%sp + STACK_BIAS + (18 * 8)] + std %f6, [%sp + STACK_BIAS + (19 * 8)] + std %f8, [%sp + STACK_BIAS + (20 * 8)] + std %f10, [%sp + STACK_BIAS + (21 * 8)] + std %f12, [%sp + STACK_BIAS + (22 * 8)] + std %f14, [%sp + STACK_BIAS + (23 * 8)] + std %f16, [%sp + STACK_BIAS + (24 * 8)] + std %f18, [%sp + STACK_BIAS + (25 * 8)] + std %f20, [%sp + STACK_BIAS + (26 * 8)] + std %f22, [%sp + STACK_BIAS + (27 * 8)] + std %f24, [%sp + STACK_BIAS + (28 * 8)] + std %f26, [%sp + STACK_BIAS + (29 * 8)] + std %f28, [%sp + STACK_BIAS + (30 * 8)] + retl + std %f30, [%sp + STACK_BIAS + (31 * 8)] + + cfi_endproc + + .size _dl_profile_save_regs, .-_dl_profile_save_regs + + /* If we are going to call pltexit, then we must replicate + * the caller's stack frame. 
+ * %o0: PLT resolved function address + */ + .globl _dl_profile_invoke + .type _dl_profile_invoke, @function +_dl_profile_invoke: + cfi_startproc + + add %l0, 7, %l0 + andn %l0, 7, %l0 + add %l0, (8 * 8), %g1 + + sub %sp, %g1, %sp + srlx %l0, 3, %l7 + mov %o0, %l1 + mov %i0, %o0 + mov %i1, %o1 + mov %i2, %o2 + mov %i3, %o3 + mov %i4, %o4 + mov %i5, %o5 + add %fp, STACK_BIAS, %l2 + brz %l0, 2f + add %sp, STACK_BIAS, %l3 +1: ldx [%l2], %l4 + add %l2, 0x8, %l2 + subcc %l7, 1, %l7 + stx %l4, [%l3] + bne,pt %xcc, 1b + add %l3, 0x8, %l3 + +2: jmpl %l1, %o7 + nop + + stx %o0, [%sp + STACK_BIAS + (16 * 8)] + stx %o1, [%sp + STACK_BIAS + (17 * 8)] + stx %o2, [%sp + STACK_BIAS + (18 * 8)] + stx %o3, [%sp + STACK_BIAS + (19 * 8)] + std %f0, [%sp + STACK_BIAS + (20 * 8)] + std %f2, [%sp + STACK_BIAS + (21 * 8)] + std %f4, [%sp + STACK_BIAS + (22 * 8)] + std %f8, [%sp + STACK_BIAS + (23 * 8)] + + mov %l5, %o0 + mov %l6, %o1 + add %sp, STACK_BIAS + (24 * 8), %o2 + call _dl_call_pltexit + add %sp, STACK_BIAS + (16 * 8), %o3 + + ldx [%sp + STACK_BIAS + (16 * 8)], %i0 + ldx [%sp + STACK_BIAS + (17 * 8)], %i1 + ldx [%sp + STACK_BIAS + (18 * 8)], %i2 + ldx [%sp + STACK_BIAS + (19 * 8)], %i3 + ldd [%sp + STACK_BIAS + (20 * 8)], %f0 + ldd [%sp + STACK_BIAS + (21 * 8)], %f2 + ldd [%sp + STACK_BIAS + (22 * 8)], %f4 + ldd [%sp + STACK_BIAS + (23 * 8)], %f8 + + jmpl %i7 + 8, %g0 + restore + + cfi_endproc + + .size _dl_profile_invoke, .-_dl_profile_invoke + + /* %g1: PLT offset loaded by PLT entry + * %g4: callers PC, which is PLT0 + 24, therefore we + * add (32 + 8) to get the address of PLT2 which + * is where the magic cookie is stored + */ + .align 32 + .globl _dl_runtime_profile_0 + .type _dl_runtime_profile_0, @function +_dl_runtime_profile_0: + cfi_startproc + + save %sp, -336, %sp + cfi_def_cfa_register(%fp) + cfi_window_save + cfi_register(%o7, %i7) + + sethi %hi(1047552), %l2 + ldx [%g4 + 32 + 8], %o0 + sub %g1, %g4, %l0 + xor %l2, -1016, %l2 + sethi %hi(5120), %l3 /* 160 * 32 */ 
+ add %l0, %l2, %l0 + sethi %hi(32768), %l4 + udivx %l0, %l3, %l3 + sllx %l3, 2, %l1 + add %l1, %l3, %l1 + sllx %l1, 10, %l2 + sub %l4, 4, %l4 + sllx %l1, 5, %l1 + sub %l0, %l2, %l0 + udivx %l0, 24, %l0 + add %l0, %l4, %l0 + add %l1, %l0, %l1 + add %l1, %l1, %l0 + add %l0, %l1, %l0 + + mov %i7, %o2 + sllx %l0, 3, %o1 + + mov %o0, %l5 + mov %o1, %l6 + + call _dl_profile_save_regs + nop + + add %sp, STACK_BIAS, %o3 + call _dl_profile_fixup + add %sp, (STACK_BIAS + (32 * 8)), %o4 + + ldx [%sp + STACK_BIAS + (32 * 8)], %l0 + brlz,pt %l0, 1f + nop + + call _dl_profile_invoke + nop + +1: jmp %o0 + restore + + cfi_endproc + + .size _dl_runtime_profile_0, .-_dl_runtime_profile_0 + + /* %g1: PLT offset loaded by PLT entry + * %g4: callers PC, which is PLT1 + 24, therefore we + * add 8 to get the address of PLT2 which + * is where the magic cookie is stored + */ + .globl _dl_runtime_profile_1 + .type _dl_runtime_profile_1, @function +_dl_runtime_profile_1: + cfi_startproc + + save %sp, -336, %sp + cfi_def_cfa_register(%fp) + cfi_window_save + cfi_register(%o7, %i7) + + srlx %g1, 12, %o1 + ldx [%g4 + 8], %o0 + add %o1, %o1, %o3 + sub %o1, 96, %o1 + mov %i7, %o2 + add %o1, %o3, %o1 + + mov %o0, %l5 + mov %o1, %l6 + + call _dl_profile_save_regs + nop + + add %sp, STACK_BIAS, %o3 + call _dl_profile_fixup + add %sp, (STACK_BIAS + (32 * 8)), %o4 + + ldx [%sp + STACK_BIAS + (32 * 8)], %l0 + brlz,pt %l0, 1f + nop + + call _dl_profile_invoke + nop + +1: jmp %o0 + restore + + cfi_endproc + + .size _dl_runtime_profile_1, .-_dl_runtime_profile_1 diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/e_sqrtl.c b/REORG.TODO/sysdeps/sparc/sparc64/fpu/e_sqrtl.c new file mode 100644 index 0000000000..c540d05841 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/e_sqrtl.c @@ -0,0 +1,31 @@ +/* Long double square root, sparc64 version. + Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jakub@redhat.com>, 2000. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +extern void _Qp_sqrt(long double *, const long double *); + +long double +__ieee754_sqrtl (long double x) +{ + long double ret; + _Qp_sqrt (&ret, &x); + return ret; +} +strong_alias (__ieee754_sqrtl, __sqrtl_finite) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/Makefile b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/Makefile new file mode 100644 index 0000000000..03a271dfa4 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/Makefile @@ -0,0 +1,22 @@ +ifeq ($(subdir),math) +ifeq ($(have-as-vis3),yes) +libm-sysdep_routines += m_signbitf-vis3 m_signbit-vis3 m_finitef-vis3 \ + m_finite-vis3 m_isinff-vis3 m_isinf-vis3 \ + m_isnanf-vis3 m_isnan-vis3 s_lrintf-vis3 \ + s_lrint-vis3 s_rintf-vis3 s_rint-vis3 \ + s_fmaf-vis3 s_fma-vis3 \ + s_nearbyint-vis3 s_nearbyintf-vis3 \ + s_ceilf-vis3 s_ceil-vis3 s_floorf-vis3 \ + s_floor-vis3 s_truncf-vis3 s_trunc-vis3 +sysdep_routines += s_signbitf-vis3 s_signbit-vis3 s_finitef-vis3 \ + s_finite-vis3 s_isinff-vis3 s_isinf-vis3 \ + s_isnanf-vis3 s_isnan-vis3 + +CFLAGS-s_ceilf-vis3.c += -Wa,-Av9d -mvis3 +CFLAGS-s_ceil-vis3.c += -Wa,-Av9d -mvis3 +CFLAGS-s_floorf-vis3.c += -Wa,-Av9d -mvis3 +CFLAGS-s_floor-vis3.c += -Wa,-Av9d -mvis3 +CFLAGS-s_truncf-vis3.c += -Wa,-Av9d -mvis3 +CFLAGS-s_trunc-vis3.c += -Wa,-Av9d -mvis3 
+endif +endif diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis3.c b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis3.c new file mode 100644 index 0000000000..fa9c5d33ea --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil-vis3.c @@ -0,0 +1,23 @@ +/* ceil function, sparc64 vis3 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#define __ceil __ceil_vis3 + +#include <sysdeps/ieee754/dbl-64/wordsize-64/s_ceil.c> diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil.c b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil.c new file mode 100644 index 0000000000..efa05e94df --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_ceil.c @@ -0,0 +1,32 @@ +/* ceil function, sparc64 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_AS_VIS3_SUPPORT +# include <sparc-ifunc.h> +# include <math.h> + +extern double __ceil_vis3 (double); +extern double __ceil_generic (double); + +sparc_libm_ifunc(__ceil, hwcap & HWCAP_SPARC_VIS3 ? __ceil_vis3 : __ceil_generic); +weak_alias (__ceil, ceil) + +# define __ceil __ceil_generic +#endif + +#include <sysdeps/ieee754/dbl-64/wordsize-64/s_ceil.c> diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis3.c b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis3.c new file mode 100644 index 0000000000..1d918de7af --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf-vis3.c @@ -0,0 +1,23 @@ +/* Float ceil function, sparc64 vis3 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> + +#define __ceilf __ceilf_vis3 + +#include <sysdeps/ieee754/flt-32/s_ceilf.c> diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf.c b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf.c new file mode 100644 index 0000000000..62ada7fd2a --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_ceilf.c @@ -0,0 +1,32 @@ +/* Float ceil function, sparc64 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_AS_VIS3_SUPPORT +# include <sparc-ifunc.h> +# include <math.h> + +extern float __ceilf_vis3 (float); +extern float __ceilf_generic (float); + +sparc_libm_ifunc(__ceilf, hwcap & HWCAP_SPARC_VIS3 ? __ceilf_vis3 : __ceilf_generic); +weak_alias (__ceilf, ceilf) + +# define __ceilf __ceilf_generic +#endif + +#include <sysdeps/ieee754/flt-32/s_ceilf.c> diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_finite-vis3.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_finite-vis3.S new file mode 100644 index 0000000000..0e2b5f6b00 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_finite-vis3.S @@ -0,0 +1,28 @@ +/* finite(). sparc64 vis3 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY (__finite_vis3) + fabsd %f0, %f0 + movstouw %f0, %o0 + sethi %hi(0x7ff00000), %o2 + sub %o0, %o2, %o0 + retl + srl %o0, 31, %o0 +END (__finite_vis3) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_finite.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_finite.S new file mode 100644 index 0000000000..78406a62b1 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_finite.S @@ -0,0 +1,15 @@ +#include <sparc-ifunc.h> + +SPARC_ASM_VIS3_IFUNC(finite) + +hidden_def (__finite) +weak_alias (__finite, finite) + +# undef weak_alias +# define weak_alias(a, b) +# undef hidden_def +# define hidden_def(a) + +#define __finite __finite_generic + +#include "../s_finite.S" diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_finitef-vis3.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_finitef-vis3.S new file mode 100644 index 0000000000..59e17bc75c --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_finitef-vis3.S @@ -0,0 +1,28 @@ +/* finitef(). sparc64 vis3 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY (__finitef_vis3) + fabss %f1, %f0 + movstouw %f0, %o0 + sethi %hi(0x7f800000), %o2 + sub %o0, %o2, %o0 + retl + srl %o0, 31, %o0 +END (__finitef_vis3) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_finitef.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_finitef.S new file mode 100644 index 0000000000..cafd41fd92 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_finitef.S @@ -0,0 +1,15 @@ +#include <sparc-ifunc.h> + +SPARC_ASM_VIS3_IFUNC(finitef) + +hidden_def (__finitef) +weak_alias (__finitef, finitef) + +# undef weak_alias +# define weak_alias(a, b) +# undef hidden_def +# define hidden_def(a) + +#define __finitef __finitef_generic + +#include "../s_finitef.S" diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis3.c b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis3.c new file mode 100644 index 0000000000..04f6183664 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_floor-vis3.c @@ -0,0 +1,23 @@ +/* floor function, sparc64 vis3 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#define __floor __floor_vis3 + +#include <sysdeps/ieee754/dbl-64/wordsize-64/s_floor.c> diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_floor.c b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_floor.c new file mode 100644 index 0000000000..d097f68866 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_floor.c @@ -0,0 +1,32 @@ +/* floor function, sparc64 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifdef HAVE_AS_VIS3_SUPPORT +# include <sparc-ifunc.h> +# include <math.h> + +extern double __floor_vis3 (double); +extern double __floor_generic (double); + +sparc_libm_ifunc(__floor, hwcap & HWCAP_SPARC_VIS3 ? __floor_vis3 : __floor_generic); +weak_alias (__floor, floor) + +# define __floor __floor_generic +#endif + +#include <sysdeps/ieee754/dbl-64/wordsize-64/s_floor.c> diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis3.c b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis3.c new file mode 100644 index 0000000000..0ff49e6802 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf-vis3.c @@ -0,0 +1,23 @@ +/* Float floor function, sparc64 vis3 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#define __floorf __floorf_vis3 + +#include <sysdeps/ieee754/flt-32/s_floorf.c> diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf.c b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf.c new file mode 100644 index 0000000000..2a6c710349 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_floorf.c @@ -0,0 +1,32 @@ +/* Float floor function, sparc64 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_AS_VIS3_SUPPORT +# include <sparc-ifunc.h> +# include <math.h> + +extern float __floorf_vis3 (float); +extern float __floorf_generic (float); + +sparc_libm_ifunc(__floorf, hwcap & HWCAP_SPARC_VIS3 ? __floorf_vis3 : __floorf_generic); +weak_alias (__floorf, floorf) + +# define __floorf __floorf_generic +#endif + +#include <sysdeps/ieee754/flt-32/s_floorf.c> diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_fma-vis3.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_fma-vis3.S new file mode 100644 index 0000000000..343ce37de9 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_fma-vis3.S @@ -0,0 +1,25 @@ +/* fma function, sparc64 vis3 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2012. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY (__fma_vis3) + retl + fmaddd %f0, %f2, %f4, %f0 +END (__fma_vis3) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_fma.c b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_fma.c new file mode 100644 index 0000000000..3f2f1622c8 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_fma.c @@ -0,0 +1,14 @@ +#ifdef HAVE_AS_VIS3_SUPPORT +# include <sparc-ifunc.h> +# include <math.h> + +extern double __fma_vis3 (double, double, double); +extern double __fma_generic (double, double, double); + +sparc_libm_ifunc(__fma, hwcap & HWCAP_SPARC_FMAF ? __fma_vis3 : __fma_generic); +weak_alias (__fma, fma) + +# define __fma __fma_generic +#endif + +#include <sysdeps/ieee754/dbl-64/s_fma.c> diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_fmaf-vis3.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_fmaf-vis3.S new file mode 100644 index 0000000000..c2fa72a211 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_fmaf-vis3.S @@ -0,0 +1,25 @@ +/* fmaf function, sparc64 vis3 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2012. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY (__fmaf_vis3) + retl + fmadds %f1, %f3, %f5, %f0 +END (__fmaf_vis3) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_fmaf.c b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_fmaf.c new file mode 100644 index 0000000000..7a273a3b13 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_fmaf.c @@ -0,0 +1,14 @@ +#ifdef HAVE_AS_VIS3_SUPPORT +# include <sparc-ifunc.h> +# include <math.h> + +extern float __fmaf_vis3 (float, float, float); +extern float __fmaf_generic (float, float, float); + +sparc_libm_ifunc(__fmaf, hwcap & HWCAP_SPARC_FMAF ? __fmaf_vis3 : __fmaf_generic); +weak_alias (__fmaf, fmaf) + +# define __fmaf __fmaf_generic +#endif + +#include <sysdeps/ieee754/dbl-64/s_fmaf.c> diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_isinf-vis3.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_isinf-vis3.S new file mode 100644 index 0000000000..54ff556eb8 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_isinf-vis3.S @@ -0,0 +1,31 @@ +/* isinf(). sparc64 vis3 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY (__isinf_vis3) + movdtox %f0, %g1 + sethi %hi(0x7ff00000), %o2 + sllx %o2, 32, %o2 + sllx %g1, 1, %o4 + srlx %o4, 1, %o5 + srax %g1, 62, %o0 + xor %o5, %o2, %o3 + retl + movrne %o3, %g0, %o0 +END (__isinf_vis3) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_isinf.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_isinf.S new file mode 100644 index 0000000000..ed9b62640d --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_isinf.S @@ -0,0 +1,15 @@ +#include <sparc-ifunc.h> + +SPARC_ASM_VIS3_IFUNC(isinf) + +hidden_def (__isinf) +weak_alias (__isinf, isinf) + +# undef weak_alias +# define weak_alias(a, b) +# undef hidden_def +# define hidden_def(a) + +#define __isinf __isinf_generic + +#include "../s_isinf.S" diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_isinff-vis3.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_isinff-vis3.S new file mode 100644 index 0000000000..853bfc64e4 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_isinff-vis3.S @@ -0,0 +1,30 @@ +/* isinff(). sparc64 vis3 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY (__isinff_vis3) + movstouw %f1, %g1 + sethi %hi(0x7f800000), %o2 + sll %g1, 1, %o4 + srl %o4, 1, %o5 + sra %g1, 30, %o0 + xor %o5, %o2, %o3 + retl + movrne %o3, %g0, %o0 +END (__isinff_vis3) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_isinff.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_isinff.S new file mode 100644 index 0000000000..04517398f8 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_isinff.S @@ -0,0 +1,15 @@ +#include <sparc-ifunc.h> + +SPARC_ASM_VIS3_IFUNC(isinff) + +hidden_def (__isinff) +weak_alias (__isinff, isinff) + +# undef weak_alias +# define weak_alias(a, b) +# undef hidden_def +# define hidden_def(a) + +#define __isinff __isinff_generic + +#include "../s_isinff.S" diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_isnan-vis3.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_isnan-vis3.S new file mode 100644 index 0000000000..6dbb8dee2c --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_isnan-vis3.S @@ -0,0 +1,30 @@ +/* isnan(). sparc64 vis3 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY (__isnan_vis3) + movdtox %f0, %o0 + sethi %hi(0x7ff00000), %g1 + sllx %g1, 32, %g1 + sllx %o0, 1, %o0 + srlx %o0, 1, %o0 + sub %g1, %o0, %o0 + retl + srlx %o0, 63, %o0 +END (__isnan_vis3) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_isnan.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_isnan.S new file mode 100644 index 0000000000..40e985a5d4 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_isnan.S @@ -0,0 +1,15 @@ +#include <sparc-ifunc.h> + +SPARC_ASM_VIS3_IFUNC(isnan) + +hidden_def (__isnan) +weak_alias (__isnan, isnan) + +# undef weak_alias +# define weak_alias(a, b) +# undef hidden_def +# define hidden_def(a) + +#define __isnan __isnan_generic + +#include "../s_isnan.S" diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_isnanf-vis3.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_isnanf-vis3.S new file mode 100644 index 0000000000..46fbf6de9f --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_isnanf-vis3.S @@ -0,0 +1,29 @@ +/* isnanf(). sparc64 vis3 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY (__isnanf_vis3) + movstouw %f1, %o0 + sethi %hi(0x7f800000), %g1 + sll %o0, 1, %o0 + srl %o0, 1, %o0 + sub %g1, %o0, %o0 + retl + srl %o0, 31, %o0 +END (__isnanf_vis3) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_isnanf.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_isnanf.S new file mode 100644 index 0000000000..6b53b69d2e --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_isnanf.S @@ -0,0 +1,15 @@ +#include <sparc-ifunc.h> + +SPARC_ASM_VIS3_IFUNC(isnanf) + +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) + +# undef weak_alias +# define weak_alias(a, b) +# undef hidden_def +# define hidden_def(a) + +#define __isnanf __isnanf_generic + +#include "../s_isnanf.S" diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_lrint-vis3.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_lrint-vis3.S new file mode 100644 index 0000000000..87895371dd --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_lrint-vis3.S @@ -0,0 +1,52 @@ +/* lrint(), sparc64 vis3 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2012. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* VIS instructions are used to facilitate the formation of + easier constants, and the propagation of the sign bit. */ + +#define TWO_FIFTYTWO 0x43300000 /* 2**52 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__lrint_vis3) + sethi %hi(TWO_FIFTYTWO), %o2 + sllx %o2, 32, %o2 + fzero ZERO + + fnegd ZERO, SIGN_BIT + movxtod %o2, %f16 + fabsd %f0, %f14 + + fcmpd %fcc3, %f14, %f16 + + fmovduge %fcc3, ZERO, %f16 + fand %f0, SIGN_BIT, SIGN_BIT + + for %f16, SIGN_BIT, %f16 + faddd %f0, %f16, %f6 + fsubd %f6, %f16, %f0 + fabsd %f0, %f0 + for %f0, SIGN_BIT, %f0 + fdtox %f0, %f4 + retl + movdtox %f4, %o0 +END (__lrint_vis3) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_lrint.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_lrint.S new file mode 100644 index 0000000000..94af8f028c --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_lrint.S @@ -0,0 +1,17 @@ +#include <sparc-ifunc.h> + +SPARC_ASM_VIS3_IFUNC(lrint) + +weak_alias (__lrint, lrint) + +strong_alias (__lrint, __llrint) +weak_alias (__llrint, llrint) + +# undef weak_alias +# define weak_alias(a, b) +# undef strong_alias +# define strong_alias(a, b) + +#define __lrint __lrint_generic + +#include "../s_lrint.S" diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_lrintf-vis3.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_lrintf-vis3.S new file mode 100644 index 0000000000..3ef005d433 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_lrintf-vis3.S @@ -0,0 +1,51 @@ +/* lrintf(), sparc64 
vis3 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2012. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* VIS instructions are used to facilitate the formation of + easier constants, and the propagation of the sign bit. */ + +#define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__lrintf_vis3) + sethi %hi(TWO_TWENTYTHREE), %o2 + fzeros ZERO + + fnegs ZERO, SIGN_BIT + movwtos %o2, %f16 + fabss %f1, %f14 + + fcmps %fcc3, %f14, %f16 + + fmovsuge %fcc3, ZERO, %f16 + fands %f1, SIGN_BIT, SIGN_BIT + + fors %f16, SIGN_BIT, %f16 + fadds %f1, %f16, %f5 + fsubs %f5, %f16, %f0 + fabss %f0, %f0 + fors %f0, SIGN_BIT, %f0 + fstox %f0, %f4 + retl + movdtox %f4, %o0 +END (__lrintf_vis3) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_lrintf.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_lrintf.S new file mode 100644 index 0000000000..e6ea4061c3 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_lrintf.S @@ -0,0 +1,17 @@ +#include <sparc-ifunc.h> + +SPARC_ASM_VIS3_IFUNC(lrintf) + +weak_alias (__lrintf, lrintf) + +strong_alias (__lrintf, __llrintf) +weak_alias (__llrintf, llrintf) + +# undef weak_alias +# define 
weak_alias(a, b) +# undef strong_alias +# define strong_alias(a, b) + +#define __lrintf __lrintf_generic + +#include "../s_lrintf.S" diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_nearbyint-vis3.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_nearbyint-vis3.S new file mode 100644 index 0000000000..67e570a800 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_nearbyint-vis3.S @@ -0,0 +1,62 @@ +/* Round float to int floating-point values without generating + an inexact exception, sparc64 vis3 version. + + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2013. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* We pop constants into the FPU registers using the incoming + argument stack slots, since this avoids having to use any PIC + references. We also thus avoid having to allocate a register + window. + + VIS instructions are used to facilitate the formation of + easier constants, and the propagation of the sign bit. 
*/ + +#define TWO_FIFTYTWO 0x43300000 /* 2**52 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__nearbyint_vis3) + fcmpd %fcc3, %f0, %f0 /* Check for sNaN */ + stx %fsr, [%sp + STACK_BIAS + 144] + sethi %hi(TWO_FIFTYTWO), %o2 + sllx %o2, 32, %o2 + ldx [%sp + STACK_BIAS + 144], %o4 + sethi %hi(0xf8003e0), %o5 + fzero ZERO + or %o5, %lo(0xf8003e0), %o5 + fnegd ZERO, SIGN_BIT + andn %o4, %o5, %o4 + movxtod %o2, %f16 + stx %o4, [%sp + STACK_BIAS + 136] + ldx [%sp + STACK_BIAS + 136], %fsr + fabsd %f0, %f14 + fcmpd %fcc3, %f14, %f16 + fmovduge %fcc3, ZERO, %f16 + fand %f0, SIGN_BIT, SIGN_BIT + for %f16, SIGN_BIT, %f16 + faddd %f0, %f16, %f6 + fsubd %f6, %f16, %f0 + fabsd %f0, %f0 + for %f0, SIGN_BIT, %f0 + retl + ldx [%sp + STACK_BIAS + 144], %fsr +END (__nearbyint_vis3) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_nearbyint.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_nearbyint.S new file mode 100644 index 0000000000..bb75ab3606 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_nearbyint.S @@ -0,0 +1,12 @@ +#include <sparc-ifunc.h> + +SPARC_ASM_VIS3_IFUNC(nearbyint) + +weak_alias (__nearbyint, nearbyint) + +# undef weak_alias +# define weak_alias(a, b) + +#define __nearbyint __nearbyint_generic + +#include "../s_nearbyint.S" diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_nearbyintf-vis3.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_nearbyintf-vis3.S new file mode 100644 index 0000000000..208af37d0d --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_nearbyintf-vis3.S @@ -0,0 +1,61 @@ +/* Round float to int floating-point values without generating + an inexact exception, sparc64 vis3 version. + + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2013. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* We pop constants into the FPU registers using the incoming + argument stack slots, since this avoids having to use any PIC + references. We also thus avoid having to allocate a register + window. + + VIS instructions are used to facilitate the formation of + easier constants, and the propagation of the sign bit. 
*/ + +#define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__nearbyintf_vis3) + fcmps %fcc3, %f1, %f1 /* Check for sNaN */ + stx %fsr, [%sp + STACK_BIAS + 144] + sethi %hi(0xf8003e0), %o5 + sethi %hi(TWO_TWENTYTHREE), %o2 + ldx [%sp + STACK_BIAS + 144], %o4 + or %o5, %lo(0xf8003e0), %o5 + fzeros ZERO + andn %o4, %o5, %o4 + fnegs ZERO, SIGN_BIT + movwtos %o2, %f16 + stx %o4, [%sp + STACK_BIAS + 136] + ldx [%sp + STACK_BIAS + 136], %fsr + fabss %f1, %f14 + fcmps %fcc3, %f14, %f16 + fmovsuge %fcc3, ZERO, %f16 + fands %f1, SIGN_BIT, SIGN_BIT + fors %f16, SIGN_BIT, %f16 + fadds %f1, %f16, %f5 + fsubs %f5, %f16, %f0 + fabss %f0, %f0 + fors %f0, SIGN_BIT, %f0 + retl + ldx [%sp + STACK_BIAS + 144], %fsr +END (__nearbyintf_vis3) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_nearbyintf.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_nearbyintf.S new file mode 100644 index 0000000000..95100c1bfc --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_nearbyintf.S @@ -0,0 +1,12 @@ +#include <sparc-ifunc.h> + +SPARC_ASM_VIS3_IFUNC(nearbyintf) + +weak_alias (__nearbyintf, nearbyintf) + +# undef weak_alias +# define weak_alias(a, b) + +#define __nearbyintf __nearbyintf_generic + +#include "../s_nearbyintf.S" diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_rint-vis3.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_rint-vis3.S new file mode 100644 index 0000000000..495a02222e --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_rint-vis3.S @@ -0,0 +1,50 @@ +/* Round float to int floating-point values, sparc64 vis3 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2012. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* VIS instructions are used to facilitate the formation of + easier constants, and the propagation of the sign bit. */ + +#define TWO_FIFTYTWO 0x43300000 /* 2**52 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__rint_vis3) + sethi %hi(TWO_FIFTYTWO), %o2 + sllx %o2, 32, %o2 + fzero ZERO + + fnegd ZERO, SIGN_BIT + movxtod %o2, %f16 + fabsd %f0, %f14 + + fcmpd %fcc3, %f14, %f16 + + fmovduge %fcc3, ZERO, %f16 + fand %f0, SIGN_BIT, SIGN_BIT + + for %f16, SIGN_BIT, %f16 + faddd %f0, %f16, %f6 + fsubd %f6, %f16, %f0 + fabsd %f0, %f0 + retl + for %f0, SIGN_BIT, %f0 +END (__rint_vis3) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_rint.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_rint.S new file mode 100644 index 0000000000..cc980eb8c7 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_rint.S @@ -0,0 +1,12 @@ +#include <sparc-ifunc.h> + +SPARC_ASM_VIS3_IFUNC(rint) + +weak_alias (__rint, rint) + +# undef weak_alias +# define weak_alias(a, b) + +#define __rint __rint_generic + +#include "../s_rint.S" diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_rintf-vis3.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_rintf-vis3.S new file mode 100644 index 0000000000..521c0b21ec --- /dev/null +++ 
b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_rintf-vis3.S @@ -0,0 +1,49 @@ +/* Round float to int floating-point values, sparc64 vis3 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2012. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* VIS instructions are used to facilitate the formation of + easier constants, and the propagation of the sign bit. 
*/ + +#define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__rintf_vis3) + sethi %hi(TWO_TWENTYTHREE), %o2 + fzeros ZERO + + fnegs ZERO, SIGN_BIT + movwtos %o2, %f16 + fabss %f1, %f14 + + fcmps %fcc3, %f14, %f16 + + fmovsuge %fcc3, ZERO, %f16 + fands %f1, SIGN_BIT, SIGN_BIT + + fors %f16, SIGN_BIT, %f16 + fadds %f1, %f16, %f5 + fsubs %f5, %f16, %f0 + fabss %f0, %f0 + retl + fors %f0, SIGN_BIT, %f0 +END (__rintf_vis3) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_rintf.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_rintf.S new file mode 100644 index 0000000000..38fd936086 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_rintf.S @@ -0,0 +1,12 @@ +#include <sparc-ifunc.h> + +SPARC_ASM_VIS3_IFUNC(rintf) + +weak_alias (__rintf, rintf) + +# undef weak_alias +# define weak_alias(a, b) + +#define __rintf __rintf_generic + +#include "../s_rintf.S" diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_signbit-vis3.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_signbit-vis3.S new file mode 100644 index 0000000000..bce7c9795a --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_signbit-vis3.S @@ -0,0 +1,25 @@ +/* signbit(). sparc64 vis3 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + /* Return 1 in %o0 when the sign bit of the double in %f0 is set, otherwise 0. */ +ENTRY (__signbit_vis3) + movdtox %f0, %o0 /* VIS3: copy the raw 64 bits into an integer register */ + retl + srlx %o0, 63, %o0 /* delay slot: keep only bit 63, the sign */ +END (__signbit_vis3) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_signbit.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_signbit.S new file mode 100644 index 0000000000..b8ff64a547 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_signbit.S @@ -0,0 +1,20 @@ +#include <sparc-ifunc.h> + /* NOTE(review): SPARC_ASM_VIS3_IFUNC is defined in sparc-ifunc.h, which is not visible here; presumably it emits the resolver choosing between __signbit_vis3 and __signbit_generic -- confirm. */ +SPARC_ASM_VIS3_IFUNC(signbit) + +/* On 64-bit the double version will also always work for + long-double-precision since in both cases the word with the + sign bit in it is passed always in register %f0. */ +strong_alias (__signbit, __signbitl) +hidden_def (__signbitl) + /* Make the alias and hidden_def macros expand to nothing and rename __signbit, so that the generic file included below only provides __signbit_generic. */ +# undef weak_alias +# define weak_alias(a, b) +# undef strong_alias +# define strong_alias(a, b) +# undef hidden_def +# define hidden_def(a) + +#define __signbit __signbit_generic + +#include "../s_signbit.S" diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_signbitf-vis3.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_signbitf-vis3.S new file mode 100644 index 0000000000..7833e725e6 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_signbitf-vis3.S @@ -0,0 +1,25 @@ +/* signbitf(). sparc64 vis3 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + /* Return 1 in %o0 when the sign bit of the float in %f1 is set, otherwise 0. */ +ENTRY (__signbitf_vis3) + movstouw %f1, %o0 /* VIS3: copy the raw 32 bits, zero-extended, into an integer register */ + retl + srl %o0, 31, %o0 /* delay slot: keep only bit 31, the sign */ +END (__signbitf_vis3) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_signbitf.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_signbitf.S new file mode 100644 index 0000000000..d57e999b90 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_signbitf.S @@ -0,0 +1,10 @@ +#include <sparc-ifunc.h> + /* NOTE(review): SPARC_ASM_VIS3_IFUNC is defined in sparc-ifunc.h, which is not visible here; presumably it emits the resolver choosing between __signbitf_vis3 and __signbitf_generic -- confirm. */ +SPARC_ASM_VIS3_IFUNC(signbitf) + /* Make weak_alias expand to nothing and rename __signbitf, so that the generic file included below only provides __signbitf_generic. */ +# undef weak_alias +# define weak_alias(a, b) + +#define __signbitf __signbitf_generic + +#include "../s_signbitf.S" diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_trunc-vis3.c b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_trunc-vis3.c new file mode 100644 index 0000000000..38ee29a8d3 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_trunc-vis3.c @@ -0,0 +1,23 @@ +/* trunc function, sparc64 vis3 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <math.h> + /* Compile the shared C implementation a second time under the name __trunc_vis3. */ +#define __trunc __trunc_vis3 + +#include <sysdeps/ieee754/dbl-64/wordsize-64/s_trunc.c> diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_trunc.c b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_trunc.c new file mode 100644 index 0000000000..dc67f423f1 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_trunc.c @@ -0,0 +1,32 @@ +/* trunc function, sparc64 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + /* Without HAVE_AS_VIS3_SUPPORT this file is just the shared implementation under its normal name. */ +#ifdef HAVE_AS_VIS3_SUPPORT +# include <sparc-ifunc.h> +# include <math.h> + +extern double __trunc_vis3 (double); +extern double __trunc_generic (double); + /* Bind __trunc to the VIS3 build when hwcap has HWCAP_SPARC_VIS3 set, otherwise to the generic build. */ +sparc_libm_ifunc(__trunc, hwcap & HWCAP_SPARC_VIS3 ? __trunc_vis3 : __trunc_generic); +weak_alias (__trunc, trunc) + /* The copy compiled from the include below becomes the generic fallback. */ +# define __trunc __trunc_generic +#endif + +#include <sysdeps/ieee754/dbl-64/wordsize-64/s_trunc.c> diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_truncf-vis3.c b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_truncf-vis3.c new file mode 100644 index 0000000000..302aa56047 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_truncf-vis3.c @@ -0,0 +1,23 @@ +/* Float trunc function, sparc64 vis3 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <math.h> + +#define __truncf __truncf_vis3 + +#include <sysdeps/ieee754/flt-32/s_truncf.c> diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_truncf.c b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_truncf.c new file mode 100644 index 0000000000..980a313ae1 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/multiarch/s_truncf.c @@ -0,0 +1,32 @@ +/* Float trunc function, sparc64 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + /* Without HAVE_AS_VIS3_SUPPORT this file is just the shared implementation under its normal name. */ +#ifdef HAVE_AS_VIS3_SUPPORT +# include <sparc-ifunc.h> +# include <math.h> + +extern float __truncf_vis3 (float); +extern float __truncf_generic (float); + /* Bind __truncf to the VIS3 build when hwcap has HWCAP_SPARC_VIS3 set, otherwise to the generic build. */ +sparc_libm_ifunc(__truncf, hwcap & HWCAP_SPARC_VIS3 ? __truncf_vis3 : __truncf_generic); +weak_alias (__truncf, truncf) + /* The copy compiled from the include below becomes the generic fallback. */ +# define __truncf __truncf_generic +#endif + +#include <sysdeps/ieee754/flt-32/s_truncf.c> diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_copysign.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_copysign.S new file mode 100644 index 0000000000..e50d8cd246 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_copysign.S @@ -0,0 +1,30 @@ +/* copysign function, sparc64 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2012. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + /* Double copysign. Only the upper 32-bit word of each operand is touched (%f0 for x, %f2 for y), because that word carries the sign bit; the low word of x in %f1 is left as it is. */ +ENTRY (__copysign) + fzeros %f7 + fnegs %f7, %f7 /* %f7 = 0x80000000, the sign-bit mask */ + fands %f2, %f7, %f9 /* %f9 = sign bit of y */ + fandnot2s %f0, %f7, %f0 /* clear the sign bit of x */ + retl + fors %f0, %f9, %f0 /* delay slot: merge in the sign of y */ +END (__copysign) +weak_alias (__copysign, copysign)
\ No newline at end of file diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_copysignf.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_copysignf.S new file mode 100644 index 0000000000..2f24217274 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_copysignf.S @@ -0,0 +1,30 @@ +/* float copysign function, sparc64 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2012. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + /* Float copysign. x is read from %f1 and y from %f3; the result is written to %f0. */ +ENTRY (__copysignf) + fzeros %f7 + fnegs %f7, %f7 /* %f7 = 0x80000000, the sign-bit mask */ + fands %f3, %f7, %f9 /* %f9 = sign bit of y */ + fandnot2s %f1, %f7, %f1 /* clear the sign bit of x */ + retl + fors %f1, %f9, %f0 /* delay slot: merge in the sign of y, result in %f0 */ +END (__copysignf) +weak_alias (__copysignf, copysignf)
\ No newline at end of file diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_fabs.c b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_fabs.c new file mode 100644 index 0000000000..db5ecf2162 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_fabs.c @@ -0,0 +1,5 @@ +double __fabs (double x) /* Absolute value of a double, delegated to the compiler builtin. */ +{ + return __builtin_fabs (x); +} +weak_alias (__fabs, fabs) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_fabsf.c b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_fabsf.c new file mode 100644 index 0000000000..8a218e4942 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_fabsf.c @@ -0,0 +1,5 @@ +float __fabsf (float x) /* Absolute value of a float, delegated to the compiler builtin. */ +{ + return __builtin_fabsf (x); +} +weak_alias (__fabsf, fabsf) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_fabsl.c b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_fabsl.c new file mode 100644 index 0000000000..67e9f47071 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_fabsl.c @@ -0,0 +1,5 @@ +long double __fabsl (long double x) /* Absolute value of a long double, delegated to the compiler builtin. */ +{ + return __builtin_fabsl (x); +} +weak_alias (__fabsl, fabsl) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_finite.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_finite.S new file mode 100644 index 0000000000..28ee074c1d --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_finite.S @@ -0,0 +1,31 @@ +/* finite(). sparc64 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + /* Return 1 in %o0 when the double in %f0 is neither infinite nor NaN, otherwise 0. Only the upper word, which holds the sign and exponent, is examined. */ +ENTRY (__finite) + fabsd %f0, %f0 /* drop the sign */ + st %f0, [%sp + STACK_BIAS + 128] /* spill the upper word so it can be reloaded as an integer */ + sethi %hi(0x7ff00000), %o2 /* upper word of +Inf: exponent field all ones */ + ld [%sp + STACK_BIAS + 128], %o0 + sub %o0, %o2, %o0 /* negative exactly when the exponent is not all ones */ + retl + srl %o0, 31, %o0 /* delay slot: bit 31 of the difference is the answer */ +END (__finite) +hidden_def (__finite) +weak_alias (__finite, finite) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_finitef.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_finitef.S new file mode 100644 index 0000000000..f3edca3c57 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_finitef.S @@ -0,0 +1,31 @@ +/* finitef(). sparc64 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + /* Return 1 in %o0 when the float in %f1 is neither infinite nor NaN, otherwise 0. */ +ENTRY (__finitef) + fabss %f1, %f0 /* %f0 = |x| */ + st %f0, [%sp + STACK_BIAS + 128] /* spill so the bits can be reloaded as an integer */ + sethi %hi(0x7f800000), %o2 /* bit pattern of +Inf: exponent field all ones */ + ld [%sp + STACK_BIAS + 128], %o0 + sub %o0, %o2, %o0 /* negative exactly when the exponent is not all ones */ + retl + srl %o0, 31, %o0 /* delay slot: bit 31 of the difference is the answer */ +END (__finitef) +hidden_def (__finitef) +weak_alias (__finitef, finitef) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_fma.c b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_fma.c new file mode 100644 index 0000000000..8f62605870 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_fma.c @@ -0,0 +1,2 @@ +/* Always use dbl-64 version because long double is emulated in software. */ +#include <sysdeps/ieee754/dbl-64/s_fma.c> diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_isinf.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_isinf.S new file mode 100644 index 0000000000..b333a99f54 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_isinf.S @@ -0,0 +1,34 @@ +/* isinf(). sparc64 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + /* Return 1 in %o0 for +Inf, -1 for -Inf and 0 for any other double in %f0. */ +ENTRY (__isinf) + std %f0, [%sp + STACK_BIAS + 128] /* spill x so the bits can be reloaded as an integer */ + sethi %hi(0x7ff00000), %o2 + ldx [%sp + STACK_BIAS + 128], %g1 /* %g1 = raw bits of x */ + sllx %o2, 32, %o2 /* %o2 = bit pattern of +Inf */ + sllx %g1, 1, %o4 + srlx %o4, 1, %o5 /* %o5 = bits of |x| (sign shifted out) */ + srax %g1, 62, %o0 /* tentative result: top two bits sign-extended, 1 for +Inf and -1 for -Inf */ + xor %o5, %o2, %o3 /* zero only when |x| is exactly the Inf pattern */ + retl + movrne %o3, %g0, %o0 /* delay slot: not an infinity, return 0 */ +END (__isinf) +hidden_def (__isinf) +weak_alias (__isinf, isinf) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_isinff.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_isinff.S new file mode 100644 index 0000000000..a5ecf15a9f --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_isinff.S @@ -0,0 +1,33 @@ +/* isinff(). sparc64 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + /* Return 1 in %o0 for +Inf, -1 for -Inf and 0 for any other float in %f1. */ +ENTRY (__isinff) + st %f1, [%sp + STACK_BIAS + 128] /* spill x so the bits can be reloaded as an integer */ + sethi %hi(0x7f800000), %o2 /* %o2 = bit pattern of +Inf */ + lduw [%sp + STACK_BIAS + 128], %g1 /* %g1 = raw bits of x */ + sll %g1, 1, %o4 + srl %o4, 1, %o5 /* %o5 = bits of |x| (sign shifted out) */ + sra %g1, 30, %o0 /* tentative result: top two bits sign-extended, 1 for +Inf and -1 for -Inf */ + xor %o5, %o2, %o3 /* zero only when |x| is exactly the Inf pattern */ + retl + movrne %o3, %g0, %o0 /* delay slot: not an infinity, return 0 */ +END (__isinff) +hidden_def (__isinff) +weak_alias (__isinff, isinff) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_isnan.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_isnan.S new file mode 100644 index 0000000000..21084ce379 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_isnan.S @@ -0,0 +1,33 @@ +/* isnan(). sparc64 version. 
+ Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + /* Return 1 in %o0 when the double in %f0 is a NaN, otherwise 0. */ +ENTRY (__isnan) + std %f0, [%sp + STACK_BIAS + 128] /* spill x so the bits can be reloaded as an integer */ + sethi %hi(0x7ff00000), %g1 + ldx [%sp + STACK_BIAS + 128], %o0 /* %o0 = raw bits of x */ + sllx %g1, 32, %g1 /* %g1 = bit pattern of +Inf */ + sllx %o0, 1, %o0 + srlx %o0, 1, %o0 /* %o0 = bits of |x| (sign shifted out) */ + sub %g1, %o0, %o0 /* negative exactly when |x| is above Inf, i.e. a NaN */ + retl + srlx %o0, 63, %o0 /* delay slot: sign of the difference is the answer */ +END (__isnan) +hidden_def (__isnan) +weak_alias (__isnan, isnan) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_isnanf.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_isnanf.S new file mode 100644 index 0000000000..a012f42746 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_isnanf.S @@ -0,0 +1,32 @@ +/* isnanf(). sparc64 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + /* Return 1 in %o0 when the float in %f1 is a NaN, otherwise 0. */ +ENTRY (__isnanf) + st %f1, [%sp + STACK_BIAS + 128] /* spill x so the bits can be reloaded as an integer */ + sethi %hi(0x7f800000), %g1 /* %g1 = bit pattern of +Inf */ + lduw [%sp + STACK_BIAS + 128], %o0 /* %o0 = raw bits of x */ + sll %o0, 1, %o0 + srl %o0, 1, %o0 /* %o0 = bits of |x| (sign shifted out) */ + sub %g1, %o0, %o0 /* negative exactly when |x| is above Inf, i.e. a NaN */ + retl + srl %o0, 31, %o0 /* delay slot: bit 31 of the difference is the answer */ +END (__isnanf) +hidden_def (__isnanf) +weak_alias (__isnanf, isnanf) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_llrint.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_llrint.S new file mode 100644 index 0000000000..7c8e941b77 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_llrint.S @@ -0,0 +1 @@ +/* llrint is implemented in s_lrint.S */ diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_llrintf.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_llrintf.S new file mode 100644 index 0000000000..abab3b9a2b --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_llrintf.S @@ -0,0 +1 @@ +/* llrintf is implemented in s_lrintf.S */ diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_lrint.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_lrint.S new file mode 100644 index 0000000000..0a3162f947 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_lrint.S @@ -0,0 +1,63 @@ +/* lrint(), sparc64 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2012. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* We pop constants into the FPU registers using the incoming + argument stack slots, since this avoids having to use any PIC + references. We also thus avoid having to allocate a register + window. + + VIS instructions are used to facilitate the formation of + easier constants, and the propagation of the sign bit. */ + +#define TWO_FIFTYTWO 0x43300000 /* 2**52 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + /* Round the double in %f0 to an integer value using the current rounding mode (add and subtract 2**52 carrying the sign of x), then convert it and return it as a 64-bit integer in %o0. */ +ENTRY (__lrint) + sethi %hi(TWO_FIFTYTWO), %o2 + sllx %o2, 32, %o2 /* %o2 = bit pattern of 2**52 */ + fzero ZERO + + fnegd ZERO, SIGN_BIT /* SIGN_BIT = -0.0, i.e. only the sign bit set */ + stx %o2, [%sp + STACK_BIAS + 128] /* pass the constant to the FPU through a stack slot */ + fabsd %f0, %f14 /* %f14 = |x| */ + + ldd [%sp + STACK_BIAS + 128], %f16 /* %f16 = 2**52 */ + fcmpd %fcc3, %f14, %f16 + + fmovduge %fcc3, ZERO, %f16 /* |x| >= 2**52 or unordered (NaN): use 0.0 so the add/subtract no longer rounds */ + fand %f0, SIGN_BIT, SIGN_BIT /* SIGN_BIT = sign bit of x */ + + for %f16, SIGN_BIT, %f16 /* give the constant the sign of x */ + faddd %f0, %f16, %f6 + fsubd %f6, %f16, %f0 + fabsd %f0, %f0 + for %f0, SIGN_BIT, %f0 /* put the sign of x back on the rounded value */ + fdtox %f0, %f4 /* convert to a 64-bit integer, still held in an FP register */ + std %f4, [%sp + STACK_BIAS + 128] + retl + ldx [%sp + STACK_BIAS + 128], %o0 /* delay slot: reload the integer into the return register */ +END (__lrint) +weak_alias (__lrint, lrint) + /* llrint uses the same entry point as lrint. */ +strong_alias (__lrint, __llrint) +weak_alias (__llrint, llrint) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_lrintf.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_lrintf.S new file mode 100644 index 0000000000..5f2405ad9e --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_lrintf.S @@ -0,0 +1,62 @@ +/* lrintf(), sparc64 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2012. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* We pop constants into the FPU registers using the incoming + argument stack slots, since this avoids having to use any PIC + references. We also thus avoid having to allocate a register + window. + + VIS instructions are used to facilitate the formation of + easier constants, and the propagation of the sign bit. */ + +#define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + /* Round the float in %f1 to an integer value using the current rounding mode (add and subtract 2**23 carrying the sign of x), then convert it and return it as a 64-bit integer in %o0. */ +ENTRY (__lrintf) + sethi %hi(TWO_TWENTYTHREE), %o2 /* %o2 = bit pattern of 2**23 */ + fzeros ZERO + + fnegs ZERO, SIGN_BIT /* SIGN_BIT = -0.0, i.e. only the sign bit set */ + st %o2, [%sp + STACK_BIAS + 128] /* pass the constant to the FPU through a stack slot */ + fabss %f1, %f14 /* %f14 = |x| */ + + ld [%sp + STACK_BIAS + 128], %f16 /* %f16 = 2**23 */ + fcmps %fcc3, %f14, %f16 + + fmovsuge %fcc3, ZERO, %f16 /* |x| >= 2**23 or unordered (NaN): use 0.0 so the add/subtract no longer rounds */ + fands %f1, SIGN_BIT, SIGN_BIT /* SIGN_BIT = sign bit of x */ + + fors %f16, SIGN_BIT, %f16 /* give the constant the sign of x */ + fadds %f1, %f16, %f5 + fsubs %f5, %f16, %f0 + fabss %f0, %f0 + fors %f0, SIGN_BIT, %f0 /* put the sign of x back on the rounded value */ + fstox %f0, %f4 /* convert to a 64-bit integer, still held in an FP register */ + std %f4, [%sp + STACK_BIAS + 128] + retl + ldx [%sp + STACK_BIAS + 128], %o0 /* delay slot: reload the integer into the return register */ +END (__lrintf) +weak_alias (__lrintf, lrintf) + /* llrintf uses the same entry point as lrintf. */ +strong_alias (__lrintf, __llrintf) +weak_alias (__llrintf, llrintf) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_nearbyint.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_nearbyint.S new file mode 100644 index 0000000000..05ed2bce40 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_nearbyint.S @@ -0,0 +1,64 @@ +/* Round float to int floating-point values without generating + an inexact exception, sparc64 version. + + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. 
Miller <davem@davemloft.net>, 2013. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* We pop constants into the FPU registers using the incoming + argument stack slots, since this avoids having to use any PIC + references. We also thus avoid having to allocate a register + window. + + VIS instructions are used to facilitate the formation of + easier constants, and the propagation of the sign bit. 
*/ + +#define TWO_FIFTYTWO 0x43300000 /* 2**52 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + /* Round the double in %f0 to an integer value by adding and subtracting 2**52 carrying the sign of x. The rounding runs under a modified %fsr and the %fsr saved at entry is reloaded on return, so exception flags raised by the rounding itself are discarded. */ +ENTRY (__nearbyint) + fcmpd %fcc3, %f0, %f0 /* Check for sNaN */ + stx %fsr, [%sp + STACK_BIAS + 144] /* save %fsr (taken after the sNaN check above) */ + sethi %hi(TWO_FIFTYTWO), %o2 + sllx %o2, 32, %o2 /* %o2 = bit pattern of 2**52 */ + ldx [%sp + STACK_BIAS + 144], %o4 /* %o4 = copy of %fsr to edit */ + sethi %hi(0xf8003e0), %o5 + fzero ZERO + or %o5, %lo(0xf8003e0), %o5 /* mask of %fsr bits 27:23 and 9:5 (trap-enable and accrued-exception fields in the SPARC V9 layout) */ + fnegd ZERO, SIGN_BIT /* SIGN_BIT = -0.0, i.e. only the sign bit set */ + andn %o4, %o5, %o4 /* clear those fields */ + stx %o2, [%sp + STACK_BIAS + 128] /* pass the constant to the FPU through a stack slot */ + stx %o4, [%sp + STACK_BIAS + 136] + ldx [%sp + STACK_BIAS + 136], %fsr /* install the edited %fsr for the duration of the rounding */ + fabsd %f0, %f14 /* %f14 = |x| */ + ldd [%sp + STACK_BIAS + 128], %f16 /* %f16 = 2**52 */ + fcmpd %fcc3, %f14, %f16 + fmovduge %fcc3, ZERO, %f16 /* |x| >= 2**52 or unordered (NaN): use 0.0 so the add/subtract no longer rounds */ + fand %f0, SIGN_BIT, SIGN_BIT /* SIGN_BIT = sign bit of x */ + for %f16, SIGN_BIT, %f16 /* give the constant the sign of x */ + faddd %f0, %f16, %f6 + fsubd %f6, %f16, %f0 + fabsd %f0, %f0 + for %f0, SIGN_BIT, %f0 /* put the sign of x back on the result */ + retl + ldx [%sp + STACK_BIAS + 144], %fsr /* delay slot: restore the %fsr saved at entry */ +END (__nearbyint) +weak_alias (__nearbyint, nearbyint) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_nearbyintf.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_nearbyintf.S new file mode 100644 index 0000000000..55e3639a87 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_nearbyintf.S @@ -0,0 +1,63 @@ +/* Round float to int floating-point values without generating + an inexact exception, sparc64 version. + + Copyright (C) 2013-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2013. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* We pop constants into the FPU registers using the incoming + argument stack slots, since this avoids having to use any PIC + references. We also thus avoid having to allocate a register + window. + + VIS instructions are used to facilitate the formation of + easier constants, and the propagation of the sign bit. */ + +#define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + /* Round the float in %f1 to an integer value by adding and subtracting 2**23 carrying the sign of x; the result is left in %f0. The rounding runs under a modified %fsr and the %fsr saved at entry is reloaded on return, so exception flags raised by the rounding itself are discarded. */ +ENTRY (__nearbyintf) + fcmps %fcc3, %f1, %f1 /* Check for sNaN */ + stx %fsr, [%sp + STACK_BIAS + 144] /* save %fsr (taken after the sNaN check above) */ + sethi %hi(0xf8003e0), %o5 + sethi %hi(TWO_TWENTYTHREE), %o2 /* %o2 = bit pattern of 2**23 */ + ldx [%sp + STACK_BIAS + 144], %o4 /* %o4 = copy of %fsr to edit */ + or %o5, %lo(0xf8003e0), %o5 /* mask of %fsr bits 27:23 and 9:5 (trap-enable and accrued-exception fields in the SPARC V9 layout) */ + fzeros ZERO + andn %o4, %o5, %o4 /* clear those fields */ + fnegs ZERO, SIGN_BIT /* SIGN_BIT = -0.0, i.e. only the sign bit set */ + st %o2, [%sp + STACK_BIAS + 128] /* pass the constant to the FPU through a stack slot */ + stx %o4, [%sp + STACK_BIAS + 136] + ldx [%sp + STACK_BIAS + 136], %fsr /* install the edited %fsr for the duration of the rounding */ + fabss %f1, %f14 /* %f14 = |x| */ + ld [%sp + STACK_BIAS + 128], %f16 /* %f16 = 2**23 */ + fcmps %fcc3, %f14, %f16 + fmovsuge %fcc3, ZERO, %f16 /* |x| >= 2**23 or unordered (NaN): use 0.0 so the add/subtract no longer rounds */ + fands %f1, SIGN_BIT, SIGN_BIT /* SIGN_BIT = sign bit of x */ + fors %f16, SIGN_BIT, %f16 /* give the constant the sign of x */ + fadds %f1, %f16, %f5 + fsubs %f5, %f16, %f0 + fabss %f0, %f0 + fors %f0, SIGN_BIT, %f0 /* put the sign of x back on the result */ + retl + ldx [%sp + STACK_BIAS + 144], %fsr /* delay slot: restore the %fsr saved at entry */ +END (__nearbyintf) +weak_alias (__nearbyintf, nearbyintf) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_rint.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_rint.S new file mode 100644 index 0000000000..8d0134c90c --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_rint.S @@ -0,0 +1,57 @@ +/* Round float to int floating-point values, sparc64 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2012. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* We pop constants into the FPU registers using the incoming + argument stack slots, since this avoids having to use any PIC + references. We also thus avoid having to allocate a register + window. + + VIS instructions are used to facilitate the formation of + easier constants, and the propagation of the sign bit. */ + +#define TWO_FIFTYTWO 0x43300000 /* 2**52 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + /* Round the double in %f0 to an integer value in the current rounding mode by adding and then subtracting 2**52 carrying the sign of x. */ +ENTRY (__rint) + sethi %hi(TWO_FIFTYTWO), %o2 + sllx %o2, 32, %o2 /* %o2 = bit pattern of 2**52 */ + fzero ZERO + + fnegd ZERO, SIGN_BIT /* SIGN_BIT = -0.0, i.e. only the sign bit set */ + stx %o2, [%sp + STACK_BIAS + 128] /* pass the constant to the FPU through a stack slot */ + fabsd %f0, %f14 /* %f14 = |x| */ + + ldd [%sp + STACK_BIAS + 128], %f16 /* %f16 = 2**52 */ + fcmpd %fcc3, %f14, %f16 + + fmovduge %fcc3, ZERO, %f16 /* |x| >= 2**52 or unordered (NaN): use 0.0 so the add/subtract no longer rounds */ + fand %f0, SIGN_BIT, SIGN_BIT /* SIGN_BIT = sign bit of x */ + + for %f16, SIGN_BIT, %f16 /* give the constant the sign of x */ + faddd %f0, %f16, %f6 + fsubd %f6, %f16, %f0 + fabsd %f0, %f0 + retl + for %f0, SIGN_BIT, %f0 /* delay slot: put the sign of x back on the result */ +END (__rint) +weak_alias (__rint, rint) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_rintf.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_rintf.S new file mode 100644 index 0000000000..a2f9261382 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_rintf.S @@ -0,0 +1,56 @@ +/* Round float to int floating-point values, sparc64 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ Contributed by David S. Miller <davem@davemloft.net>, 2012. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + /* We pop constants into the FPU registers using the incoming + argument stack slots, since this avoids having to use any PIC + references. We also thus avoid having to allocate a register + window. + + VIS instructions are used to facilitate the formation of + easier constants, and the propagation of the sign bit. */ + +#define TWO_TWENTYTHREE 0x4b000000 /* 2**23 */ + +#define ZERO %f10 /* 0.0 */ +#define SIGN_BIT %f12 /* -0.0 */ + +ENTRY (__rintf) + sethi %hi(TWO_TWENTYTHREE), %o2 + fzeros ZERO + + fnegs ZERO, SIGN_BIT + st %o2, [%sp + STACK_BIAS + 128] + fabss %f1, %f14 + + ld [%sp + STACK_BIAS + 128], %f16 + fcmps %fcc3, %f14, %f16 + + fmovsuge %fcc3, ZERO, %f16 + fands %f1, SIGN_BIT, SIGN_BIT + + fors %f16, SIGN_BIT, %f16 + fadds %f1, %f16, %f5 + fsubs %f5, %f16, %f0 + fabss %f0, %f0 + retl + fors %f0, SIGN_BIT, %f0 +END (__rintf) +weak_alias (__rintf, rintf) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_signbit.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_signbit.S new file mode 100644 index 0000000000..979917b5d0 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_signbit.S @@ -0,0 +1,32 @@ +/* signbit(). sparc64 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY (__signbit) + st %f0, [%sp + STACK_BIAS + 128] + ld [%sp + STACK_BIAS + 128], %o0 + retl + srl %o0, 31, %o0 +END (__signbit) + +/* On 64-bit the double version will also always work for + long-double-precision since in both cases the word with the + sign bit in it is passed always in register %f0. */ +strong_alias (__signbit, __signbitl) +hidden_def (__signbitl) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_signbitf.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_signbitf.S new file mode 100644 index 0000000000..cb96983868 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_signbitf.S @@ -0,0 +1,26 @@ +/* signbitf(). sparc64 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY (__signbitf) + st %f1, [%sp + STACK_BIAS + 128] + ld [%sp + STACK_BIAS + 128], %o0 + retl + srl %o0, 31, %o0 +END (__signbitf) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_signbitl.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_signbitl.S new file mode 100644 index 0000000000..f5e5fb9253 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/s_signbitl.S @@ -0,0 +1 @@ +/* signbitl is implemented in s_signbit.S */ diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/w_sqrt_compat.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/w_sqrt_compat.S new file mode 100644 index 0000000000..de95e52e8e --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/w_sqrt_compat.S @@ -0,0 +1,48 @@ +/* sqrt function. sparc64 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +ENTRY (__sqrt) + fzero %f8 + fcmpd %fcc2, %f0, %f8 + fbl,pn %fcc2, 1f + nop +8: retl + fsqrtd %f0, %f0 +1: +#ifdef SHARED + SETUP_PIC_REG_LEAF(o5, g1) + sethi %gdop_hix22(_LIB_VERSION), %g1 + xor %g1, %gdop_lox10(_LIB_VERSION), %g1 + ldx [%o5 + %g1], %g1, %gdop(_LIB_VERSION) +#else + sethi %hi(_LIB_VERSION), %g1 + or %g1, %lo(_LIB_VERSION), %g1 +#endif + ld [%g1], %g1 + cmp %g1, -1 + be,pt %icc, 8b + fmovd %f0, %f2 + mov 26, %o2 + mov %o7, %g1 + call __kernel_standard + mov %g1, %o7 +END (__sqrt) + +weak_alias (__sqrt, sqrt) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/fpu/w_sqrtf_compat.S b/REORG.TODO/sysdeps/sparc/sparc64/fpu/w_sqrtf_compat.S new file mode 100644 index 0000000000..0c253fe43a --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/fpu/w_sqrtf_compat.S @@ -0,0 +1,48 @@ +/* sqrtf function. sparc64 version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +ENTRY (__sqrtf) + fzeros %f8 + fcmps %fcc2, %f1, %f8 + fbl,pn %fcc2, 1f + nop +8: retl + fsqrts %f1, %f0 +1: +#ifdef SHARED + SETUP_PIC_REG_LEAF(o5, g1) + sethi %gdop_hix22(_LIB_VERSION), %g1 + xor %g1, %gdop_lox10(_LIB_VERSION), %g1 + ldx [%o5 + %g1], %g1, %gdop(_LIB_VERSION) +#else + sethi %hi(_LIB_VERSION), %g1 + or %g1, %lo(_LIB_VERSION), %g1 +#endif + ld [%g1], %g1 + cmp %g1, -1 + be,pt %icc, 8b + fmovs %f1, %f3 + mov 126, %o2 + mov %o7, %g1 + call __kernel_standard_f + mov %g1, %o7 +END (__sqrtf) + +weak_alias (__sqrtf, sqrtf) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/hp-timing.h b/REORG.TODO/sysdeps/sparc/sparc64/hp-timing.h new file mode 100644 index 0000000000..433dd28d06 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/hp-timing.h @@ -0,0 +1,33 @@ +/* High precision, low overhead timing functions. sparc64 version. + Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@redhat.com>, 2001. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _HP_TIMING_H +#define _HP_TIMING_H 1 + +#define HP_TIMING_AVAIL (1) +#define HP_SMALL_TIMING_AVAIL (1) +#define HP_TIMING_INLINE (1) + +typedef unsigned long int hp_timing_t; + +#define HP_TIMING_NOW(Var) __asm__ __volatile__ ("rd %%tick, %0" : "=r" (Var)) + +#include <hp-timing-common.h> + +#endif /* hp-timing.h */ diff --git a/REORG.TODO/sysdeps/sparc/sparc64/jmpbuf-unwind.h b/REORG.TODO/sysdeps/sparc/sparc64/jmpbuf-unwind.h new file mode 100644 index 0000000000..eb2e2e2fb4 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/jmpbuf-unwind.h @@ -0,0 +1,37 @@ +/* Copyright (C) 2005-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller <davem@davemloft.net>, 2005. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <setjmp.h> +#include <stdint.h> +#include <unwind.h> + +/* Test if longjmp to JMPBUF would unwind the frame + containing a local variable at ADDRESS. 
*/ +#define _JMPBUF_UNWINDS(jmpbuf, address, demangle) \ + ((unsigned long int) (address) \ + < (jmpbuf)->__uc_mcontext.__mc_gregs[MC_O6] + 2047) + +#define _JMPBUF_CFA_UNWINDS_ADJ(_jmpbuf, _context, _adj) \ + _JMPBUF_UNWINDS_ADJ (_jmpbuf, (void *) _Unwind_GetCFA (_context), _adj) + +#define _JMPBUF_UNWINDS_ADJ(_jmpbuf, _address, _adj) \ + ((uintptr_t) (_address) - (_adj) \ + < (uintptr_t) (_jmpbuf)[0].__uc_mcontext.__mc_gregs[MC_O6] + 2047 - (_adj)) + +/* We use the normal longjmp for unwinding. */ +#define __libc_unwind_longjmp(buf, val) __libc_longjmp (buf, val) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/lshift.S b/REORG.TODO/sysdeps/sparc/sparc64/lshift.S new file mode 100644 index 0000000000..74ce43bb01 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/lshift.S @@ -0,0 +1,95 @@ +/* SPARC v9 __mpn_lshift -- + + Copyright (C) 1996-2017 Free Software Foundation, Inc. + + This file is part of the GNU MP Library. + + The GNU MP Library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or (at your + option) any later version. + + The GNU MP Library is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with the GNU MP Library; see the file COPYING.LIB. If not, + see <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* INPUT PARAMETERS + res_ptr %o0 + src_ptr %o1 + size %o2 + cnt %o3 */ + + .register %g2, #scratch + .register %g3, #scratch + +ENTRY(__mpn_lshift) + sllx %o2,3,%g1 + add %o1,%g1,%o1 ! make %o1 point at end of src + ldx [%o1-8],%g2 ! load first limb + sub %g0,%o3,%o5 ! negate shift count + add %o0,%g1,%o0 !
make %o0 point at end of res + add %o2,-1,%o2 + andcc %o2,4-1,%g4 ! number of limbs in first loop + srlx %g2,%o5,%g1 ! compute function result + be,pn %xcc,.L0 ! if multiple of 4 limbs, skip first loop + mov %g1,%g5 + + sub %o2,%g4,%o2 ! adjust count for main loop + +.Loop0: ldx [%o1-16],%g3 + add %o0,-8,%o0 + add %o1,-8,%o1 + sllx %g2,%o3,%o4 + addcc %g4,-1,%g4 + srlx %g3,%o5,%g1 + mov %g3,%g2 + or %o4,%g1,%o4 + bne,pt %xcc,.Loop0 + stx %o4,[%o0+0] + +.L0: brz,pn %o2,.Lend + nop + +.Loop: ldx [%o1-16],%g3 + add %o0,-32,%o0 + sllx %g2,%o3,%o4 + addcc %o2,-4,%o2 + srlx %g3,%o5,%g1 + + ldx [%o1-24],%g2 + sllx %g3,%o3,%g4 + or %o4,%g1,%o4 + stx %o4,[%o0+24] + srlx %g2,%o5,%g1 + + ldx [%o1-32],%g3 + sllx %g2,%o3,%o4 + or %g4,%g1,%g4 + stx %g4,[%o0+16] + srlx %g3,%o5,%g1 + + ldx [%o1-40],%g2 + sllx %g3,%o3,%g4 + or %o4,%g1,%o4 + stx %o4,[%o0+8] + srlx %g2,%o5,%g1 + + add %o1,-32,%o1 + or %g4,%g1,%g4 + bne,pt %xcc,.Loop + stx %g4,[%o0+0] + +.Lend: sllx %g2,%o3,%g2 + stx %g2,[%o0-8] + + jmpl %o7+8, %g0 + mov %g5,%o0 + +END(__mpn_lshift) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/memchr.S b/REORG.TODO/sysdeps/sparc/sparc64/memchr.S new file mode 100644 index 0000000000..e82075510b --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/memchr.S @@ -0,0 +1,260 @@ +/* memchr (str, ch, n) -- Return pointer to first occurrence of CH in STR less + than N. + For SPARC v9. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and + Jakub Jelinek <jj@ultra.linux.cz>. + This version is developed using the same algorithm as the fast C + version which carries the following introduction: + Based on strlen implementation by Torbjorn Granlund (tege@sics.se), + with help from Dan Sahlin (dan@sics.se) and + commentary by Jim Blandy (jimb@ai.mit.edu); + adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu), + and implemented by Roland McGrath (roland@ai.mit.edu). 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <asm/asi.h> +#ifndef XCC +#define XCC xcc +#define USE_BPR + .register %g2, #scratch + .register %g3, #scratch +#endif + + /* Normally, this uses + ((xword - 0x0101010101010101) & 0x8080808080808080) test + to find out if any byte in xword could be zero. This is fast, but + also gives false alarm for any byte in range 0x81-0xff. It does + not matter for correctness, as if this test tells us there could + be some zero byte, we check it byte by byte, but if bytes with + high bits set are common in the strings, then this will give poor + performance. You can #define EIGHTBIT_NOT_RARE and the algorithm + will use one tick slower, but more precise test + ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080), + which does not give any false alarms (but if some bits are set, + one cannot assume from it which bytes are zero and which are not). + It is yet to be measured, what is the correct default for glibc + in these days for an average user. 
+ */ + + .text + .align 32 +ENTRY(__memchr) + and %o1, 0xff, %o1 /* IEU0 Group */ +#ifdef USE_BPR + brz,pn %o2, 12f /* CTI+IEU1 */ +#else + tst %o2 /* IEU1 */ + be,pn %XCC, 12f /* CTI */ +#endif + sll %o1, 8, %g3 /* IEU0 Group */ + addcc %o0, %o2, %o2 /* IEU1 */ + movcs %XCC, -1, %o2 /* IEU0 Group */ + + sethi %hi(0x01010101), %g1 /* IEU0 Group */ + or %g3, %o1, %g3 /* IEU1 */ + ldub [%o0], %o3 /* Load */ + sllx %g3, 16, %g5 /* IEU0 Group */ + + or %g1, %lo(0x01010101), %g1 /* IEU1 */ + sllx %g1, 32, %g2 /* IEU0 Group */ + or %g3, %g5, %g3 /* IEU1 */ + sllx %g3, 32, %g5 /* IEU0 Group */ + + cmp %o3, %o1 /* IEU1 */ + be,pn %xcc, 13f /* CTI */ + or %g1, %g2, %g1 /* IEU0 Group */ + andcc %o0, 7, %g0 /* IEU1 */ + + bne,a,pn %icc, 21f /* CTI */ + add %o0, 1, %o0 /* IEU0 Group */ + ldx [%o0], %o3 /* Load Group */ + sllx %g1, 7, %g2 /* IEU0 */ + + or %g3, %g5, %g3 /* IEU1 */ +1: add %o0, 8, %o0 /* IEU0 Group */ + xor %o3, %g3, %o4 /* IEU1 */ + /* %g1 = 0101010101010101 * + * %g2 = 8080808080808080 * + * %g3 = c c c c c c c c * + * %o3 = value * + * %o4 = value XOR c */ +2: cmp %o0, %o2 /* IEU1 Group */ + + bgu,pn %XCC, 11f /* CTI */ + ldxa [%o0] ASI_PNF, %o3 /* Load */ + sub %o4, %g1, %o5 /* IEU0 Group */ + add %o0, 8, %o0 /* IEU1 */ +#ifdef EIGHTBIT_NOT_RARE + andn %o5, %o4, %o5 /* IEU0 Group */ +#endif + + andcc %o5, %g2, %g0 /* IEU1 Group */ + be,a,pt %xcc, 2b /* CTI */ + xor %o3, %g3, %o4 /* IEU0 */ + srlx %o4, 56, %g5 /* IEU0 */ + + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 3f /* CTI */ + srlx %o4, 48, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 4f /* CTI */ + srlx %o4, 40, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 5f /* CTI */ + + srlx %o4, 32, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 6f /* CTI */ + srlx %o4, 24, %g5 /* IEU0 */ + + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 7f /* CTI */ + srlx %o4, 16, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + + be,pn
%icc, 8f /* CTI */ + srlx %o4, 8, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 9f /* CTI */ + + andcc %o4, 0xff, %g0 /* IEU1 Group */ + bne,pt %icc, 2b /* CTI */ + xor %o3, %g3, %o4 /* IEU0 */ + retl /* CTI+IEU1 Group */ + + add %o0, -9, %o0 /* IEU0 */ + + .align 16 +3: retl /* CTI+IEU1 Group */ + add %o0, -16, %o0 /* IEU0 */ +4: retl /* CTI+IEU1 Group */ + add %o0, -15, %o0 /* IEU0 */ + +5: retl /* CTI+IEU1 Group */ + add %o0, -14, %o0 /* IEU0 */ +6: retl /* CTI+IEU1 Group */ + add %o0, -13, %o0 /* IEU0 */ + +7: retl /* CTI+IEU1 Group */ + add %o0, -12, %o0 /* IEU0 */ +8: retl /* CTI+IEU1 Group */ + add %o0, -11, %o0 /* IEU0 */ + +9: retl /* CTI+IEU1 Group */ + add %o0, -10, %o0 /* IEU0 */ +11: sub %o4, %g1, %o5 /* IEU0 Group */ + sub %o0, 8, %o0 /* IEU1 */ + + andcc %o5, %g2, %g0 /* IEU1 Group */ + be,pt %xcc, 12f /* CTI */ + sub %o2, %o0, %o2 /* IEU0 */ + tst %o2 /* IEU1 Group */ + + be,pn %XCC, 12f /* CTI */ + srlx %o4, 56, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 13f /* CTI */ + + cmp %o2, 1 /* IEU0 */ + be,pn %XCC, 12f /* CTI Group */ + srlx %o4, 48, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 14f /* CTI */ + cmp %o2, 2 /* IEU1 Group */ + be,pn %XCC, 12f /* CTI */ + srlx %o4, 40, %g5 /* IEU0 */ + + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 15f /* CTI */ + cmp %o2, 3 /* IEU1 Group */ + be,pn %XCC, 12f /* CTI */ + + srlx %o4, 32, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 16f /* CTI */ + cmp %o2, 4 /* IEU1 Group */ + + be,pn %XCC, 12f /* CTI */ + srlx %o4, 24, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 17f /* CTI */ + + cmp %o2, 5 /* IEU1 Group */ + be,pn %XCC, 12f /* CTI */ + srlx %o4, 16, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 18f /* CTI */ + cmp %o2, 6 /* IEU1 Group */ + be,pn %XCC, 12f /* CTI */ + srlx %o4, 8, %g5 /* IEU0 */ + + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 19f /* 
CTI */ + nop /* IEU0 */ +12: retl /* CTI+IEU1 Group */ + + clr %o0 /* IEU0 */ + nop /* Stub */ +13: retl /* CTI+IEU1 Group */ + nop /* IEU0 */ + +14: retl /* CTI+IEU1 Group */ + add %o0, 1, %o0 /* IEU0 */ +15: retl /* CTI+IEU1 Group */ + add %o0, 2, %o0 /* IEU0 */ + +16: retl /* CTI+IEU1 Group */ + add %o0, 3, %o0 /* IEU0 */ +17: retl /* CTI+IEU1 Group */ + add %o0, 4, %o0 /* IEU0 */ + +18: retl /* CTI+IEU1 Group */ + add %o0, 5, %o0 /* IEU0 */ +19: retl /* CTI+IEU1 Group */ + add %o0, 6, %o0 /* IEU0 */ + +21: cmp %o0, %o2 /* IEU1 */ + be,pn %XCC, 12b /* CTI */ + sllx %g1, 7, %g2 /* IEU0 Group */ + ldub [%o0], %o3 /* Load */ + + or %g3, %g5, %g3 /* IEU1 */ +22: andcc %o0, 7, %g0 /* IEU1 Group */ + be,a,pn %icc, 1b /* CTI */ + ldx [%o0], %o3 /* Load */ + + cmp %o3, %o1 /* IEU1 Group */ + be,pn %xcc, 23f /* CTI */ + add %o0, 1, %o0 /* IEU0 */ + cmp %o0, %o2 /* IEU1 Group */ + + bne,a,pt %XCC, 22b /* CTI */ + ldub [%o0], %o3 /* Load */ + retl /* CTI+IEU1 Group */ + clr %o0 /* IEU0 */ + +23: retl /* CTI+IEU1 Group */ + add %o0, -1, %o0 /* IEU0 */ +END(__memchr) + +weak_alias (__memchr, memchr) +libc_hidden_builtin_def (memchr) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/memcmp.S b/REORG.TODO/sysdeps/sparc/sparc64/memcmp.S new file mode 100644 index 0000000000..00ff2eec6b --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/memcmp.S @@ -0,0 +1,142 @@ +/* Compare two memory blocks for differences in the first COUNT bytes. + For SPARC v9. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and + Jakub Jelinek <jj@ultra.linux.cz>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <asm/asi.h> +#ifndef XCC +#define XCC xcc +#define USE_BPR + .register %g2, #scratch + .register %g3, #scratch +#endif + + .text + .align 32 +ENTRY(memcmp) +#ifdef USE_BPR + brz,pn %o2, 3f /* CTI+IEU1 Group */ +#else + tst %o2 /* IEU1 Group */ + be,pn %XCC, 3f /* CTI */ +#endif + andcc %o0, 7, %g0 /* IEU1 Group */ + bne,pn %icc, 8f /* CTI */ +1: andcc %o1, 7, %g1 /* IEU1 Group */ + + bne,pn %icc, 10f /* CTI */ + mov 64, %g3 /* IEU0 */ + ldx [%o0], %g1 /* Load Group */ + sub %o1, %o0, %o1 /* IEU0 */ + + ldx [%o0 + %o1], %g2 /* Load Group */ + add %o0, 8, %o0 /* IEU0 */ +2: mov %g1, %o3 /* IEU0 Group */ + subcc %o2, 8, %o2 /* IEU1 */ + + bcs,pn %XCC, 5f /* CTI */ + ldxa [%o0] ASI_PNF, %g1 /* Load Group */ + mov %g2, %o4 /* IEU0 */ + ldxa [%o0 + %o1] ASI_PNF, %g2 /* Load Group */ + + cmp %o3, %o4 /* IEU1 */ + be,pt %xcc, 2b /* CTI */ + add %o0, 8, %o0 /* IEU0 */ +7: mov -1, %o0 /* IEU1 */ + + retl /* CTI+IEU1 Group */ + movgu %xcc, 1, %o0 /* Single Group */ +3: retl /* CTI+IEU1 Group */ + clr %o0 /* IEU0 */ + + .align 16 +5: mov %g2, %o4 /* IEU0 */ +6: cmp %o2, -8 /* IEU1 */ + be,pn %XCC, 3b /* CTI */ + sub %g0, %o2, %o2 /* IEU0 Group */ + + sllx %o2, 3, %o2 /* IEU0 Group */ + srlx %o3, %o2, %o3 /* IEU0 Group */ + srlx %o4, %o2, %o4 /* IEU0 Group */ + clr %o0 /* IEU1 */ + + cmp %o3, %o4 /* IEU1 Group */ + movgu %xcc, 1, %o0 /* Single Group */ + retl /* CTI+IEU1 Group */ + movlu %xcc, -1, %o0 /* Single Group */ + +8: ldub [%o0], %o3 /* Load */ + add %o0, 1, %o0 /* IEU0 */ + ldub [%o1], %o4 /* Load Group */ + add %o1, 1, %o1 /* IEU0 
*/ + +9: cmp %o3, %o4 /* IEU1 Group */ + bne,pn %xcc, 12f /* CTI */ + subcc %o2, 1, %o2 /* IEU1 Group */ + be,pn %XCC, 3b /* CTI */ + + lduba [%o0] ASI_PNF, %o3 /* Load */ + andcc %o0, 7, %g0 /* IEU1 Group */ + be,pn %icc, 1b /* CTI */ + lduba [%o1] ASI_PNF, %o4 /* Load */ + + add %o0, 1, %o0 /* IEU0 Group */ + ba,pt %xcc, 9b /* CTI */ + add %o1, 1, %o1 /* IEU1 */ + + .align 16 +12: mov -1, %o0 /* IEU0 Group */ + cmp %o3, %o4 /* IEU1 */ + retl /* CTI+IEU1 Group */ + movgu %xcc, 1, %o0 /* Single Group */ + + .align 16 + nop /* Stub */ +10: sllx %g1, 3, %g2 /* IEU0 Group */ + sub %o1, %g1, %o1 /* IEU1 */ + sub %g3, %g2, %g3 /* IEU0 Group */ + + ldxa [%o0] ASI_PNF, %g5 /* Load */ + sub %o1, %o0, %o1 /* IEU1 */ + ldxa [%o0 + %o1] ASI_PNF, %g4 /* Load Group */ + add %o0, 8, %o0 /* IEU0 */ + +11: sllx %g4, %g2, %o4 /* IEU0 Group */ + ldxa [%o0 + %o1] ASI_PNF, %g4 /* Load */ + srlx %g4, %g3, %o5 /* IEU0 Group */ + mov %g5, %o3 /* IEU1 */ + + ldxa [%o0] ASI_PNF, %g5 /* Load */ + subcc %o2, 8, %o2 /* IEU1 Group */ + bcs,pn %XCC, 6b /* CTI */ + or %o4, %o5, %o4 /* IEU0 */ + + cmp %o3, %o4 /* IEU1 Group */ + be,pt %xcc, 11b /* CTI */ + add %o0, 8, %o0 /* IEU0 */ + mov -1, %o0 /* IEU0 */ + + retl /* CTI+IEU1 Group */ + movgu %xcc, 1, %o0 /* Single Group */ +END(memcmp) + +#undef bcmp +weak_alias (memcmp, bcmp) +libc_hidden_builtin_def (memcmp) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/memcpy.S b/REORG.TODO/sysdeps/sparc/sparc64/memcpy.S new file mode 100644 index 0000000000..c1c0bf64d1 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/memcpy.S @@ -0,0 +1,580 @@ +/* Copy SIZE bytes from SRC to DEST. + For UltraSPARC. + Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller (davem@caip.rutgers.edu) and + Jakub Jelinek (jakub@redhat.com). 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <asm/asi.h> +#ifndef XCC +#define USE_BPR + .register %g2, #scratch + .register %g3, #scratch + .register %g6, #scratch +#define XCC xcc +#endif +#define FPRS_FEF 4 + +#define FREG_FROB(f1, f2, f3, f4, f5, f6, f7, f8, f9) \ + faligndata %f1, %f2, %f48; \ + faligndata %f2, %f3, %f50; \ + faligndata %f3, %f4, %f52; \ + faligndata %f4, %f5, %f54; \ + faligndata %f5, %f6, %f56; \ + faligndata %f6, %f7, %f58; \ + faligndata %f7, %f8, %f60; \ + faligndata %f8, %f9, %f62; + +#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \ + ldda [%src] %asi, %fdest; \ + add %src, 0x40, %src; \ + add %dest, 0x40, %dest; \ + subcc %len, 0x40, %len; \ + be,pn %xcc, jmptgt; \ + stda %fsrc, [%dest - 0x40] %asi; + +#define LOOP_CHUNK1(src, dest, len, branch_dest) \ + MAIN_LOOP_CHUNK(src, dest, f0, f48, len, branch_dest) +#define LOOP_CHUNK2(src, dest, len, branch_dest) \ + MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest) +#define LOOP_CHUNK3(src, dest, len, branch_dest) \ + MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) + +#define STORE_SYNC(dest, fsrc) \ + stda %fsrc, [%dest] %asi; \ + add %dest, 0x40, %dest; + +#define STORE_JUMP(dest, fsrc, target) \ + stda %fsrc, [%dest] %asi; \ + add %dest, 0x40, %dest; \ + ba,pt %xcc, target; + +#define VISLOOP_PAD nop; nop; nop; 
nop; \ + nop; nop; nop; nop; \ + nop; nop; nop; nop; \ + nop; nop; nop; + +#define FINISH_VISCHUNK(dest, f0, f1, left) \ + subcc %left, 8, %left; \ + bl,pn %xcc, 205f; \ + faligndata %f0, %f1, %f48; \ + std %f48, [%dest]; \ + add %dest, 8, %dest; + +#define UNEVEN_VISCHUNK(dest, f0, f1, left) \ + subcc %left, 8, %left; \ + bl,pn %xcc, 205f; \ + fsrc2 %f0, %f1; \ + ba,a,pt %xcc, 204f; + + /* Macros for non-VIS memcpy code. */ +#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \ + ldx [%src + offset + 0x00], %t0; \ + ldx [%src + offset + 0x08], %t1; \ + ldx [%src + offset + 0x10], %t2; \ + ldx [%src + offset + 0x18], %t3; \ + stw %t0, [%dst + offset + 0x04]; \ + srlx %t0, 32, %t0; \ + stw %t0, [%dst + offset + 0x00]; \ + stw %t1, [%dst + offset + 0x0c]; \ + srlx %t1, 32, %t1; \ + stw %t1, [%dst + offset + 0x08]; \ + stw %t2, [%dst + offset + 0x14]; \ + srlx %t2, 32, %t2; \ + stw %t2, [%dst + offset + 0x10]; \ + stw %t3, [%dst + offset + 0x1c]; \ + srlx %t3, 32, %t3; \ + stw %t3, [%dst + offset + 0x18]; + +#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \ + ldx [%src + offset + 0x00], %t0; \ + ldx [%src + offset + 0x08], %t1; \ + ldx [%src + offset + 0x10], %t2; \ + ldx [%src + offset + 0x18], %t3; \ + stx %t0, [%dst + offset + 0x00]; \ + stx %t1, [%dst + offset + 0x08]; \ + stx %t2, [%dst + offset + 0x10]; \ + stx %t3, [%dst + offset + 0x18]; \ + ldx [%src + offset + 0x20], %t0; \ + ldx [%src + offset + 0x28], %t1; \ + ldx [%src + offset + 0x30], %t2; \ + ldx [%src + offset + 0x38], %t3; \ + stx %t0, [%dst + offset + 0x20]; \ + stx %t1, [%dst + offset + 0x28]; \ + stx %t2, [%dst + offset + 0x30]; \ + stx %t3, [%dst + offset + 0x38]; + +#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ + ldx [%src - offset - 0x10], %t0; \ + ldx [%src - offset - 0x08], %t1; \ + stw %t0, [%dst - offset - 0x0c]; \ + srlx %t0, 32, %t2; \ + stw %t2, [%dst - offset - 0x10]; \ + stw %t1, [%dst - offset - 0x04]; \ + srlx %t1, 32, %t3; \ + stw %t3, [%dst - offset - 
0x08]; + +#define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \ + ldx [%src - offset - 0x10], %t0; \ + ldx [%src - offset - 0x08], %t1; \ + stx %t0, [%dst - offset - 0x10]; \ + stx %t1, [%dst - offset - 0x08]; + + .text + .align 32 +ENTRY(__memcpy_large) +200: be,pt %xcc, 201f /* CTI */ + andcc %o0, 0x38, %g5 /* IEU1 Group */ + mov 8, %g1 /* IEU0 */ + sub %g1, %g2, %g2 /* IEU0 Group */ + andcc %o0, 1, %g0 /* IEU1 */ + be,pt %icc, 2f /* CTI */ + sub %o2, %g2, %o2 /* IEU0 Group */ +1: ldub [%o1], %o5 /* Load Group */ + add %o1, 1, %o1 /* IEU0 */ + add %o0, 1, %o0 /* IEU1 */ + subcc %g2, 1, %g2 /* IEU1 Group */ + be,pn %xcc, 3f /* CTI */ + stb %o5, [%o0 - 1] /* Store */ +2: ldub [%o1], %o5 /* Load Group */ + add %o0, 2, %o0 /* IEU0 */ + ldub [%o1 + 1], %g3 /* Load Group */ + subcc %g2, 2, %g2 /* IEU1 Group */ + stb %o5, [%o0 - 2] /* Store */ + add %o1, 2, %o1 /* IEU0 */ + bne,pt %xcc, 2b /* CTI Group */ + stb %g3, [%o0 - 1] /* Store */ +3: andcc %o0, 0x38, %g5 /* IEU1 Group */ +201: be,pt %icc, 202f /* CTI */ + mov 64, %g1 /* IEU0 */ + fsrc2 %f0, %f2 /* FPU */ + sub %g1, %g5, %g5 /* IEU0 Group */ + alignaddr %o1, %g0, %g1 /* GRU Group */ + ldd [%g1], %f4 /* Load Group */ + sub %o2, %g5, %o2 /* IEU0 */ +1: ldd [%g1 + 0x8], %f6 /* Load Group */ + add %g1, 0x8, %g1 /* IEU0 Group */ + subcc %g5, 8, %g5 /* IEU1 */ + faligndata %f4, %f6, %f0 /* GRU Group */ + std %f0, [%o0] /* Store */ + add %o1, 8, %o1 /* IEU0 Group */ + be,pn %xcc, 202f /* CTI */ + add %o0, 8, %o0 /* IEU1 */ + ldd [%g1 + 0x8], %f4 /* Load Group */ + add %g1, 8, %g1 /* IEU0 */ + subcc %g5, 8, %g5 /* IEU1 */ + faligndata %f6, %f4, %f0 /* GRU Group */ + std %f0, [%o0] /* Store */ + add %o1, 8, %o1 /* IEU0 */ + bne,pt %xcc, 1b /* CTI Group */ + add %o0, 8, %o0 /* IEU0 */ +202: membar #LoadStore | #StoreStore | #StoreLoad /* LSU Group */ + wr %g0, ASI_BLK_P, %asi /* LSU Group */ + subcc %o2, 0x40, %g6 /* IEU1 Group */ + mov %o1, %g1 /* IEU0 */ + andncc %g6, (0x40 - 1), %g6 /* IEU1 Group */ + srl %g1, 3, %g2 
/* IEU0 */ + sub %o2, %g6, %g3 /* IEU0 Group */ + andn %o1, (0x40 - 1), %o1 /* IEU1 */ + and %g2, 7, %g2 /* IEU0 Group */ + andncc %g3, 0x7, %g3 /* IEU1 */ + fsrc2 %f0, %f2 /* FPU */ + sub %g3, 0x10, %g3 /* IEU0 Group */ + sub %o2, %g6, %o2 /* IEU1 */ + alignaddr %g1, %g0, %g0 /* GRU Group */ + add %g1, %g6, %g1 /* IEU0 Group */ + subcc %o2, %g3, %o2 /* IEU1 */ + ldda [%o1 + 0x00] %asi, %f0 /* LSU Group */ + add %g1, %g3, %g1 /* IEU0 */ + ldda [%o1 + 0x40] %asi, %f16 /* LSU Group */ + sub %g6, 0x80, %g6 /* IEU0 */ + ldda [%o1 + 0x80] %asi, %f32 /* LSU Group */ + /* Clk1 Group 8-( */ + /* Clk2 Group 8-( */ + /* Clk3 Group 8-( */ + /* Clk4 Group 8-( */ +203: rd %pc, %g5 /* PDU Group 8-( */ + addcc %g5, %lo(300f - 203b), %g5 /* IEU1 Group */ + sll %g2, 9, %g2 /* IEU0 */ + jmpl %g5 + %g2, %g0 /* CTI Group brk forced*/ + addcc %o1, 0xc0, %o1 /* IEU1 Group */ + + .align 512 /* OK, here comes the fun part... */ +300: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) LOOP_CHUNK1(o1, o0, g6, 301f) + FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) LOOP_CHUNK2(o1, o0, g6, 302f) + FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) LOOP_CHUNK3(o1, o0, g6, 303f) + b,pt %xcc, 300b+4; faligndata %f0, %f2, %f48 +301: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) STORE_JUMP(o0, f48, 400f) membar #Sync +302: FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) STORE_JUMP(o0, f48, 416f) membar #Sync +303: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) STORE_JUMP(o0, f48, 432f) membar #Sync + VISLOOP_PAD +310: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) LOOP_CHUNK1(o1, o0, g6, 311f) + FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) LOOP_CHUNK2(o1, o0, g6, 312f) + FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) LOOP_CHUNK3(o1, o0, g6, 313f) + b,pt %xcc, 310b+4; 
faligndata %f2, %f4, %f48 +311: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) STORE_JUMP(o0, f48, 402f) membar #Sync +312: FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) STORE_JUMP(o0, f48, 418f) membar #Sync +313: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) STORE_JUMP(o0, f48, 434f) membar #Sync + VISLOOP_PAD +320: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) LOOP_CHUNK1(o1, o0, g6, 321f) + FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) LOOP_CHUNK2(o1, o0, g6, 322f) + FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) LOOP_CHUNK3(o1, o0, g6, 323f) + b,pt %xcc, 320b+4; faligndata %f4, %f6, %f48 +321: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) STORE_JUMP(o0, f48, 404f) membar #Sync +322: FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) STORE_JUMP(o0, f48, 420f) membar #Sync +323: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) STORE_JUMP(o0, f48, 436f) membar #Sync + VISLOOP_PAD +330: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) LOOP_CHUNK1(o1, o0, g6, 331f) + FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) LOOP_CHUNK2(o1, o0, g6, 332f) + FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) LOOP_CHUNK3(o1, o0, g6, 333f) + b,pt %xcc, 330b+4; faligndata %f6, %f8, %f48 +331: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) STORE_JUMP(o0, f48, 406f) membar #Sync +332: FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) STORE_JUMP(o0, f48, 
422f) membar #Sync +333: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) STORE_JUMP(o0, f48, 438f) membar #Sync + VISLOOP_PAD +340: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) LOOP_CHUNK1(o1, o0, g6, 341f) + FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) LOOP_CHUNK2(o1, o0, g6, 342f) + FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) LOOP_CHUNK3(o1, o0, g6, 343f) + b,pt %xcc, 340b+4; faligndata %f8, %f10, %f48 +341: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) STORE_JUMP(o0, f48, 408f) membar #Sync +342: FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) STORE_JUMP(o0, f48, 424f) membar #Sync +343: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) STORE_JUMP(o0, f48, 440f) membar #Sync + VISLOOP_PAD +350: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) LOOP_CHUNK1(o1, o0, g6, 351f) + FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) LOOP_CHUNK2(o1, o0, g6, 352f) + FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) LOOP_CHUNK3(o1, o0, g6, 353f) + b,pt %xcc, 350b+4; faligndata %f10, %f12, %f48 +351: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) STORE_JUMP(o0, f48, 410f) membar #Sync +352: FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) STORE_JUMP(o0, f48, 426f) membar #Sync +353: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) STORE_JUMP(o0, f48, 442f) membar #Sync + VISLOOP_PAD +360: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) LOOP_CHUNK1(o1, o0, g6, 361f) + FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) 
LOOP_CHUNK2(o1, o0, g6, 362f) + FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) LOOP_CHUNK3(o1, o0, g6, 363f) + b,pt %xcc, 360b+4; faligndata %f12, %f14, %f48 +361: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) STORE_JUMP(o0, f48, 412f) membar #Sync +362: FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) STORE_JUMP(o0, f48, 428f) membar #Sync +363: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) STORE_JUMP(o0, f48, 444f) membar #Sync + VISLOOP_PAD +370: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) LOOP_CHUNK1(o1, o0, g6, 371f) + FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) LOOP_CHUNK2(o1, o0, g6, 372f) + FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) LOOP_CHUNK3(o1, o0, g6, 373f) + b,pt %xcc, 370b+4; faligndata %f14, %f16, %f48 +371: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) STORE_JUMP(o0, f48, 414f) membar #Sync +372: FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) STORE_JUMP(o0, f48, 430f) membar #Sync +373: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) STORE_SYNC(o0, f48) membar #Sync + FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) STORE_JUMP(o0, f48, 446f) membar #Sync + VISLOOP_PAD +400: FINISH_VISCHUNK(o0, f0, f2, g3) +402: FINISH_VISCHUNK(o0, f2, f4, g3) +404: FINISH_VISCHUNK(o0, f4, f6, g3) +406: FINISH_VISCHUNK(o0, f6, f8, g3) +408: FINISH_VISCHUNK(o0, f8, f10, g3) +410: FINISH_VISCHUNK(o0, f10, f12, g3) +412: FINISH_VISCHUNK(o0, f12, f14, g3) +414: UNEVEN_VISCHUNK(o0, f14, f0, g3) +416: FINISH_VISCHUNK(o0, f16, f18, g3) +418: FINISH_VISCHUNK(o0, f18, f20, g3) +420: FINISH_VISCHUNK(o0, f20, f22, g3) +422: FINISH_VISCHUNK(o0, f22, f24, g3) +424: 
FINISH_VISCHUNK(o0, f24, f26, g3) +426: FINISH_VISCHUNK(o0, f26, f28, g3) +428: FINISH_VISCHUNK(o0, f28, f30, g3) +430: UNEVEN_VISCHUNK(o0, f30, f0, g3) +432: FINISH_VISCHUNK(o0, f32, f34, g3) +434: FINISH_VISCHUNK(o0, f34, f36, g3) +436: FINISH_VISCHUNK(o0, f36, f38, g3) +438: FINISH_VISCHUNK(o0, f38, f40, g3) +440: FINISH_VISCHUNK(o0, f40, f42, g3) +442: FINISH_VISCHUNK(o0, f42, f44, g3) +444: FINISH_VISCHUNK(o0, f44, f46, g3) +446: UNEVEN_VISCHUNK(o0, f46, f0, g3) +204: ldd [%o1], %f2 /* Load Group */ + add %o1, 8, %o1 /* IEU0 */ + subcc %g3, 8, %g3 /* IEU1 */ + faligndata %f0, %f2, %f8 /* GRU Group */ + std %f8, [%o0] /* Store */ + bl,pn %xcc, 205f /* CTI */ + add %o0, 8, %o0 /* IEU0 Group */ + ldd [%o1], %f0 /* Load Group */ + add %o1, 8, %o1 /* IEU0 */ + subcc %g3, 8, %g3 /* IEU1 */ + faligndata %f2, %f0, %f8 /* GRU Group */ + std %f8, [%o0] /* Store */ + bge,pt %xcc, 204b /* CTI */ + add %o0, 8, %o0 /* IEU0 Group */ +205: brz,pt %o2, 207f /* CTI Group */ + mov %g1, %o1 /* IEU0 */ +206: ldub [%o1], %g5 /* LOAD */ + add %o1, 1, %o1 /* IEU0 */ + add %o0, 1, %o0 /* IEU1 */ + subcc %o2, 1, %o2 /* IEU1 */ + bne,pt %xcc, 206b /* CTI */ + stb %g5, [%o0 - 1] /* Store Group */ +207: membar #StoreLoad | #StoreStore /* LSU Group */ + wr %g0, FPRS_FEF, %fprs + retl + mov %g4, %o0 + +208: andcc %o2, 1, %g0 /* IEU1 Group */ + be,pt %icc, 2f+4 /* CTI */ +1: ldub [%o1], %g5 /* LOAD Group */ + add %o1, 1, %o1 /* IEU0 */ + add %o0, 1, %o0 /* IEU1 */ + subcc %o2, 1, %o2 /* IEU1 Group */ + be,pn %xcc, 209f /* CTI */ + stb %g5, [%o0 - 1] /* Store */ +2: ldub [%o1], %g5 /* LOAD Group */ + add %o0, 2, %o0 /* IEU0 */ + ldub [%o1 + 1], %o5 /* LOAD Group */ + add %o1, 2, %o1 /* IEU0 */ + subcc %o2, 2, %o2 /* IEU1 Group */ + stb %g5, [%o0 - 2] /* Store */ + bne,pt %xcc, 2b /* CTI */ + stb %o5, [%o0 - 1] /* Store */ +209: retl + mov %g4, %o0 +END(__memcpy_large) + +ENTRY(__mempcpy) + ba,pt %xcc, 210f + add %o0, %o2, %g4 +END(__mempcpy) + + .align 32 +ENTRY(memcpy) + mov %o0, %g4 /* IEU0 
Group */ +210: +#ifndef USE_BPR + srl %o2, 0, %o2 /* IEU1 */ +#endif + brz,pn %o2, 209b /* CTI Group */ +218: cmp %o2, 15 /* IEU1 */ + bleu,pn %xcc, 208b /* CTI Group */ + cmp %o2, (64 * 6) /* IEU1 */ + bgeu,pn %xcc, 200b /* CTI Group */ + andcc %o0, 7, %g2 /* IEU1 */ + sub %o0, %o1, %g5 /* IEU0 */ + andcc %g5, 3, %o5 /* IEU1 Group */ + bne,pn %xcc, 212f /* CTI */ + andcc %o1, 3, %g0 /* IEU1 Group */ + be,a,pt %xcc, 216f /* CTI */ + andcc %o1, 4, %g0 /* IEU1 Group */ + andcc %o1, 1, %g0 /* IEU1 Group */ + be,pn %xcc, 4f /* CTI */ + andcc %o1, 2, %g0 /* IEU1 Group */ + ldub [%o1], %g2 /* Load Group */ + add %o1, 1, %o1 /* IEU0 */ + add %o0, 1, %o0 /* IEU1 */ + sub %o2, 1, %o2 /* IEU0 Group */ + bne,pn %xcc, 5f /* CTI Group */ + stb %g2, [%o0 - 1] /* Store */ +4: lduh [%o1], %g2 /* Load Group */ + add %o1, 2, %o1 /* IEU0 */ + add %o0, 2, %o0 /* IEU1 */ + sub %o2, 2, %o2 /* IEU0 */ + sth %g2, [%o0 - 2] /* Store Group + bubble */ +5: andcc %o1, 4, %g0 /* IEU1 */ +216: be,a,pn %xcc, 2f /* CTI */ + andcc %o2, -128, %g6 /* IEU1 Group */ + lduw [%o1], %g5 /* Load Group */ + add %o1, 4, %o1 /* IEU0 */ + add %o0, 4, %o0 /* IEU1 */ + sub %o2, 4, %o2 /* IEU0 Group */ + stw %g5, [%o0 - 4] /* Store */ + andcc %o2, -128, %g6 /* IEU1 Group */ +2: be,pn %xcc, 215f /* CTI */ + andcc %o0, 4, %g0 /* IEU1 Group */ + be,pn %xcc, 82f + 4 /* CTI Group */ +5: MOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5) + MOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5) + MOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5) + MOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5) +35: subcc %g6, 128, %g6 /* IEU1 Group */ + add %o1, 128, %o1 /* IEU0 */ + bne,pt %xcc, 5b /* CTI */ + add %o0, 128, %o0 /* IEU0 Group */ +215: andcc %o2, 0x70, %g6 /* IEU1 Group */ +41: be,pn %xcc, 80f /* CTI */ + andcc %o2, 8, %g0 /* IEU1 Group */ + /* Clk1 8-( */ + /* Clk2 8-( */ + /* Clk3 8-( */ + /* Clk4 8-( */ +79: rd %pc, %o5 /* PDU Group */ + sll %g6, 1, %g5 /* IEU0 Group */ + add %o1, %g6, %o1 /* IEU1 */ + sub %o5, %g5, %o5 /* IEU0 Group */ + jmpl 
%o5 + %lo(80f - 79b), %g0 /* CTI Group brk forced*/ + add %o0, %g6, %o0 /* IEU0 Group */ +36: MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5) + MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5) + MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5) + MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5) + MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5) + MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5) + MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5) +80: be,pt %xcc, 81f /* CTI */ + andcc %o2, 4, %g0 /* IEU1 */ + ldx [%o1], %g2 /* Load Group */ + add %o0, 8, %o0 /* IEU0 */ + stw %g2, [%o0 - 0x4] /* Store Group */ + add %o1, 8, %o1 /* IEU1 */ + srlx %g2, 32, %g2 /* IEU0 Group */ + stw %g2, [%o0 - 0x8] /* Store */ +81: be,pt %xcc, 1f /* CTI */ + andcc %o2, 2, %g0 /* IEU1 Group */ + lduw [%o1], %g2 /* Load Group */ + add %o1, 4, %o1 /* IEU0 */ + stw %g2, [%o0] /* Store Group */ + add %o0, 4, %o0 /* IEU0 */ +1: be,pt %xcc, 1f /* CTI */ + andcc %o2, 1, %g0 /* IEU1 Group */ + lduh [%o1], %g2 /* Load Group */ + add %o1, 2, %o1 /* IEU0 */ + sth %g2, [%o0] /* Store Group */ + add %o0, 2, %o0 /* IEU0 */ +1: be,pt %xcc, 211f /* CTI */ + nop /* IEU1 */ + ldub [%o1], %g2 /* Load Group */ + stb %g2, [%o0] /* Store Group + bubble */ +211: retl + mov %g4, %o0 + +82: MOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5) + MOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5) +37: subcc %g6, 128, %g6 /* IEU1 Group */ + add %o1, 128, %o1 /* IEU0 */ + bne,pt %xcc, 82b /* CTI */ + add %o0, 128, %o0 /* IEU0 Group */ + andcc %o2, 0x70, %g6 /* IEU1 */ + be,pn %xcc, 84f /* CTI */ + andcc %o2, 8, %g0 /* IEU1 Group */ + /* Clk1 8-( */ + /* Clk2 8-( */ + /* Clk3 8-( */ + /* Clk4 8-( */ +83: rd %pc, %o5 /* PDU Group */ + add %o1, %g6, %o1 /* IEU0 Group */ + sub %o5, %g6, %o5 /* IEU1 */ + jmpl %o5 + %lo(84f - 83b), %g0 /* CTI Group brk forced*/ + add %o0, %g6, %o0 /* IEU0 Group */ +38: MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3) + MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3) + MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3) + 
MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3) + MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3) + MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3) + MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3) +84: be,pt %xcc, 85f /* CTI Group */ + andcc %o2, 4, %g0 /* IEU1 */ + ldx [%o1], %g2 /* Load Group */ + add %o0, 8, %o0 /* IEU0 */ + add %o1, 8, %o1 /* IEU0 Group */ + stx %g2, [%o0 - 0x8] /* Store */ +85: be,pt %xcc, 1f /* CTI */ + andcc %o2, 2, %g0 /* IEU1 Group */ + lduw [%o1], %g2 /* Load Group */ + add %o0, 4, %o0 /* IEU0 */ + add %o1, 4, %o1 /* IEU0 Group */ + stw %g2, [%o0 - 0x4] /* Store */ +1: be,pt %xcc, 1f /* CTI */ + andcc %o2, 1, %g0 /* IEU1 Group */ + lduh [%o1], %g2 /* Load Group */ + add %o0, 2, %o0 /* IEU0 */ + add %o1, 2, %o1 /* IEU0 Group */ + sth %g2, [%o0 - 0x2] /* Store */ +1: be,pt %xcc, 1f /* CTI */ + nop /* IEU0 Group */ + ldub [%o1], %g2 /* Load Group */ + stb %g2, [%o0] /* Store Group + bubble */ +1: retl + mov %g4, %o0 + +212: brz,pt %g2, 2f /* CTI Group */ + mov 8, %g1 /* IEU0 */ + sub %g1, %g2, %g2 /* IEU0 Group */ + sub %o2, %g2, %o2 /* IEU0 Group */ +1: ldub [%o1], %g5 /* Load Group */ + add %o1, 1, %o1 /* IEU0 */ + add %o0, 1, %o0 /* IEU1 */ + subcc %g2, 1, %g2 /* IEU1 Group */ + bne,pt %xcc, 1b /* CTI */ + stb %g5, [%o0 - 1] /* Store */ +2: andn %o2, 7, %g5 /* IEU0 Group */ + and %o2, 7, %o2 /* IEU1 */ + fsrc2 %f0, %f2 /* FPU */ + alignaddr %o1, %g0, %g1 /* GRU Group */ + ldd [%g1], %f4 /* Load Group */ +1: ldd [%g1 + 0x8], %f6 /* Load Group */ + add %g1, 0x8, %g1 /* IEU0 Group */ + subcc %g5, 8, %g5 /* IEU1 */ + faligndata %f4, %f6, %f0 /* GRU Group */ + std %f0, [%o0] /* Store */ + add %o1, 8, %o1 /* IEU0 Group */ + be,pn %xcc, 213f /* CTI */ + add %o0, 8, %o0 /* IEU1 */ + ldd [%g1 + 0x8], %f4 /* Load Group */ + add %g1, 8, %g1 /* IEU0 */ + subcc %g5, 8, %g5 /* IEU1 */ + faligndata %f6, %f4, %f0 /* GRU Group */ + std %f0, [%o0] /* Store */ + add %o1, 8, %o1 /* IEU0 */ + bne,pn %xcc, 1b /* CTI Group */ + add %o0, 8, %o0 /* IEU0 */ +213: brz,pn %o2, 214f /* CTI 
Group */ + nop /* IEU0 */ + ldub [%o1], %g5 /* LOAD */ + add %o1, 1, %o1 /* IEU0 */ + add %o0, 1, %o0 /* IEU1 */ + subcc %o2, 1, %o2 /* IEU1 */ + bne,pt %xcc, 206b /* CTI */ + stb %g5, [%o0 - 1] /* Store Group */ +214: wr %g0, FPRS_FEF, %fprs + retl + mov %g4, %o0 +END(memcpy) + +libc_hidden_builtin_def (memcpy) + +libc_hidden_def (__mempcpy) +weak_alias (__mempcpy, mempcpy) +libc_hidden_builtin_def (mempcpy) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/memset.S b/REORG.TODO/sysdeps/sparc/sparc64/memset.S new file mode 100644 index 0000000000..5664436a51 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/memset.S @@ -0,0 +1,314 @@ +/* Set a block of memory to some byte value. + For UltraSPARC. + Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller (davem@caip.rutgers.edu) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <asm/asi.h> +#ifndef XCC +#define XCC xcc +#define USE_BPR +#endif +#define FPRS_FEF 4 + +#define SET_BLOCKS(base, offset, source) \ + stx source, [base - offset - 0x18]; \ + stx source, [base - offset - 0x10]; \ + stx source, [base - offset - 0x08]; \ + stx source, [base - offset - 0x00]; + + /* Well, memset is a lot easier to get right than bcopy... 
*/ + .text + .align 32 +ENTRY(memset) + andcc %o1, 0xff, %o1 + mov %o0, %o5 + be,a,pt %icc, 50f +#ifndef USE_BPR + srl %o2, 0, %o1 +#else + mov %o2, %o1 +#endif + cmp %o2, 7 +#ifndef USE_BPR + srl %o2, 0, %o2 +#endif + bleu,pn %XCC, 17f + andcc %o0, 3, %g5 + be,pt %xcc, 4f + and %o1, 0xff, %o1 + cmp %g5, 3 + be,pn %xcc, 2f + stb %o1, [%o0 + 0x00] + cmp %g5, 2 + be,pt %xcc, 2f + stb %o1, [%o0 + 0x01] + stb %o1, [%o0 + 0x02] +2: sub %g5, 4, %g5 + sub %o0, %g5, %o0 + add %o2, %g5, %o2 +4: sllx %o1, 8, %g1 + andcc %o0, 4, %g0 + or %o1, %g1, %o1 + sllx %o1, 16, %g1 + or %o1, %g1, %o1 + be,pt %xcc, 2f + sllx %o1, 32, %g1 + stw %o1, [%o0] + sub %o2, 4, %o2 + add %o0, 4, %o0 +2: cmp %o2, 128 + or %o1, %g1, %o1 + blu,pn %xcc, 9f + andcc %o0, 0x38, %g5 + be,pn %icc, 6f + mov 64, %o4 + andcc %o0, 8, %g0 + be,pn %icc, 1f + sub %o4, %g5, %o4 + stx %o1, [%o0] + add %o0, 8, %o0 +1: andcc %o4, 16, %g0 + be,pn %icc, 1f + sub %o2, %o4, %o2 + stx %o1, [%o0] + stx %o1, [%o0 + 8] + add %o0, 16, %o0 +1: andcc %o4, 32, %g0 + be,pn %icc, 7f + andncc %o2, 0x3f, %o3 + stw %o1, [%o0] + stw %o1, [%o0 + 4] + stw %o1, [%o0 + 8] + stw %o1, [%o0 + 12] + stw %o1, [%o0 + 16] + stw %o1, [%o0 + 20] + stw %o1, [%o0 + 24] + stw %o1, [%o0 + 28] + add %o0, 32, %o0 +7: be,pn %xcc, 9f + nop + ldd [%o0 - 8], %f0 +18: wr %g0, ASI_BLK_P, %asi + membar #StoreStore | #LoadStore + andcc %o3, 0xc0, %g5 + and %o2, 0x3f, %o2 + fsrc2 %f0, %f2 + fsrc2 %f0, %f4 + andn %o3, 0xff, %o3 + fsrc2 %f0, %f6 + cmp %g5, 64 + fsrc2 %f0, %f8 + fsrc2 %f0, %f10 + fsrc2 %f0, %f12 + brz,pn %g5, 10f + fsrc2 %f0, %f14 + be,pn %icc, 2f + stda %f0, [%o0 + 0x00] %asi + cmp %g5, 128 + be,pn %icc, 2f + stda %f0, [%o0 + 0x40] %asi + stda %f0, [%o0 + 0x80] %asi +2: brz,pn %o3, 12f + add %o0, %g5, %o0 +10: stda %f0, [%o0 + 0x00] %asi + stda %f0, [%o0 + 0x40] %asi + stda %f0, [%o0 + 0x80] %asi + stda %f0, [%o0 + 0xc0] %asi +11: subcc %o3, 256, %o3 + bne,pt %xcc, 10b + add %o0, 256, %o0 +12: wr %g0, FPRS_FEF, %fprs + membar #StoreLoad | 
#StoreStore +9: andcc %o2, 0x78, %g5 + be,pn %xcc, 13f + andcc %o2, 7, %o2 +14: rd %pc, %o4 + srl %g5, 1, %o3 + sub %o4, %o3, %o4 + jmpl %o4 + (13f - 14b), %g0 + add %o0, %g5, %o0 +12: SET_BLOCKS (%o0, 0x68, %o1) + SET_BLOCKS (%o0, 0x48, %o1) + SET_BLOCKS (%o0, 0x28, %o1) + SET_BLOCKS (%o0, 0x08, %o1) +13: be,pn %xcc, 8f + andcc %o2, 4, %g0 + be,pn %xcc, 1f + andcc %o2, 2, %g0 + stw %o1, [%o0] + add %o0, 4, %o0 +1: be,pn %xcc, 1f + andcc %o2, 1, %g0 + sth %o1, [%o0] + add %o0, 2, %o0 +1: bne,a,pn %xcc, 8f + stb %o1, [%o0] +8: retl + mov %o5, %o0 +17: brz,pn %o2, 0f +8: add %o0, 1, %o0 + subcc %o2, 1, %o2 + bne,pt %xcc, 8b + stb %o1, [%o0 - 1] +0: retl + mov %o5, %o0 + +6: stx %o1, [%o0] + andncc %o2, 0x3f, %o3 + be,pn %xcc, 9b + nop + ba,pt %xcc, 18b + ldd [%o0], %f0 +END(memset) +libc_hidden_builtin_def (memset) + +#define ZERO_BLOCKS(base, offset, source) \ + stx source, [base - offset - 0x38]; \ + stx source, [base - offset - 0x30]; \ + stx source, [base - offset - 0x28]; \ + stx source, [base - offset - 0x20]; \ + stx source, [base - offset - 0x18]; \ + stx source, [base - offset - 0x10]; \ + stx source, [base - offset - 0x08]; \ + stx source, [base - offset - 0x00]; + + .text + .align 32 +ENTRY(__bzero) +#ifndef USE_BPR + srl %o1, 0, %o1 +#endif + mov %o0, %o5 +50: cmp %o1, 7 + bleu,pn %xcc, 17f + andcc %o0, 3, %o2 + be,a,pt %xcc, 4f + andcc %o0, 4, %g0 + cmp %o2, 3 + be,pn %xcc, 2f + stb %g0, [%o0 + 0x00] + cmp %o2, 2 + be,pt %xcc, 2f + stb %g0, [%o0 + 0x01] + stb %g0, [%o0 + 0x02] +2: sub %o2, 4, %o2 + sub %o0, %o2, %o0 + add %o1, %o2, %o1 + andcc %o0, 4, %g0 +4: be,pt %xcc, 2f + cmp %o1, 128 + stw %g0, [%o0] + sub %o1, 4, %o1 + add %o0, 4, %o0 +2: blu,pn %xcc, 9f + andcc %o0, 0x38, %o2 + be,pn %icc, 6f + mov 64, %o4 + andcc %o0, 8, %g0 + be,pn %icc, 1f + sub %o4, %o2, %o4 + stx %g0, [%o0] + add %o0, 8, %o0 +1: andcc %o4, 16, %g0 + be,pn %icc, 1f + sub %o1, %o4, %o1 + stx %g0, [%o0] + stx %g0, [%o0 + 8] + add %o0, 16, %o0 +1: andcc %o4, 32, %g0 + be,pn %icc, 
7f + andncc %o1, 0x3f, %o3 + stx %g0, [%o0] + stx %g0, [%o0 + 8] + stx %g0, [%o0 + 16] + stx %g0, [%o0 + 24] + add %o0, 32, %o0 +6: andncc %o1, 0x3f, %o3 +7: be,pn %xcc, 9f + wr %g0, ASI_BLK_P, %asi + membar #StoreLoad | #StoreStore | #LoadStore + fzero %f0 + andcc %o3, 0xc0, %o2 + and %o1, 0x3f, %o1 + fzero %f2 + andn %o3, 0xff, %o3 + faddd %f0, %f2, %f4 + fmuld %f0, %f2, %f6 + cmp %o2, 64 + faddd %f0, %f2, %f8 + fmuld %f0, %f2, %f10 + faddd %f0, %f2, %f12 + brz,pn %o2, 10f + fmuld %f0, %f2, %f14 + be,pn %icc, 2f + stda %f0, [%o0 + 0x00] %asi + cmp %o2, 128 + be,pn %icc, 2f + stda %f0, [%o0 + 0x40] %asi + stda %f0, [%o0 + 0x80] %asi +2: brz,pn %o3, 12f + add %o0, %o2, %o0 +10: stda %f0, [%o0 + 0x00] %asi + stda %f0, [%o0 + 0x40] %asi + stda %f0, [%o0 + 0x80] %asi + stda %f0, [%o0 + 0xc0] %asi +11: subcc %o3, 256, %o3 + bne,pt %xcc, 10b + add %o0, 256, %o0 +12: wr %g0, FPRS_FEF, %fprs + membar #StoreLoad | #StoreStore +9: andcc %o1, 0xf8, %o2 + be,pn %xcc, 13f + andcc %o1, 7, %o1 +14: rd %pc, %o4 + srl %o2, 1, %o3 + sub %o4, %o3, %o4 + jmpl %o4 + (13f - 14b), %g0 + add %o0, %o2, %o0 +12: ZERO_BLOCKS (%o0, 0xc8, %g0) + ZERO_BLOCKS (%o0, 0x88, %g0) + ZERO_BLOCKS (%o0, 0x48, %g0) + ZERO_BLOCKS (%o0, 0x08, %g0) +13: be,pn %xcc, 8f + andcc %o1, 4, %g0 + be,pn %xcc, 1f + andcc %o1, 2, %g0 + stw %g0, [%o0] + add %o0, 4, %o0 +1: be,pn %xcc, 1f + andcc %o1, 1, %g0 + sth %g0, [%o0] + add %o0, 2, %o0 +1: bne,a,pn %xcc, 8f + stb %g0, [%o0] +8: retl + mov %o5, %o0 +17: be,pn %xcc, 13b + orcc %o1, 0, %g0 + be,pn %xcc, 0f +8: add %o0, 1, %o0 + subcc %o1, 1, %o1 + bne,pt %xcc, 8b + stb %g0, [%o0 - 1] +0: retl + mov %o5, %o0 +END(__bzero) + +weak_alias (__bzero, bzero) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/mul_1.S b/REORG.TODO/sysdeps/sparc/sparc64/mul_1.S new file mode 100644 index 0000000000..0a32882c1c --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/mul_1.S @@ -0,0 +1,82 @@ +/* SPARC v9 __mpn_mul_1 -- Multiply a limb vector with a single limb and + store the product 
in a second limb vector. + + Copyright (C) 1995-2017 Free Software Foundation, Inc. + + This file is part of the GNU MP Library. + + The GNU MP Library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or (at your + option) any later version. + + The GNU MP Library is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with the GNU MP Library; see the file COPYING.LIB. If not, + see <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + +/* INPUT PARAMETERS + res_ptr o0 + s1_ptr o1 + size o2 + s2_limb o3 */ + +ENTRY(__mpn_mul_1) + !#PROLOGUE# 0 + save %sp,-192,%sp + !#PROLOGUE# 1 + + sub %g0,%i2,%o7 + sllx %o7,3,%g5 + sub %i1,%g5,%o3 + sub %i0,%g5,%o4 + mov 0,%o0 ! zero cy_limb + + srl %i3,0,%o1 ! extract low 32 bits of s2_limb + srlx %i3,32,%i3 ! extract high 32 bits of s2_limb + mov 1,%o2 + sllx %o2,32,%o2 ! o2 = 0x100000000 + + ! hi ! + ! mid-1 ! + ! mid-2 ! + ! lo ! +.Loop: + sllx %o7,3,%g1 + ldx [%o3+%g1],%g5 + srl %g5,0,%i0 ! zero hi bits + srlx %g5,32,%g5 + mulx %o1,%i0,%i4 ! lo product + mulx %i3,%i0,%i1 ! mid-1 product + mulx %o1,%g5,%l2 ! mid-2 product + mulx %i3,%g5,%i5 ! hi product + srlx %i4,32,%i0 ! extract high 32 bits of lo product... + add %i1,%i0,%i1 ! ...and add it to the mid-1 product + addcc %i1,%l2,%i1 ! add mid products + mov 0,%l0 ! we need the carry from that add... + movcs %xcc,%o2,%l0 ! ...compute it and... + add %i5,%l0,%i5 ! ...add to bit 32 of the hi product + sllx %i1,32,%i0 ! align low bits of mid product + srl %i4,0,%g5 ! zero high 32 bits of lo product + add %i0,%g5,%i0 ! 
combine into low 64 bits of result + srlx %i1,32,%i1 ! extract high bits of mid product... + add %i5,%i1,%i1 ! ...and add them to the high result + addcc %i0,%o0,%i0 ! add cy_limb to low 64 bits of result + mov 0,%g5 + movcs %xcc,1,%g5 + addcc %o7,1,%o7 + stx %i0,[%o4+%g1] + bne,pt %xcc,.Loop + add %i1,%g5,%o0 ! compute new cy_limb + + jmpl %i7+8,%g0 + restore %o0,%g0,%o0 + +END(__mpn_mul_1) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/multiarch/Makefile b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/Makefile new file mode 100644 index 0000000000..55b757f9ad --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/Makefile @@ -0,0 +1,21 @@ +ifeq ($(subdir),crypt) +libcrypt-sysdep_routines += md5-crop sha256-crop sha512-crop +endif + +ifeq ($(subdir),locale) +localedef-aux += md5-crop +endif + +ifeq ($(subdir),string) +sysdep_routines += memcpy-ultra3 memcpy-niagara1 memcpy-niagara2 \ + memset-niagara1 memcpy-niagara4 memset-niagara4 +endif + +ifeq ($(subdir),stdlib) +sysdep_routines += mul_1-vis3 addmul_1-vis3 submul_1-vis3 add_n-vis3 sub_n-vis3 +endif + +ifeq ($(subdir),math) +gmp-sysdep_routines = mul_1-vis3 addmul_1-vis3 submul_1-vis3 add_n-vis3 \ + sub_n-vis3 +endif diff --git a/REORG.TODO/sysdeps/sparc/sparc64/multiarch/add_n-vis3.S b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/add_n-vis3.S new file mode 100644 index 0000000000..c038bcbd6e --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/add_n-vis3.S @@ -0,0 +1,67 @@ +! SPARC v9 64-bit VIS3 __mpn_add_n -- Add two limb vectors of the same length > 0 and +! store sum in a third limb vector. +! +! Copyright (C) 2013-2017 Free Software Foundation, Inc. +! This file is part of the GNU C Library. +! Contributed by David S. Miller <davem@davemloft.net> +! +! The GNU C Library is free software; you can redistribute it and/or +! modify it under the terms of the GNU Lesser General Public +! License as published by the Free Software Foundation; either +! 
version 2.1 of the License, or (at your option) any later version. +! +! The GNU C Library is distributed in the hope that it will be useful, +! but WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +! Lesser General Public License for more details. +! +! You should have received a copy of the GNU Lesser General Public +! License along with the GNU C Library; if not, see +! <http://www.gnu.org/licenses/>. + +#include <sysdep.h> + +#define res_ptr %o0 +#define s1_ptr %o1 +#define s2_ptr %o2 +#define sz %o3 +#define tmp1 %g1 +#define tmp2 %g2 +#define tmp3 %g3 +#define tmp4 %o4 + + .register %g2,#scratch + .register %g3,#scratch +ENTRY(__mpn_add_n_vis3) + subcc sz, 1, sz + be .Lfinal_limb + cmp %g0, 0 + +.Lloop: + ldx [s2_ptr + 0x00], tmp1 + add s2_ptr, 0x10, s2_ptr + ldx [s1_ptr + 0x00], tmp2 + add s1_ptr, 0x10, s1_ptr + ldx [s2_ptr - 0x08], tmp3 + add res_ptr, 0x10, res_ptr + ldx [s1_ptr - 0x08], tmp4 + sub sz, 2, sz + addxccc tmp1, tmp2, tmp1 + stx tmp1, [res_ptr - 0x10] + addxccc tmp3, tmp4, tmp3 + brgz sz, .Lloop + stx tmp3, [res_ptr - 0x08] + + brlz,pt sz, .Lfinish + nop + +.Lfinal_limb: + ldx [s2_ptr + 0x00], tmp1 + ldx [s1_ptr + 0x00], tmp2 + addxccc tmp1, tmp2, tmp1 + stx tmp1, [res_ptr + 0x00] + +.Lfinish: + retl + addxc %g0, %g0, %o0 +END(__mpn_add_n_vis3) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/multiarch/add_n.S b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/add_n.S new file mode 100644 index 0000000000..9ffaf7865b --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/add_n.S @@ -0,0 +1,56 @@ +/* Multiple versions of add_n + + Copyright (C) 2013-2017 Free Software Foundation, Inc. + Contributed by David S. Miller (davem@davemloft.net) + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY(__mpn_add_n) + .type __mpn_add_n, @gnu_indirect_function +# ifdef SHARED + SETUP_PIC_REG_LEAF(o3, o5) +# endif + set HWCAP_SPARC_VIS3, %o1 + andcc %o0, %o1, %g0 + be 1f + nop +# ifdef SHARED + sethi %gdop_hix22(__mpn_add_n_vis3), %o1 + xor %o1, %gdop_lox10(__mpn_add_n_vis3), %o1 +# else + set __mpn_add_n_vis3, %o1 +# endif + ba 10f + nop +1: +# ifdef SHARED + sethi %gdop_hix22(__mpn_add_n_generic), %o1 + xor %o1, %gdop_lox10(__mpn_add_n_generic), %o1 +# else + set __mpn_add_n_generic, %o1 +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 +# endif + retl + mov %o1, %o0 +END(__mpn_add_n) + +#define __mpn_add_n __mpn_add_n_generic +#include "../add_n.S" diff --git a/REORG.TODO/sysdeps/sparc/sparc64/multiarch/addmul_1-vis3.S b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/addmul_1-vis3.S new file mode 100644 index 0000000000..64671f5079 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/addmul_1-vis3.S @@ -0,0 +1,87 @@ +! SPARC v9 64-bit VIS3 __mpn_addmul_1 -- Multiply a limb vector with a +! limb and add the result to a second limb vector. +! +! Copyright (C) 2013-2017 Free Software Foundation, Inc. +! This file is part of the GNU C Library. +! Contributed by David S. Miller <davem@davemloft.net> +! +! 
The GNU C Library is free software; you can redistribute it and/or +! modify it under the terms of the GNU Lesser General Public +! License as published by the Free Software Foundation; either +! version 2.1 of the License, or (at your option) any later version. +! +! The GNU C Library is distributed in the hope that it will be useful, +! but WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +! Lesser General Public License for more details. +! +! You should have received a copy of the GNU Lesser General Public +! License along with the GNU C Library; if not, see +! <http://www.gnu.org/licenses/>. + +#include <sysdep.h> + /* __mpn_addmul_1_vis3: for each of sz limbs, multiply the limb at s1_ptr by s2_limb (mulx gives the low half, umulxhi the high half), add the running carry and the limb at res_ptr, store the sum back at res_ptr, and finally return the carry. The names used below are the register #defines that follow; the arguments are read from %i0-%i3 after the save. */ +#define res_ptr %i0 +#define s1_ptr %i1 +#define sz %i2 +#define s2_limb %i3 +#define carry %o5 +#define tmp1 %g1 +#define tmp2 %g2 +#define tmp3 %g3 +#define tmp4 %o4 +#define tmp5 %l0 +#define tmp6 %l1 +#define tmp7 %l2 +#define tmp8 %l3 + + .register %g2,#scratch + .register %g3,#scratch +ENTRY(__mpn_addmul_1_vis3) + save %sp, -176, %sp + subcc sz, 1, sz + be .Lfinal_limb /* sz was 1: only the single-limb tail is needed */ + clr carry /* delay slot: carry starts at zero on both paths */ + +.Lloop: /* main loop: two limbs per iteration, sz drops by 2 */ + ldx [s1_ptr + 0x00], tmp1 + ldx [res_ptr + 0x00], tmp3 + ldx [s1_ptr + 0x08], tmp2 + ldx [res_ptr + 0x08], tmp4 + mulx tmp1, s2_limb, tmp5 + add s1_ptr, 0x10, s1_ptr + umulxhi tmp1, s2_limb, tmp6 + add res_ptr, 0x10, res_ptr + mulx tmp2, s2_limb, tmp7 + sub sz, 2, sz + umulxhi tmp2, s2_limb, tmp8 + addcc carry, tmp5, tmp5 + addxc %g0, tmp6, carry + addcc tmp3, tmp5, tmp5 + addxc %g0, carry, carry + stx tmp5, [res_ptr - 0x10] + addcc carry, tmp7, tmp7 + addxc %g0, tmp8, carry + addcc tmp4, tmp7, tmp7 + addxc %g0, carry, carry + brgz sz, .Lloop + stx tmp7, [res_ptr - 0x08] /* delay slot: store the second result limb */ + + brlz,pt sz, .Lfinish /* sz < 0: nothing left; sz == 0 falls through to the last limb */ + nop + +.Lfinal_limb: + ldx [s1_ptr + 0x00], tmp1 + ldx [res_ptr + 0x00], tmp3 + mulx tmp1, s2_limb, tmp5 + umulxhi tmp1, s2_limb, tmp6 + addcc carry, tmp5, tmp5 + addxc %g0, tmp6, carry + addcc tmp3, tmp5, tmp5 + addxc %g0, carry, carry + stx tmp5, [res_ptr + 0x00] + +.Lfinish: + 
jmpl %i7 + 8, %g0 + restore carry, 0, %o0 /* delay slot: the restore writes carry into %o0 as the return value */ +END(__mpn_addmul_1_vis3) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/multiarch/addmul_1.S b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/addmul_1.S new file mode 100644 index 0000000000..dcb1da184c --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/addmul_1.S @@ -0,0 +1,56 @@ +/* Multiple versions of addmul_1 + + Copyright (C) 2013-2017 Free Software Foundation, Inc. + Contributed by David S. Miller (davem@davemloft.net) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + /* IFUNC resolver for __mpn_addmul_1 (the symbol is typed @gnu_indirect_function below): it tests the HWCAP_SPARC_VIS3 bit in the value received in %o0 and returns, in %o0, the address of __mpn_addmul_1_vis3 when the bit is set and of __mpn_addmul_1_generic otherwise. */ +ENTRY(__mpn_addmul_1) + .type __mpn_addmul_1, @gnu_indirect_function +# ifdef SHARED + SETUP_PIC_REG_LEAF(o3, o5) +# endif + set HWCAP_SPARC_VIS3, %o1 + andcc %o0, %o1, %g0 + be 1f /* VIS3 bit clear: take the generic version at 1: */ + nop +# ifdef SHARED + sethi %gdop_hix22(__mpn_addmul_1_vis3), %o1 + xor %o1, %gdop_lox10(__mpn_addmul_1_vis3), %o1 +# else + set __mpn_addmul_1_vis3, %o1 +# endif + ba 10f + nop +1: +# ifdef SHARED + sethi %gdop_hix22(__mpn_addmul_1_generic), %o1 + xor %o1, %gdop_lox10(__mpn_addmul_1_generic), %o1 +# else + set __mpn_addmul_1_generic, %o1 +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 /* SHARED only: add %o3, which SETUP_PIC_REG_LEAF(o3, o5) set up above (presumably the PIC base register - confirm), to the %gdop-derived value */ +# endif + retl + mov %o1, %o0 /* delay slot of retl: hand back the selected address */ +END(__mpn_addmul_1) + /* Assemble ../addmul_1.S with its entry point renamed to __mpn_addmul_1_generic, the fallback the resolver above can return. */ +#define __mpn_addmul_1 __mpn_addmul_1_generic +#include "../addmul_1.S" diff --git a/REORG.TODO/sysdeps/sparc/sparc64/multiarch/ifunc-impl-list.c b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/ifunc-impl-list.c new file mode 100644 index 0000000000..a97bc455a8 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/ifunc-impl-list.c @@ -0,0 +1,75 @@ +/* Enumerate available IFUNC implementations of a function. sparc version. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <assert.h> +#include <string.h> +#include <wchar.h> +#include <ldsodefs.h> +#include <sysdep.h> +#include <ifunc-impl-list.h> + +/* Fill ARRAY of MAX elements with IFUNC implementations for function + NAME and return the number of valid entries. */ /* Covers memcpy, mempcpy, bzero and memset. NOTE(review): MAX is never referenced in this body; presumably any bounds handling lives in the IFUNC_IMPL macros - confirm. */ + +size_t +__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + size_t max) +{ + size_t i = 0; + int hwcap; + + hwcap = GLRO(dl_hwcap); + /* Every IFUNC_IMPL_ADD below names one implementation together with its usability expression: a test of one hwcap feature bit, or the constant 1 for the ultra1 variant that closes each list. */ + IFUNC_IMPL (i, name, memcpy, + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & HWCAP_SPARC_CRYPTO, + __memcpy_niagara4) + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & HWCAP_SPARC_N2, + __memcpy_niagara2) + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & HWCAP_SPARC_BLKINIT, + __memcpy_niagara1) + IFUNC_IMPL_ADD (array, i, memcpy, hwcap & HWCAP_SPARC_ULTRA3, + __memcpy_ultra3) + IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ultra1)); + + IFUNC_IMPL (i, name, mempcpy, + IFUNC_IMPL_ADD (array, i, mempcpy, hwcap & HWCAP_SPARC_CRYPTO, + __mempcpy_niagara4) + IFUNC_IMPL_ADD (array, i, mempcpy, hwcap & HWCAP_SPARC_N2, + __mempcpy_niagara2) + IFUNC_IMPL_ADD (array, i, mempcpy, hwcap & HWCAP_SPARC_BLKINIT, + __mempcpy_niagara1) + IFUNC_IMPL_ADD (array, i, mempcpy, hwcap & HWCAP_SPARC_ULTRA3, + __mempcpy_ultra3) + IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_ultra1)); + /* bzero and memset list only the niagara4, niagara1 and ultra1 variants. */ + IFUNC_IMPL (i, name, bzero, + IFUNC_IMPL_ADD (array, i, bzero, hwcap & HWCAP_SPARC_CRYPTO, + __bzero_niagara4) + IFUNC_IMPL_ADD (array, i, bzero, hwcap & HWCAP_SPARC_BLKINIT, + __bzero_niagara1) + IFUNC_IMPL_ADD (array, i, bzero, 1, __bzero_ultra1)); + + IFUNC_IMPL (i, name, memset, + IFUNC_IMPL_ADD (array, i, memset, hwcap & HWCAP_SPARC_CRYPTO, + __memset_niagara4) + IFUNC_IMPL_ADD (array, i, memset, hwcap & HWCAP_SPARC_BLKINIT, + __memset_niagara1) + IFUNC_IMPL_ADD (array, i, memset, 1, __memset_ultra1)); + + return i; +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/multiarch/md5-block.c b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/md5-block.c new file mode 100644 index 0000000000..7c1a3a368f --- 
/dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/md5-block.c @@ -0,0 +1,29 @@ +#include <sparc-ifunc.h> + /* Compile the C implementation from <crypt/md5-block.c> under the name __md5_process_block_generic; the macro is removed again right after the include. */ +#define __md5_process_block __md5_process_block_generic +extern void __md5_process_block_generic (const void *buffer, size_t len, + struct md5_ctx *ctx); + +#include <crypt/md5-block.c> + +#undef __md5_process_block + +extern void __md5_process_block_crop (const void *buffer, size_t len, + struct md5_ctx *ctx); /* Return true only when HWCAP has HWCAP_SPARC_CRYPTO set and bit 4 of the value read from %asr26 is set. NOTE(review): presumably %asr26 is the crypto feature register and bit 4 advertises the MD5 opcode - confirm against the CPU manual. */ +static bool cpu_supports_md5(int hwcap) +{ + unsigned long cfr; + + if (!(hwcap & HWCAP_SPARC_CRYPTO)) + return false; + /* %asr26 is read only after the HWCAP_SPARC_CRYPTO check above has passed. */ + __asm__ ("rd %%asr26, %0" : "=r" (cfr)); + if (cfr & (1 << 4)) + return true; + + return false; +} + /* Bind __md5_process_block to __md5_process_block_crop when cpu_supports_md5 accepts the hwcap value, otherwise to __md5_process_block_generic. */ +extern void __md5_process_block (const void *buffer, size_t len, + struct md5_ctx *ctx); +sparc_libc_ifunc(__md5_process_block, cpu_supports_md5(hwcap) ? __md5_process_block_crop : __md5_process_block_generic); diff --git a/REORG.TODO/sysdeps/sparc/sparc64/multiarch/md5-crop.S b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/md5-crop.S new file mode 100644 index 0000000000..e8810da83e --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/md5-crop.S @@ -0,0 +1,110 @@ +/* MD5 using sparc crypto opcodes. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller (davem@davemloft.net) + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#define ASI_PL 0x88 + /* The MD5 instruction is emitted as a raw .word rather than by mnemonic. */ +#define MD5 \ + .word 0x81b02800; + + .text + .align 32 +ENTRY(__md5_process_block_crop) + /* %o0=buffer, %o1=len, %o2=CTX */ /* First add len to the 32-bit word at CTX+0x10; a wrap-around carry (plus, under __arch64__, the upper 32 bits of len) is then added to the word at CTX+0x14. */ + ld [%o2 + 0x10], %g1 + add %g1, %o1, %o4 + st %o4, [%o2 + 0x10] + clr %o5 + cmp %o4, %g1 + movlu %icc, 1, %o5 /* new value below old value: the 32-bit add wrapped, so carry 1 */ +#ifdef __arch64__ + srlx %o1, 32, %o4 + add %o5, %o4, %o5 +#endif + ld [%o2 + 0x14], %o4 + add %o4, %o5, %o4 + st %o4, [%o2 + 0x14] /* next: load the four 32-bit words at CTX+0x0..0xc into %f0-%f3 through ASI_PL */ + lda [%o2] ASI_PL, %f0 + add %o2, 0x4, %g1 + lda [%g1] ASI_PL, %f1 + add %o2, 0x8, %g1 + andcc %o0, 0x7, %g0 + lda [%g1] ASI_PL, %f2 + add %o2, 0xc, %g1 + bne,pn %xcc, 10f /* buffer is not 8-byte aligned: use the realigning loop at 10: */ + lda [%g1] ASI_PL, %f3 + +1: /* aligned loop: one 64-byte block per iteration, loaded into %f8-%f22 */ + ldd [%o0 + 0x00], %f8 + ldd [%o0 + 0x08], %f10 + ldd [%o0 + 0x10], %f12 + ldd [%o0 + 0x18], %f14 + ldd [%o0 + 0x20], %f16 + ldd [%o0 + 0x28], %f18 + ldd [%o0 + 0x30], %f20 + ldd [%o0 + 0x38], %f22 + + MD5 + + subcc %o1, 64, %o1 + bne,pt %xcc, 1b + add %o0, 0x40, %o0 + +5: /* common exit: store %f0-%f3 back to CTX+0x0..0xc and return */ + sta %f0, [%o2] ASI_PL + add %o2, 0x4, %g1 + sta %f1, [%g1] ASI_PL + add %o2, 0x8, %g1 + sta %f2, [%g1] ASI_PL + add %o2, 0xc, %g1 + retl + sta %f3, [%g1] ASI_PL +10: /* unaligned buffer: load nine doublewords per block starting at the address produced by alignaddr, then realign them into %f8-%f22 with faligndata */ + alignaddr %o0, %g0, %o0 + + ldd [%o0 + 0x00], %f10 +1: + ldd [%o0 + 0x08], %f12 + ldd [%o0 + 0x10], %f14 + ldd [%o0 + 0x18], %f16 + ldd [%o0 + 0x20], %f18 + ldd [%o0 + 0x28], %f20 + ldd [%o0 + 0x30], %f22 + ldd [%o0 + 0x38], %f24 + ldd [%o0 + 0x40], %f26 + + faligndata %f10, %f12, %f8 + faligndata %f12, %f14, %f10 + faligndata %f14, %f16, %f12 + faligndata %f16, %f18, %f14 + faligndata %f18, %f20, %f16 + faligndata %f20, %f22, %f18 + faligndata %f22, %f24, %f20 + faligndata %f24, %f26, %f22 + + MD5 + + subcc %o1, 64, %o1 + fsrc2 %f26, %f10 /* keep the last doubleword loaded as the first input of the next iteration */ + bne,pt %xcc, 1b + add %o0, 0x40, %o0 + + ba,a,pt %xcc, 5b +END(__md5_process_block_crop) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S new file mode 100644 index 0000000000..ccf42446e8 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S @@ -0,0 +1,347 @@ +/* 
Copy SIZE bytes from SRC to DEST. For SUN4V Niagara. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller (davem@davemloft.net) + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define ASI_BLK_INIT_QUAD_LDD_P 0xe2 +#define ASI_P 0x80 +#define ASI_PNF 0x82 + +#define LOAD(type,addr,dest) type##a [addr] ASI_P, dest +#define LOAD_TWIN(addr_reg,dest0,dest1) \ + ldda [addr_reg] ASI_BLK_INIT_QUAD_LDD_P, dest0 + +#define STORE(type,src,addr) type src, [addr] +#define STORE_INIT(src,addr) stxa src, [addr] %asi + +#ifndef XCC +#define USE_BPR +#define XCC xcc +#endif + +#if IS_IN (libc) + + .register %g2,#scratch + .register %g3,#scratch + .register %g6,#scratch + + .text + /* mempcpy entry: joins the memcpy body at 101 with %g5 = dst + len; %g5 is what every exit path moves into %o0. */ +ENTRY(__mempcpy_niagara1) + ba,pt %XCC, 101f + add %o0, %o2, %g5 /* delay slot: return value is dst + len */ +END(__mempcpy_niagara1) + + .align 32 +ENTRY(__memcpy_niagara1) +100: /* %o0=dst, %o1=src, %o2=len */ + mov %o0, %g5 /* memcpy returns dst */ +101: +# ifndef USE_BPR + srl %o2, 0, %o2 +# endif + cmp %o2, 0 + be,pn %XCC, 85f +218: or %o0, %o1, %o3 + cmp %o2, 16 + blu,a,pn %XCC, 80f + or %o3, %o2, %o3 + + /* 2 blocks (128 bytes) is the minimum we can do the block + * copy with. We need to ensure that we'll iterate at least + * once in the block copy loop. 
At worst we'll need to align + * the destination to a 64-byte boundary which can chew up + * to (64 - 1) bytes from the length before we perform the + * block copy loop. + */ + cmp %o2, (2 * 64) + blu,pt %XCC, 70f + andcc %o3, 0x7, %g0 + + /* %o0: dst + * %o1: src + * %o2: len (known to be >= 128) + * + * The block copy loops will use %o4/%o5,%g2/%g3 as + * temporaries while copying the data. + */ + + LOAD(prefetch, %o1, #one_read) + wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi + + /* Align destination on 64-byte boundary. */ + andcc %o0, (64 - 1), %o4 + be,pt %XCC, 2f + sub %o4, 64, %o4 + sub %g0, %o4, %o4 ! bytes to align dst + sub %o2, %o4, %o2 +1: subcc %o4, 1, %o4 + LOAD(ldub, %o1, %g1) + STORE(stb, %g1, %o0) + add %o1, 1, %o1 + bne,pt %XCC, 1b + add %o0, 1, %o0 + + /* If the source is on a 16-byte boundary we can do + * the direct block copy loop. If it is 8-byte aligned + * we can do the 16-byte loads offset by -8 bytes and the + * init stores offset by one register. + * + * If the source is not even 8-byte aligned, we need to do + * shifting and masking (basically integer faligndata). + * + * The careful bit with init stores is that if we store + * to any part of the cache line we have to store the whole + * cacheline else we can end up with corrupt L2 cache line + * contents. Since the loop works on 64-bytes of 64-byte + * aligned store data at a time, this is easy to ensure. + */ +2: + andcc %o1, (16 - 1), %o4 + andn %o2, (64 - 1), %g1 ! block copy loop iterator + sub %o2, %g1, %o2 ! final sub-block copy bytes + be,pt %XCC, 50f + cmp %o4, 8 + be,a,pt %XCC, 10f + sub %o1, 0x8, %o1 + + /* Neither 8-byte nor 16-byte aligned, shift and mask. 
*/ + mov %g1, %o4 + and %o1, 0x7, %g1 + sll %g1, 3, %g1 + mov 64, %o3 + andn %o1, 0x7, %o1 + LOAD(ldx, %o1, %g2) + sub %o3, %g1, %o3 + sllx %g2, %g1, %g2 + +#define SWIVEL_ONE_DWORD(SRC, TMP1, TMP2, PRE_VAL, PRE_SHIFT, POST_SHIFT, DST)\ + LOAD(ldx, SRC, TMP1); \ + srlx TMP1, PRE_SHIFT, TMP2; \ + or TMP2, PRE_VAL, TMP2; \ + STORE_INIT(TMP2, DST); \ + sllx TMP1, POST_SHIFT, PRE_VAL; + +1: add %o1, 0x8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x00) + add %o1, 0x8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x08) + add %o1, 0x8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x10) + add %o1, 0x8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x18) + add %o1, 32, %o1 + LOAD(prefetch, %o1, #one_read) + sub %o1, 32 - 8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x20) + add %o1, 8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x28) + add %o1, 8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x30) + add %o1, 8, %o1 + SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x38) + subcc %o4, 64, %o4 + bne,pt %XCC, 1b + add %o0, 64, %o0 + +#undef SWIVEL_ONE_DWORD + + srl %g1, 3, %g1 + ba,pt %XCC, 60f + add %o1, %g1, %o1 + +10: /* Destination is 64-byte aligned, source was only 8-byte + * aligned but it has been subtracted by 8 and we perform + * one twin load ahead, then add 8 back into source when + * we finish the loop. + */ + LOAD_TWIN(%o1, %o4, %o5) +1: add %o1, 16, %o1 + LOAD_TWIN(%o1, %g2, %g3) + add %o1, 16 + 32, %o1 + LOAD(prefetch, %o1, #one_read) + sub %o1, 32, %o1 + STORE_INIT(%o5, %o0 + 0x00) ! 
initializes cache line + STORE_INIT(%g2, %o0 + 0x08) + LOAD_TWIN(%o1, %o4, %o5) + add %o1, 16, %o1 + STORE_INIT(%g3, %o0 + 0x10) + STORE_INIT(%o4, %o0 + 0x18) + LOAD_TWIN(%o1, %g2, %g3) + add %o1, 16, %o1 + STORE_INIT(%o5, %o0 + 0x20) + STORE_INIT(%g2, %o0 + 0x28) + LOAD_TWIN(%o1, %o4, %o5) + STORE_INIT(%g3, %o0 + 0x30) + STORE_INIT(%o4, %o0 + 0x38) + subcc %g1, 64, %g1 + bne,pt %XCC, 1b + add %o0, 64, %o0 + + ba,pt %XCC, 60f + add %o1, 0x8, %o1 + +50: /* Destination is 64-byte aligned, and source is 16-byte + * aligned. + */ +1: LOAD_TWIN(%o1, %o4, %o5) + add %o1, 16, %o1 + LOAD_TWIN(%o1, %g2, %g3) + add %o1, 16 + 32, %o1 + LOAD(prefetch, %o1, #one_read) + sub %o1, 32, %o1 + STORE_INIT(%o4, %o0 + 0x00) ! initializes cache line + STORE_INIT(%o5, %o0 + 0x08) + LOAD_TWIN(%o1, %o4, %o5) + add %o1, 16, %o1 + STORE_INIT(%g2, %o0 + 0x10) + STORE_INIT(%g3, %o0 + 0x18) + LOAD_TWIN(%o1, %g2, %g3) + add %o1, 16, %o1 + STORE_INIT(%o4, %o0 + 0x20) + STORE_INIT(%o5, %o0 + 0x28) + STORE_INIT(%g2, %o0 + 0x30) + STORE_INIT(%g3, %o0 + 0x38) + subcc %g1, 64, %g1 + bne,pt %XCC, 1b + add %o0, 64, %o0 + /* fall through */ + +60: + /* %o2 contains any final bytes still needed to be copied + * over. If anything is left, we copy it one byte at a time. 
+ */ + wr %g0, ASI_PNF, %asi + brz,pt %o2, 85f + sub %o0, %o1, %o3 + ba,a,pt %XCC, 90f + + .align 64 +70: /* 16 <= len < 128: reached from the cmp/blu test against (2 * 64) above, after lengths below 16 were already sent to 80. The condition codes tested next come from the andcc on (dst | src) & 7 in that branch's delay slot. */ + bne,pn %XCC, 75f + sub %o0, %o1, %o3 + +72: + andn %o2, 0xf, %o4 + and %o2, 0xf, %o2 +1: subcc %o4, 0x10, %o4 + LOAD(ldx, %o1, %o5) + add %o1, 0x08, %o1 + LOAD(ldx, %o1, %g1) + sub %o1, 0x08, %o1 + STORE(stx, %o5, %o1 + %o3) + add %o1, 0x8, %o1 + STORE(stx, %g1, %o1 + %o3) + bgu,pt %XCC, 1b + add %o1, 0x8, %o1 +73: andcc %o2, 0x8, %g0 + be,pt %XCC, 1f + nop + sub %o2, 0x8, %o2 + LOAD(ldx, %o1, %o5) + STORE(stx, %o5, %o1 + %o3) + add %o1, 0x8, %o1 +1: andcc %o2, 0x4, %g0 + be,pt %XCC, 1f + nop + sub %o2, 0x4, %o2 + LOAD(lduw, %o1, %o5) + STORE(stw, %o5, %o1 + %o3) + add %o1, 0x4, %o1 +1: cmp %o2, 0 + be,pt %XCC, 85f + nop + ba,pt %XCC, 90f + nop + +75: + andcc %o0, 0x7, %g1 + sub %g1, 0x8, %g1 + be,pn %icc, 2f + sub %g0, %g1, %g1 + sub %o2, %g1, %o2 + +1: subcc %g1, 1, %g1 + LOAD(ldub, %o1, %o5) + STORE(stb, %o5, %o1 + %o3) + bgu,pt %icc, 1b + add %o1, 1, %o1 + +2: add %o1, %o3, %o0 + andcc %o1, 0x7, %g1 + bne,pt %icc, 8f + sll %g1, 3, %g1 + + cmp %o2, 16 + bgeu,pt %icc, 72b + nop + ba,a,pt %XCC, 73b + +8: mov 64, %o3 + andn %o1, 0x7, %o1 + LOAD(ldx, %o1, %g2) + sub %o3, %g1, %o3 + andn %o2, 0x7, %o4 + sllx %g2, %g1, %g2 +1: add %o1, 0x8, %o1 + LOAD(ldx, %o1, %g3) + subcc %o4, 0x8, %o4 + srlx %g3, %o3, %o5 + or %o5, %g2, %o5 + STORE(stx, %o5, %o0) + add %o0, 0x8, %o0 + bgu,pt %icc, 1b + sllx %g3, %g1, %g2 + + srl %g1, 3, %g1 + andcc %o2, 0x7, %o2 + be,pn %icc, 85f + add %o1, %g1, %o1 + ba,pt %XCC, 90f + sub %o0, %o1, %o3 + + .align 64 +80: /* 0 < len < 16: reached from the cmp/blu test against 16 above; %o3 holds dst | src | len, so the test below checks all three for 4-byte alignment. */ + andcc %o3, 0x3, %g0 + bne,pn %XCC, 90f + sub %o0, %o1, %o3 + +1: + subcc %o2, 4, %o2 + LOAD(lduw, %o1, %g1) + STORE(stw, %g1, %o1 + %o3) + bgu,pt %XCC, 1b + add %o1, 4, %o1 + +85: retl + mov %g5, %o0 /* delay slot: return the value saved in %g5 at entry */ + + .align 32 +90: /* byte-at-a-time copy; %o3 holds dst - src so [%o1 + %o3] addresses the destination */ + subcc %o2, 1, %o2 + LOAD(ldub, %o1, %g1) + STORE(stb, %g1, %o1 + %o3) + bgu,pt %XCC, 90b + add %o1, 1, %o1 + retl + mov %g5, %o0 + +END(__memcpy_niagara1) + +#endif 
diff --git a/REORG.TODO/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S new file mode 100644 index 0000000000..798b3c80fe --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S @@ -0,0 +1,498 @@ +/* Copy SIZE bytes from SRC to DEST. For SUN4V Niagara-2. + Copyright (C) 2007-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller (davem@davemloft.net) + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#define ASI_BLK_INIT_QUAD_LDD_P 0xe2 +#define ASI_BLK_P 0xf0 +#define ASI_P 0x80 +#define ASI_PNF 0x82 + +#define FPRS_FEF 0x04 + +#define VISEntryHalf \ + rd %fprs, %o5; \ + wr %g0, FPRS_FEF, %fprs + +#define VISExitHalf \ + and %o5, FPRS_FEF, %o5; \ + wr %o5, 0x0, %fprs + +#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_P + +#define LOAD(type,addr,dest) type [addr], dest +#define LOAD_BLK(addr,dest) ldda [addr] ASI_BLK_P, dest +#define STORE(type,src,addr) type src, [addr] +#define STORE_BLK(src,addr) stda src, [addr] ASI_BLK_P +#define STORE_INIT(src,addr) stxa src, [addr] STORE_ASI + +#ifndef XCC +#define USE_BPR +#define XCC xcc +#endif + +#define FREG_FROB(x0, x1, x2, x3, x4, x5, x6, x7, x8) \ + faligndata %x0, %x1, %f0; \ + faligndata %x1, %x2, %f2; \ + faligndata %x2, %x3, %f4; \ + faligndata %x3, %x4, %f6; \ + faligndata %x4, %x5, %f8; \ + faligndata %x5, %x6, %f10; \ + faligndata %x6, %x7, %f12; \ + faligndata %x7, %x8, %f14; + +#define FREG_MOVE_1(x0) \ + fsrc2 %x0, %f0; +#define FREG_MOVE_2(x0, x1) \ + fsrc2 %x0, %f0; \ + fsrc2 %x1, %f2; +#define FREG_MOVE_3(x0, x1, x2) \ + fsrc2 %x0, %f0; \ + fsrc2 %x1, %f2; \ + fsrc2 %x2, %f4; +#define FREG_MOVE_4(x0, x1, x2, x3) \ + fsrc2 %x0, %f0; \ + fsrc2 %x1, %f2; \ + fsrc2 %x2, %f4; \ + fsrc2 %x3, %f6; +#define FREG_MOVE_5(x0, x1, x2, x3, x4) \ + fsrc2 %x0, %f0; \ + fsrc2 %x1, %f2; \ + fsrc2 %x2, %f4; \ + fsrc2 %x3, %f6; \ + fsrc2 %x4, %f8; +#define FREG_MOVE_6(x0, x1, x2, x3, x4, x5) \ + fsrc2 %x0, %f0; \ + fsrc2 %x1, %f2; \ + fsrc2 %x2, %f4; \ + fsrc2 %x3, %f6; \ + fsrc2 %x4, %f8; \ + fsrc2 %x5, %f10; +#define FREG_MOVE_7(x0, x1, x2, x3, x4, x5, x6) \ + fsrc2 %x0, %f0; \ + fsrc2 %x1, %f2; \ + fsrc2 %x2, %f4; \ + fsrc2 %x3, %f6; \ + fsrc2 %x4, %f8; \ + fsrc2 %x5, %f10; \ + fsrc2 %x6, %f12; +#define FREG_MOVE_8(x0, x1, x2, x3, x4, x5, x6, x7) \ + fsrc2 %x0, %f0; \ + fsrc2 %x1, %f2; \ + fsrc2 %x2, %f4; \ + fsrc2 %x3, %f6; \ + fsrc2 %x4, %f8; \ + fsrc2 %x5, %f10; \ + fsrc2 %x6, %f12; \ + fsrc2 
%x7, %f14; +#define FREG_LOAD_1(base, x0) \ + LOAD(ldd, base + 0x00, %x0) +#define FREG_LOAD_2(base, x0, x1) \ + LOAD(ldd, base + 0x00, %x0); \ + LOAD(ldd, base + 0x08, %x1); +#define FREG_LOAD_3(base, x0, x1, x2) \ + LOAD(ldd, base + 0x00, %x0); \ + LOAD(ldd, base + 0x08, %x1); \ + LOAD(ldd, base + 0x10, %x2); +#define FREG_LOAD_4(base, x0, x1, x2, x3) \ + LOAD(ldd, base + 0x00, %x0); \ + LOAD(ldd, base + 0x08, %x1); \ + LOAD(ldd, base + 0x10, %x2); \ + LOAD(ldd, base + 0x18, %x3); +#define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \ + LOAD(ldd, base + 0x00, %x0); \ + LOAD(ldd, base + 0x08, %x1); \ + LOAD(ldd, base + 0x10, %x2); \ + LOAD(ldd, base + 0x18, %x3); \ + LOAD(ldd, base + 0x20, %x4); +#define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \ + LOAD(ldd, base + 0x00, %x0); \ + LOAD(ldd, base + 0x08, %x1); \ + LOAD(ldd, base + 0x10, %x2); \ + LOAD(ldd, base + 0x18, %x3); \ + LOAD(ldd, base + 0x20, %x4); \ + LOAD(ldd, base + 0x28, %x5); +#define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \ + LOAD(ldd, base + 0x00, %x0); \ + LOAD(ldd, base + 0x08, %x1); \ + LOAD(ldd, base + 0x10, %x2); \ + LOAD(ldd, base + 0x18, %x3); \ + LOAD(ldd, base + 0x20, %x4); \ + LOAD(ldd, base + 0x28, %x5); \ + LOAD(ldd, base + 0x30, %x6); + +#if IS_IN (libc) + + .register %g2,#scratch + .register %g3,#scratch + .register %g6,#scratch + + .text + /* mempcpy entry: joins the memcpy body at 101 with %g5 = dst + len; %g5 is what every exit path moves into %o0. */ +ENTRY(__mempcpy_niagara2) + ba,pt %XCC, 101f + add %o0, %o2, %g5 /* delay slot: return value is dst + len */ +END(__mempcpy_niagara2) + + .align 32 +ENTRY(__memcpy_niagara2) +100: /* %o0=dst, %o1=src, %o2=len */ + mov %o0, %g5 /* memcpy returns dst */ +101: +# ifndef USE_BPR + srl %o2, 0, %o2 +# endif + cmp %o2, 0 + be,pn %XCC, 85f +218: or %o0, %o1, %o3 + cmp %o2, 16 + blu,a,pn %XCC, 80f + or %o3, %o2, %o3 + + /* 2 blocks (128 bytes) is the minimum we can do the block + * copy with. We need to ensure that we'll iterate at least + * once in the block copy loop. 
At worst we'll need to align + * the destination to a 64-byte boundary which can chew up + * to (64 - 1) bytes from the length before we perform the + * block copy loop. + * + * However, the cut-off point, performance wise, is around + * 4 64-byte blocks. + */ + cmp %o2, (4 * 64) + blu,pt %XCC, 75f + andcc %o3, 0x7, %g0 + + /* %o0: dst + * %o1: src + * %o2: len (known to be >= 256, the 4 * 64 cut-off tested above) + * + * The block copy loops can use %o4, %g2, %g3 as + * temporaries while copying the data. %o5 must + * be preserved between VISEntryHalf and VISExitHalf + */ + + LOAD(prefetch, %o1 + 0x000, #one_read) + LOAD(prefetch, %o1 + 0x040, #one_read) + LOAD(prefetch, %o1 + 0x080, #one_read) + + /* Align destination on 64-byte boundary. */ + andcc %o0, (64 - 1), %o4 + be,pt %XCC, 2f + sub %o4, 64, %o4 + sub %g0, %o4, %o4 ! bytes to align dst + sub %o2, %o4, %o2 +1: subcc %o4, 1, %o4 + LOAD(ldub, %o1, %g1) + STORE(stb, %g1, %o0) + add %o1, 1, %o1 + bne,pt %XCC, 1b + add %o0, 1, %o0 + +2: + /* Clobbers o5/g1/g2/g3/g7/icc/xcc. We must preserve + * o5 from here until we hit VISExitHalf. 
+ */ + VISEntryHalf + + membar #Sync + alignaddr %o1, %g0, %g0 + + add %o1, (64 - 1), %o4 + andn %o4, (64 - 1), %o4 + andn %o2, (64 - 1), %g1 + sub %o2, %g1, %o2 + + and %o1, (64 - 1), %g2 + add %o1, %g1, %o1 + sub %o0, %o4, %g3 + brz,pt %g2, 190f + cmp %g2, 32 + blu,a 5f + cmp %g2, 16 + cmp %g2, 48 + blu,a 4f + cmp %g2, 40 + cmp %g2, 56 + blu 170f + nop + ba,a,pt %xcc, 180f + +4: /* 32 <= low bits < 48 */ + blu 150f + nop + ba,a,pt %xcc, 160f +5: /* 0 < low bits < 32 */ + blu,a 6f + cmp %g2, 8 + cmp %g2, 24 + blu 130f + nop + ba,a,pt %xcc, 140f +6: /* 0 < low bits < 16 */ + bgeu 120f + nop + /* fall through for 0 < low bits < 8 */ +110: sub %o4, 64, %g2 + LOAD_BLK(%g2, %f0) +1: STORE_INIT(%g0, %o4 + %g3) + LOAD_BLK(%o4, %f16) + FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16) + STORE_BLK(%f0, %o4 + %g3) + FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 + bne,pt %XCC, 1b + LOAD(prefetch, %o4 + 64, #one_read) + ba,pt %xcc, 195f + nop + +120: sub %o4, 56, %g2 + FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12) +1: STORE_INIT(%g0, %o4 + %g3) + LOAD_BLK(%o4, %f16) + FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18) + STORE_BLK(%f0, %o4 + %g3) + FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 + bne,pt %XCC, 1b + LOAD(prefetch, %o4 + 64, #one_read) + ba,pt %xcc, 195f + nop + +130: sub %o4, 48, %g2 + FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10) +1: STORE_INIT(%g0, %o4 + %g3) + LOAD_BLK(%o4, %f16) + FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20) + STORE_BLK(%f0, %o4 + %g3) + FREG_MOVE_6(f20, f22, f24, f26, f28, f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 + bne,pt %XCC, 1b + LOAD(prefetch, %o4 + 64, #one_read) + ba,pt %xcc, 195f + nop + +140: sub %o4, 40, %g2 + FREG_LOAD_5(%g2, f0, f2, f4, f6, f8) +1: STORE_INIT(%g0, %o4 + %g3) + LOAD_BLK(%o4, %f16) + FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22) + STORE_BLK(%f0, %o4 + %g3) + FREG_MOVE_5(f22, f24, f26, f28, f30) + subcc %g1, 64, %g1 + add 
%o4, 64, %o4 + bne,pt %XCC, 1b + LOAD(prefetch, %o4 + 64, #one_read) + ba,pt %xcc, 195f + nop + +150: sub %o4, 32, %g2 + FREG_LOAD_4(%g2, f0, f2, f4, f6) +1: STORE_INIT(%g0, %o4 + %g3) + LOAD_BLK(%o4, %f16) + FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24) + STORE_BLK(%f0, %o4 + %g3) + FREG_MOVE_4(f24, f26, f28, f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 + bne,pt %XCC, 1b + LOAD(prefetch, %o4 + 64, #one_read) + ba,pt %xcc, 195f + nop + +160: sub %o4, 24, %g2 + FREG_LOAD_3(%g2, f0, f2, f4) +1: STORE_INIT(%g0, %o4 + %g3) + LOAD_BLK(%o4, %f16) + FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26) + STORE_BLK(%f0, %o4 + %g3) + FREG_MOVE_3(f26, f28, f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 + bne,pt %XCC, 1b + LOAD(prefetch, %o4 + 64, #one_read) + ba,pt %xcc, 195f + nop + +170: sub %o4, 16, %g2 + FREG_LOAD_2(%g2, f0, f2) +1: STORE_INIT(%g0, %o4 + %g3) + LOAD_BLK(%o4, %f16) + FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28) + STORE_BLK(%f0, %o4 + %g3) + FREG_MOVE_2(f28, f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 + bne,pt %XCC, 1b + LOAD(prefetch, %o4 + 64, #one_read) + ba,pt %xcc, 195f + nop + +180: sub %o4, 8, %g2 + FREG_LOAD_1(%g2, f0) +1: STORE_INIT(%g0, %o4 + %g3) + LOAD_BLK(%o4, %f16) + FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30) + STORE_BLK(%f0, %o4 + %g3) + FREG_MOVE_1(f30) + subcc %g1, 64, %g1 + add %o4, 64, %o4 + bne,pt %XCC, 1b + LOAD(prefetch, %o4 + 64, #one_read) + ba,pt %xcc, 195f + nop + +190: +1: STORE_INIT(%g0, %o4 + %g3) + subcc %g1, 64, %g1 + LOAD_BLK(%o4, %f0) + STORE_BLK(%f0, %o4 + %g3) + add %o4, 64, %o4 + bne,pt %XCC, 1b + LOAD(prefetch, %o4 + 64, #one_read) + +195: + add %o4, %g3, %o0 + membar #Sync + + VISExitHalf + + /* %o2 contains any final bytes still needed to be copied + * over. If anything is left, we copy it one byte at a time. 
+ */ + brz,pt %o2, 85f + sub %o0, %o1, %o3 + ba,a,pt %XCC, 90f + + .align 64 +75: /* 16 <= len < 256: reached from the cmp/blu test against (4 * 64) above, after lengths below 16 were already sent to 80. The numeric local label 75 is defined twice in this function; the 75f on the next line binds to the following definition further down (the path taken when (dst | src) & 7 is nonzero), not to this one. */ + bne,pn %XCC, 75f + sub %o0, %o1, %o3 + +72: + andn %o2, 0xf, %o4 + and %o2, 0xf, %o2 +1: subcc %o4, 0x10, %o4 + LOAD(ldx, %o1, %o5) + add %o1, 0x08, %o1 + LOAD(ldx, %o1, %g1) + sub %o1, 0x08, %o1 + STORE(stx, %o5, %o1 + %o3) + add %o1, 0x8, %o1 + STORE(stx, %g1, %o1 + %o3) + bgu,pt %XCC, 1b + add %o1, 0x8, %o1 +73: andcc %o2, 0x8, %g0 + be,pt %XCC, 1f + nop + sub %o2, 0x8, %o2 + LOAD(ldx, %o1, %o5) + STORE(stx, %o5, %o1 + %o3) + add %o1, 0x8, %o1 +1: andcc %o2, 0x4, %g0 + be,pt %XCC, 1f + nop + sub %o2, 0x4, %o2 + LOAD(lduw, %o1, %o5) + STORE(stw, %o5, %o1 + %o3) + add %o1, 0x4, %o1 +1: cmp %o2, 0 + be,pt %XCC, 85f + nop + ba,pt %xcc, 90f + nop + +75: + andcc %o0, 0x7, %g1 + sub %g1, 0x8, %g1 + be,pn %icc, 2f + sub %g0, %g1, %g1 + sub %o2, %g1, %o2 + +1: subcc %g1, 1, %g1 + LOAD(ldub, %o1, %o5) + STORE(stb, %o5, %o1 + %o3) + bgu,pt %icc, 1b + add %o1, 1, %o1 + +2: add %o1, %o3, %o0 + andcc %o1, 0x7, %g1 + bne,pt %icc, 8f + sll %g1, 3, %g1 + + cmp %o2, 16 + bgeu,pt %icc, 72b + nop + ba,a,pt %xcc, 73b + +8: mov 64, %o3 + andn %o1, 0x7, %o1 + LOAD(ldx, %o1, %g2) + sub %o3, %g1, %o3 + andn %o2, 0x7, %o4 + sllx %g2, %g1, %g2 +1: add %o1, 0x8, %o1 + LOAD(ldx, %o1, %g3) + subcc %o4, 0x8, %o4 + srlx %g3, %o3, %o5 + or %o5, %g2, %o5 + STORE(stx, %o5, %o0) + add %o0, 0x8, %o0 + bgu,pt %icc, 1b + sllx %g3, %g1, %g2 + + srl %g1, 3, %g1 + andcc %o2, 0x7, %o2 + be,pn %icc, 85f + add %o1, %g1, %o1 + ba,pt %xcc, 90f + sub %o0, %o1, %o3 + + .align 64 +80: /* 0 < len < 16: reached from the cmp/blu test against 16 above; %o3 holds dst | src | len, so the test below checks all three for 4-byte alignment. */ + andcc %o3, 0x3, %g0 + bne,pn %XCC, 90f + sub %o0, %o1, %o3 + +1: + subcc %o2, 4, %o2 + LOAD(lduw, %o1, %g1) + STORE(stw, %g1, %o1 + %o3) + bgu,pt %XCC, 1b + add %o1, 4, %o1 + +85: retl + mov %g5, %o0 /* delay slot: return the value saved in %g5 at entry */ + + .align 32 +90: /* byte-at-a-time copy; %o3 holds dst - src so [%o1 + %o3] addresses the destination */ + subcc %o2, 1, %o2 + LOAD(ldub, %o1, %g1) + STORE(stb, %g1, %o1 + %o3) + bgu,pt %XCC, 90b + add %o1, 1, %o1 + retl + mov %g5, %o0 + +END(__memcpy_niagara2) + +#endif diff --git 
a/REORG.TODO/sysdeps/sparc/sparc64/multiarch/memcpy-niagara4.S b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/memcpy-niagara4.S new file mode 100644 index 0000000000..709b398364 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/memcpy-niagara4.S @@ -0,0 +1,332 @@ +/* Copy SIZE bytes from SRC to DEST. For SUN4V Niagara-4. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller (davem@davemloft.net) + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define ASI_BLK_INIT_QUAD_LDD_P 0xe2 /* ASI applied by the stxa in STORE_INIT */ + +#define FPRS_FEF 0x04 /* %fprs bit tested and set by FPU_ENTER */ + +/* On T4 it is very expensive to access ASRs like %fprs and + * %asi, avoiding a read or a write can save ~50 cycles. 
+ */ +#define FPU_ENTER \ + rd %fprs, %o5; \ + andcc %o5, FPRS_FEF, %g0; \ + be,a,pn %icc, 999f; \ + wr %g0, FPRS_FEF, %fprs; \ + 999: + +#define VISEntryHalf FPU_ENTER +#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs + +#define GLOBAL_SPARE %g5 + +#define STORE_ASI ASI_BLK_INIT_QUAD_LDD_P +#define EX_LD(x) x +#define EX_ST(x) x +#define EX_RETVAL(x) x +#define LOAD(type,addr,dest) type [addr], dest +#define STORE(type,src,addr) type src, [addr] +#define STORE_INIT(src,addr) stxa src, [addr] STORE_ASI + +#if IS_IN (libc) + + .register %g2,#scratch + .register %g3,#scratch + .register %g6,#scratch + + .text + +ENTRY(__mempcpy_niagara4) + ba,pt %icc, 101f + add %o0, %o2, %o3 /* delay slot: %o3 = dst + len, the value returned at .Lexit */ +END(__mempcpy_niagara4) + + .align 32 +ENTRY(__memcpy_niagara4) +100: /* %o0=dst, %o1=src, %o2=len */ + mov %o0, %o3 /* %o3 = dst, the value returned at .Lexit */ +101: +#ifndef __arch64__ + srl %o2, 0, %o2 +#endif + brz,pn %o2, .Lexit + cmp %o2, 3 + ble,pn %icc, .Ltiny + cmp %o2, 19 + ble,pn %icc, .Lsmall + or %o0, %o1, %g2 /* delay slot: %g2 = dst | src, alignment-tested in .Lsmall and .Lmedium */ + cmp %o2, 128 + bl,pn %icc, .Lmedium + nop + +.Llarge:/* len >= 0x80 */ + /* First get dest 8 byte aligned. */ + sub %g0, %o0, %g1 + and %g1, 0x7, %g1 + brz,pt %g1, 51f + sub %o2, %g1, %o2 + +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) + add %o1, 1, %o1 + subcc %g1, 1, %g1 + add %o0, 1, %o0 + bne,pt %icc, 1b + EX_ST(STORE(stb, %g2, %o0 - 0x01)) + +51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong) + LOAD(prefetch, %o1 + 0x080, #n_reads_strong) + LOAD(prefetch, %o1 + 0x0c0, #n_reads_strong) + LOAD(prefetch, %o1 + 0x100, #n_reads_strong) + LOAD(prefetch, %o1 + 0x140, #n_reads_strong) + LOAD(prefetch, %o1 + 0x180, #n_reads_strong) + LOAD(prefetch, %o1 + 0x1c0, #n_reads_strong) + LOAD(prefetch, %o1 + 0x200, #n_reads_strong) + + /* Check if we can use the straight fully aligned + * loop, or we require the alignaddr/faligndata variant. + */ + andcc %o1, 0x7, %o5 + bne,pn %icc, .Llarge_src_unaligned + sub %g0, %o0, %g1 + + /* Legitimize the use of initializing stores by getting dest + * to be 64-byte aligned. 
+ */ + and %g1, 0x3f, %g1 + brz,pt %g1, .Llarge_aligned + sub %o2, %g1, %o2 + +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2)) + add %o1, 8, %o1 + subcc %g1, 8, %g1 + add %o0, 8, %o0 + bne,pt %icc, 1b + EX_ST(STORE(stx, %g2, %o0 - 0x08)) + +.Llarge_aligned: + /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */ + andn %o2, 0x3f, %o4 /* %o4 = byte count consumed by the 64-byte loop below */ + sub %o2, %o4, %o2 /* %o2 = leftover bytes (len mod 64) */ + +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) + add %o1, 0x40, %o1 + EX_LD(LOAD(ldx, %o1 - 0x38, %g2)) + subcc %o4, 0x40, %o4 + EX_LD(LOAD(ldx, %o1 - 0x30, %g3)) + EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE)) + EX_LD(LOAD(ldx, %o1 - 0x20, %o5)) + EX_ST(STORE_INIT(%g1, %o0)) + add %o0, 0x08, %o0 + EX_ST(STORE_INIT(%g2, %o0)) + add %o0, 0x08, %o0 + EX_LD(LOAD(ldx, %o1 - 0x18, %g2)) + EX_ST(STORE_INIT(%g3, %o0)) + add %o0, 0x08, %o0 + EX_LD(LOAD(ldx, %o1 - 0x10, %g3)) + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) + add %o0, 0x08, %o0 + EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE)) + EX_ST(STORE_INIT(%o5, %o0)) + add %o0, 0x08, %o0 + EX_ST(STORE_INIT(%g2, %o0)) + add %o0, 0x08, %o0 + EX_ST(STORE_INIT(%g3, %o0)) + add %o0, 0x08, %o0 + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) + add %o0, 0x08, %o0 + bne,pt %icc, 1b + LOAD(prefetch, %o1 + 0x200, #n_reads_strong) + + membar #StoreLoad | #StoreStore + + brz,pn %o2, .Lexit + cmp %o2, 19 + ble,pn %icc, .Lsmall_unaligned + nop + ba,a,pt %icc, .Lmedium_noprefetch + +.Lexit: retl + mov EX_RETVAL(%o3), %o0 /* delay slot: return the value saved in %o3 at entry */ + +.Llarge_src_unaligned: + andn %o2, 0x3f, %o4 + sub %o2, %o4, %o2 + VISEntryHalf + alignaddr %o1, %g0, %g1 + add %o1, %o4, %o1 + EX_LD(LOAD(ldd, %g1 + 0x00, %f0)) +1: EX_LD(LOAD(ldd, %g1 + 0x08, %f2)) + subcc %o4, 0x40, %o4 + EX_LD(LOAD(ldd, %g1 + 0x10, %f4)) + EX_LD(LOAD(ldd, %g1 + 0x18, %f6)) + EX_LD(LOAD(ldd, %g1 + 0x20, %f8)) + EX_LD(LOAD(ldd, %g1 + 0x28, %f10)) + EX_LD(LOAD(ldd, %g1 + 0x30, %f12)) + EX_LD(LOAD(ldd, %g1 + 0x38, %f14)) + faligndata %f0, %f2, %f16 + EX_LD(LOAD(ldd, %g1 + 0x40, %f0)) + faligndata %f2, %f4, %f18 + add %g1, 0x40, %g1 + faligndata %f4, %f6, %f20 + faligndata %f6, %f8, 
%f22 + faligndata %f8, %f10, %f24 + faligndata %f10, %f12, %f26 + faligndata %f12, %f14, %f28 + faligndata %f14, %f0, %f30 + EX_ST(STORE(std, %f16, %o0 + 0x00)) + EX_ST(STORE(std, %f18, %o0 + 0x08)) + EX_ST(STORE(std, %f20, %o0 + 0x10)) + EX_ST(STORE(std, %f22, %o0 + 0x18)) + EX_ST(STORE(std, %f24, %o0 + 0x20)) + EX_ST(STORE(std, %f26, %o0 + 0x28)) + EX_ST(STORE(std, %f28, %o0 + 0x30)) + EX_ST(STORE(std, %f30, %o0 + 0x38)) + add %o0, 0x40, %o0 + bne,pt %icc, 1b + LOAD(prefetch, %g1 + 0x200, #n_reads_strong) + VISExitHalf + + brz,pn %o2, .Lexit + cmp %o2, 19 + ble,pn %icc, .Lsmall_unaligned + nop + ba,a,pt %icc, .Lmedium_unaligned + +.Lmedium: + LOAD(prefetch, %o1 + 0x40, #n_reads_strong) + andcc %g2, 0x7, %g0 + bne,pn %icc, .Lmedium_unaligned + nop +.Lmedium_noprefetch: + andncc %o2, 0x20 - 1, %o5 /* %o5 = byte count consumed by the 32-byte loop below */ + be,pn %icc, 2f + sub %o2, %o5, %o2 +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) + EX_LD(LOAD(ldx, %o1 + 0x08, %g2)) + EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE)) + EX_LD(LOAD(ldx, %o1 + 0x18, %o4)) + add %o1, 0x20, %o1 + subcc %o5, 0x20, %o5 + EX_ST(STORE(stx, %g1, %o0 + 0x00)) + EX_ST(STORE(stx, %g2, %o0 + 0x08)) + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10)) + EX_ST(STORE(stx, %o4, %o0 + 0x18)) + bne,pt %icc, 1b + add %o0, 0x20, %o0 +2: andcc %o2, 0x18, %o5 + be,pt %icc, 3f + sub %o2, %o5, %o2 +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) + add %o1, 0x08, %o1 + add %o0, 0x08, %o0 + subcc %o5, 0x08, %o5 + bne,pt %icc, 1b + EX_ST(STORE(stx, %g1, %o0 - 0x08)) +3: brz,pt %o2, .Lexit + cmp %o2, 0x04 + bl,pn %icc, .Ltiny + nop + EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) + add %o1, 0x04, %o1 + add %o0, 0x04, %o0 + subcc %o2, 0x04, %o2 + bne,pn %icc, .Ltiny + EX_ST(STORE(stw, %g1, %o0 - 0x04)) + ba,a,pt %icc, .Lexit +.Lmedium_unaligned: + /* First get dest 8 byte aligned. 
*/ + sub %g0, %o0, %g1 + and %g1, 0x7, %g1 + brz,pt %g1, 2f + sub %o2, %g1, %o2 + +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) + add %o1, 1, %o1 + subcc %g1, 1, %g1 + add %o0, 1, %o0 + bne,pt %icc, 1b + EX_ST(STORE(stb, %g2, %o0 - 0x01)) +2: + and %o1, 0x7, %g1 + brz,pn %g1, .Lmedium_noprefetch + sll %g1, 3, %g1 /* delay slot: %g1 = src misalignment in bits */ + mov 64, %g2 + sub %g2, %g1, %g2 /* %g2 = 64 - %g1, the complementary shift count */ + andn %o1, 0x7, %o1 + EX_LD(LOAD(ldx, %o1 + 0x00, %o4)) + sllx %o4, %g1, %o4 + andn %o2, 0x08 - 1, %o5 + sub %o2, %o5, %o2 +1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3)) + add %o1, 0x08, %o1 + subcc %o5, 0x08, %o5 + srlx %g3, %g2, GLOBAL_SPARE + or GLOBAL_SPARE, %o4, GLOBAL_SPARE + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00)) + add %o0, 0x08, %o0 + bne,pt %icc, 1b + sllx %g3, %g1, %o4 + srl %g1, 3, %g1 + add %o1, %g1, %o1 + brz,pn %o2, .Lexit + nop + ba,pt %icc, .Lsmall_unaligned + +.Ltiny: /* copies the last 1 to 3 bytes */ + EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) + subcc %o2, 1, %o2 + be,pn %icc, .Lexit + EX_ST(STORE(stb, %g1, %o0 + 0x00)) + EX_LD(LOAD(ldub, %o1 + 0x01, %g1)) + subcc %o2, 1, %o2 + be,pn %icc, .Lexit + EX_ST(STORE(stb, %g1, %o0 + 0x01)) + EX_LD(LOAD(ldub, %o1 + 0x02, %g1)) + ba,pt %icc, .Lexit + EX_ST(STORE(stb, %g1, %o0 + 0x02)) + +.Lsmall: + andcc %g2, 0x3, %g0 + bne,pn %icc, .Lsmall_unaligned + andn %o2, 0x4 - 1, %o5 + sub %o2, %o5, %o2 +1: + EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) + add %o1, 0x04, %o1 + subcc %o5, 0x04, %o5 + add %o0, 0x04, %o0 + bne,pt %icc, 1b + EX_ST(STORE(stw, %g1, %o0 - 0x04)) + brz,pt %o2, .Lexit + nop + ba,a,pt %icc, .Ltiny + +.Lsmall_unaligned: +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) + add %o1, 1, %o1 + add %o0, 1, %o0 + subcc %o2, 1, %o2 + bne,pt %icc, 1b + EX_ST(STORE(stb, %g1, %o0 - 0x01)) + ba,a,pt %icc, .Lexit +END(__memcpy_niagara4) + +#endif diff --git a/REORG.TODO/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S new file mode 100644 index 0000000000..b8f5c3cb8f --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S @@ -0,0 +1,325 @@ +/* Copy SIZE 
bytes from SRC to DEST. + For UltraSPARC-III. + Copyright (C) 2001-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller (davem@redhat.com) + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define ASI_BLK_P 0xf0 +#define FPRS_FEF 0x04 +#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs +#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs + +#ifndef XCC +#define USE_BPR +#define XCC xcc +#endif + +#if IS_IN (libc) + + .register %g2,#scratch + .register %g3,#scratch + .register %g6,#scratch + + .text + +ENTRY(__mempcpy_ultra3) + ba,pt %XCC, 101f + add %o0, %o2, %g5 /* delay slot: %g5 = dst + len, the value returned at out */ +END(__mempcpy_ultra3) + + /* Special/non-trivial issues of this code: + * + * 1) %o5 is preserved from VISEntryHalf to VISExitHalf + * 2) Only low 32 FPU registers are used so that only the + * lower half of the FPU register set is dirtied by this + * code. This is especially important in the kernel. + * 3) This code never prefetches cachelines past the end + * of the source buffer. + * + * The cheetah's flexible spine, oversized liver, enlarged heart, + * slender muscular body, and claws make it the swiftest hunter + * in Africa and the fastest animal on land. Can reach speeds + * of up to 2.4GB per second. 
+ */ + .align 32 +ENTRY(__memcpy_ultra3) + +100: /* %o0=dst, %o1=src, %o2=len */ + mov %o0, %g5 /* %g5 = dst, the value returned at out */ +101: + cmp %o2, 0 + be,pn %XCC, out +218: or %o0, %o1, %o3 + cmp %o2, 16 + bleu,a,pn %XCC, small_copy + or %o3, %o2, %o3 + + cmp %o2, 256 + blu,pt %XCC, medium_copy + andcc %o3, 0x7, %g0 + + ba,pt %xcc, enter + andcc %o0, 0x3f, %g2 + + /* Here len >= 256 and condition codes reflect execution + * of "andcc %o0, 0x3f, %g2", done by caller. + */ + .align 64 +enter: + /* Is 'dst' already aligned on a 64-byte boundary? */ + be,pt %XCC, 2f + + /* Compute abs((dst & 0x3f) - 0x40) into %g2. This is the number + * of bytes to copy to make 'dst' 64-byte aligned. We pre- + * subtract this from 'len'. + */ + sub %g2, 0x40, %g2 + sub %g0, %g2, %g2 + sub %o2, %g2, %o2 + + /* Copy %g2 bytes from src to dst, one byte at a time. */ +1: ldub [%o1 + 0x00], %o3 + add %o1, 0x1, %o1 + add %o0, 0x1, %o0 + subcc %g2, 0x1, %g2 + + bg,pt %XCC, 1b + stb %o3, [%o0 + -1] + +2: VISEntryHalf + and %o1, 0x7, %g1 + ba,pt %xcc, begin + alignaddr %o1, %g0, %o1 + + .align 64 +begin: + prefetch [%o1 + 0x000], #one_read + prefetch [%o1 + 0x040], #one_read + andn %o2, (0x40 - 1), %o4 + prefetch [%o1 + 0x080], #one_read + prefetch [%o1 + 0x0c0], #one_read + ldd [%o1 + 0x000], %f0 + prefetch [%o1 + 0x100], #one_read + ldd [%o1 + 0x008], %f2 + prefetch [%o1 + 0x140], #one_read + ldd [%o1 + 0x010], %f4 + prefetch [%o1 + 0x180], #one_read + faligndata %f0, %f2, %f16 + ldd [%o1 + 0x018], %f6 + faligndata %f2, %f4, %f18 + ldd [%o1 + 0x020], %f8 + faligndata %f4, %f6, %f20 + ldd [%o1 + 0x028], %f10 + faligndata %f6, %f8, %f22 + + ldd [%o1 + 0x030], %f12 + faligndata %f8, %f10, %f24 + ldd [%o1 + 0x038], %f14 + faligndata %f10, %f12, %f26 + ldd [%o1 + 0x040], %f0 + + sub %o4, 0x80, %o4 + add %o1, 0x40, %o1 + ba,pt %xcc, loop + srl %o4, 6, %o3 + + .align 64 +loop: + ldd [%o1 + 0x008], %f2 + faligndata %f12, %f14, %f28 + ldd [%o1 + 0x010], %f4 + faligndata %f14, %f0, %f30 + stda %f16, [%o0] ASI_BLK_P + ldd [%o1 + 
0x018], %f6 + faligndata %f0, %f2, %f16 + + ldd [%o1 + 0x020], %f8 + faligndata %f2, %f4, %f18 + ldd [%o1 + 0x028], %f10 + faligndata %f4, %f6, %f20 + ldd [%o1 + 0x030], %f12 + faligndata %f6, %f8, %f22 + ldd [%o1 + 0x038], %f14 + faligndata %f8, %f10, %f24 + + ldd [%o1 + 0x040], %f0 + prefetch [%o1 + 0x180], #one_read + faligndata %f10, %f12, %f26 + subcc %o3, 0x01, %o3 + add %o1, 0x40, %o1 + bg,pt %XCC, loop + add %o0, 0x40, %o0 + + /* Finally we copy the last full 64-byte block. */ +loopfini: + ldd [%o1 + 0x008], %f2 + faligndata %f12, %f14, %f28 + ldd [%o1 + 0x010], %f4 + faligndata %f14, %f0, %f30 + stda %f16, [%o0] ASI_BLK_P + ldd [%o1 + 0x018], %f6 + faligndata %f0, %f2, %f16 + ldd [%o1 + 0x020], %f8 + faligndata %f2, %f4, %f18 + ldd [%o1 + 0x028], %f10 + faligndata %f4, %f6, %f20 + ldd [%o1 + 0x030], %f12 + faligndata %f6, %f8, %f22 + ldd [%o1 + 0x038], %f14 + faligndata %f8, %f10, %f24 + cmp %g1, 0 + be,pt %XCC, 1f + add %o0, 0x40, %o0 + ldd [%o1 + 0x040], %f0 +1: faligndata %f10, %f12, %f26 + faligndata %f12, %f14, %f28 + faligndata %f14, %f0, %f30 + stda %f16, [%o0] ASI_BLK_P + add %o0, 0x40, %o0 + add %o1, 0x40, %o1 + membar #Sync + + /* Now we copy the (len modulo 64) bytes at the end. + * Note how we borrow the %f0 loaded above. + * + * Also notice how this code is careful not to perform a + * load past the end of the src buffer. + */ +loopend: + and %o2, 0x3f, %o2 + andcc %o2, 0x38, %g2 + be,pn %XCC, endcruft + subcc %g2, 0x8, %g2 + be,pn %XCC, endcruft + cmp %g1, 0 + + be,a,pt %XCC, 1f + ldd [%o1 + 0x00], %f0 + +1: ldd [%o1 + 0x08], %f2 + add %o1, 0x8, %o1 + sub %o2, 0x8, %o2 + subcc %g2, 0x8, %g2 + faligndata %f0, %f2, %f8 + std %f8, [%o0 + 0x00] + be,pn %XCC, endcruft + add %o0, 0x8, %o0 + ldd [%o1 + 0x08], %f0 + add %o1, 0x8, %o1 + sub %o2, 0x8, %o2 + subcc %g2, 0x8, %g2 + faligndata %f2, %f0, %f8 + std %f8, [%o0 + 0x00] + bne,pn %XCC, 1b + add %o0, 0x8, %o0 + + /* If anything is left, we copy it one byte at a time. 
+ * Note that %g1 is (src & 0x7) saved above before the + * alignaddr was performed. + */ +endcruft: + cmp %o2, 0 + add %o1, %g1, %o1 + VISExitHalf + be,pn %XCC, out + sub %o0, %o1, %o3 + + andcc %g1, 0x7, %g0 + bne,pn %icc, small_copy_unaligned + andcc %o2, 0x8, %g0 + be,pt %icc, 1f + nop + ldx [%o1], %o5 + stx %o5, [%o1 + %o3] + add %o1, 0x8, %o1 + +1: andcc %o2, 0x4, %g0 + be,pt %icc, 1f + nop + lduw [%o1], %o5 + stw %o5, [%o1 + %o3] + add %o1, 0x4, %o1 + +1: andcc %o2, 0x2, %g0 + be,pt %icc, 1f + nop + lduh [%o1], %o5 + sth %o5, [%o1 + %o3] + add %o1, 0x2, %o1 + +1: andcc %o2, 0x1, %g0 + be,pt %icc, out + nop + ldub [%o1], %o5 + ba,pt %xcc, out + stb %o5, [%o1 + %o3] + +medium_copy: /* 16 < len < 256 */ + bne,pn %XCC, small_copy_unaligned + sub %o0, %o1, %o3 + +medium_copy_aligned: + andn %o2, 0x7, %o4 + and %o2, 0x7, %o2 +1: subcc %o4, 0x8, %o4 + ldx [%o1], %o5 + stx %o5, [%o1 + %o3] + bgu,pt %XCC, 1b + add %o1, 0x8, %o1 + andcc %o2, 0x4, %g0 + be,pt %XCC, 1f + nop + sub %o2, 0x4, %o2 + lduw [%o1], %o5 + stw %o5, [%o1 + %o3] + add %o1, 0x4, %o1 +1: cmp %o2, 0 + be,pt %XCC, out + nop + ba,pt %xcc, small_copy_unaligned + nop + +small_copy: /* 0 < len <= 16 */ + andcc %o3, 0x3, %g0 + bne,pn %XCC, small_copy_unaligned + sub %o0, %o1, %o3 + +small_copy_aligned: + subcc %o2, 4, %o2 + lduw [%o1], %g1 + stw %g1, [%o1 + %o3] + bgu,pt %XCC, small_copy_aligned + add %o1, 4, %o1 + +out: retl + mov %g5, %o0 /* delay slot: return the value saved in %g5 at entry */ + + .align 32 +small_copy_unaligned: + subcc %o2, 1, %o2 + ldub [%o1], %g1 + stb %g1, [%o1 + %o3] + bgu,pt %XCC, small_copy_unaligned + add %o1, 1, %o1 + retl + mov %g5, %o0 + +END(__memcpy_ultra3) + +#endif
\ No newline at end of file diff --git a/REORG.TODO/sysdeps/sparc/sparc64/multiarch/memcpy.S b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/memcpy.S new file mode 100644 index 0000000000..b6396eeae5 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/memcpy.S @@ -0,0 +1,167 @@ +/* Multiple versions of memcpy + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by David S. Miller (davem@davemloft.net) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#if IS_IN (libc) + .text +ENTRY(memcpy) + .type memcpy, @gnu_indirect_function /* IFUNC resolver: tests the hwcap bits passed in %o0 and returns the address of the selected implementation */ +# ifdef SHARED + SETUP_PIC_REG_LEAF(o3, o5) +# endif + set HWCAP_SPARC_CRYPTO, %o1 + andcc %o0, %o1, %g0 + be 1f + andcc %o0, HWCAP_SPARC_N2, %g0 /* delay slot: sets the condition codes tested by the be at the next 1: label */ +# ifdef SHARED + sethi %gdop_hix22(__memcpy_niagara4), %o1 + xor %o1, %gdop_lox10(__memcpy_niagara4), %o1 +# else + set __memcpy_niagara4, %o1 +# endif + ba 10f + nop +1: be 1f + andcc %o0, HWCAP_SPARC_BLKINIT, %g0 +# ifdef SHARED + sethi %gdop_hix22(__memcpy_niagara2), %o1 + xor %o1, %gdop_lox10(__memcpy_niagara2), %o1 +# else + set __memcpy_niagara2, %o1 +# endif + ba 10f + nop +1: be 1f + andcc %o0, HWCAP_SPARC_ULTRA3, %g0 +# ifdef SHARED + sethi %gdop_hix22(__memcpy_niagara1), %o1 + xor %o1, %gdop_lox10(__memcpy_niagara1), %o1 +# else + set __memcpy_niagara1, %o1 +# endif + ba 10f + nop +1: be 9f + nop +# ifdef SHARED + sethi %gdop_hix22(__memcpy_ultra3), %o1 + xor %o1, %gdop_lox10(__memcpy_ultra3), %o1 +# else + set __memcpy_ultra3, %o1 +# endif + ba 10f + nop +9: +# ifdef SHARED + sethi %gdop_hix22(__memcpy_ultra1), %o1 + xor %o1, %gdop_lox10(__memcpy_ultra1), %o1 +# else + set __memcpy_ultra1, %o1 +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 +# endif + retl + mov %o1, %o0 /* delay slot: return the selected address */ +END(memcpy) + +ENTRY(__mempcpy) + .type __mempcpy, @gnu_indirect_function /* IFUNC resolver: same hwcap test order as memcpy above, selecting the __mempcpy_ variants */ +# ifdef SHARED + SETUP_PIC_REG_LEAF(o3, o5) +# endif + set HWCAP_SPARC_CRYPTO, %o1 + andcc %o0, %o1, %g0 + be 1f + andcc %o0, HWCAP_SPARC_N2, %g0 +# ifdef SHARED + sethi %gdop_hix22(__mempcpy_niagara4), %o1 + xor %o1, %gdop_lox10(__mempcpy_niagara4), %o1 +# else + set __mempcpy_niagara4, %o1 +# endif + ba 10f + nop +1: be 1f + andcc %o0, HWCAP_SPARC_BLKINIT, %g0 +# ifdef SHARED + sethi %gdop_hix22(__mempcpy_niagara2), %o1 + xor %o1, %gdop_lox10(__mempcpy_niagara2), %o1 +# else + set __mempcpy_niagara2, %o1 +# endif + ba 10f + nop +1: be 1f + andcc %o0, HWCAP_SPARC_ULTRA3, %g0 +# ifdef SHARED + sethi %gdop_hix22(__mempcpy_niagara1), %o1 + xor %o1, 
%gdop_lox10(__mempcpy_niagara1), %o1 +# else + set __mempcpy_niagara1, %o1 +# endif + ba 10f + nop +1: be 9f + nop +# ifdef SHARED + sethi %gdop_hix22(__mempcpy_ultra3), %o1 + xor %o1, %gdop_lox10(__mempcpy_ultra3), %o1 +# else + set __mempcpy_ultra3, %o1 +# endif + ba 10f + nop +9: +# ifdef SHARED + sethi %gdop_hix22(__mempcpy_ultra1), %o1 + xor %o1, %gdop_lox10(__mempcpy_ultra1), %o1 +# else + set __mempcpy_ultra1, %o1 +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 +# endif + retl + mov %o1, %o0 /* delay slot: return the selected address */ +END(__mempcpy) + +libc_hidden_builtin_def (memcpy) + +libc_hidden_def (__mempcpy) +weak_alias (__mempcpy, mempcpy) +libc_hidden_builtin_def (mempcpy) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) +#undef weak_alias +#define weak_alias(x, y) +#undef libc_hidden_def +#define libc_hidden_def(name) + +#define memcpy __memcpy_ultra1 /* the include at the end of this file is assembled under the _ultra1 names */ +#define __mempcpy __mempcpy_ultra1 + +#endif + +#include "../memcpy.S" diff --git a/REORG.TODO/sysdeps/sparc/sparc64/multiarch/memset-niagara1.S b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/memset-niagara1.S new file mode 100644 index 0000000000..45b2251691 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/memset-niagara1.S @@ -0,0 +1,177 @@ +/* Set a block of memory to some byte value. For SUN4V Niagara. + Copyright (C) 2006-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller (davem@davemloft.net) + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define ASI_BLK_INIT_QUAD_LDD_P 0xe2 +#define ASI_P 0x80 +#define ASI_PNF 0x82 + +#ifndef XCC +#define USE_BPR +#define XCC xcc +#endif + +#if IS_IN (libc) + + .register %g2,#scratch + + .text + .align 32 + +ENTRY(__memset_niagara1) + /* %o0=buf, %o1=pat, %o2=len */ + and %o1, 0xff, %o3 + mov %o2, %o1 /* len moves to %o1, where the shared code at 1: expects it */ + sllx %o3, 8, %g1 + or %g1, %o3, %o2 + sllx %o2, 16, %g1 + or %g1, %o2, %o2 + sllx %o2, 32, %g1 + ba,pt %XCC, 1f + or %g1, %o2, %o2 /* delay slot: %o2 = fill byte replicated into all 8 bytes */ +END(__memset_niagara1) + +ENTRY(__bzero_niagara1) + clr %o2 +1: +# ifndef USE_BPR + srl %o1, 0, %o1 /* only when USE_BPR is unset (XCC supplied by an including file, presumably the 32-bit build): zero-extend the 32-bit len */ +# endif + brz,pn %o1, 90f + mov %o0, %o3 /* delay slot: %o3 = original buf, returned at 90: */ + + wr %g0, ASI_P, %asi + + cmp %o1, 15 + blu,pn %XCC, 70f + andcc %o0, 0x7, %g1 + be,pt %XCC, 2f + mov 8, %g2 + sub %g2, %g1, %g1 + sub %o1, %g1, %o1 +1: stba %o2, [%o0 + 0x00] %asi + subcc %g1, 1, %g1 + bne,pt %XCC, 1b + add %o0, 1, %o0 +2: cmp %o1, 128 + blu,pn %XCC, 60f + andcc %o0, (64 - 1), %g1 + be,pt %XCC, 40f + mov 64, %g2 + sub %g2, %g1, %g1 + sub %o1, %g1, %o1 +1: stxa %o2, [%o0 + 0x00] %asi + subcc %g1, 8, %g1 + bne,pt %XCC, 1b + add %o0, 8, %o0 + +40: + wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi + andn %o1, (64 - 1), %g1 + sub %o1, %g1, %o1 + + andn %g1, (256 - 1), %g2 + brz,pt %g2, 50f + and %g1, (256 - 1), %g1 + +45: + stxa %o2, [%o0 + 0x00] %asi + stxa %o2, [%o0 + 0x08] %asi + stxa %o2, [%o0 + 0x10] %asi + stxa %o2, [%o0 + 0x18] %asi + stxa %o2, [%o0 + 0x20] %asi + stxa %o2, [%o0 + 0x28] %asi + stxa %o2, [%o0 + 0x30] %asi + stxa %o2, [%o0 + 0x38] %asi + stxa %o2, [%o0 + 0x40] %asi + stxa %o2, [%o0 + 0x48] %asi + stxa %o2, [%o0 + 0x50] %asi + stxa %o2, [%o0 + 0x58] %asi + stxa %o2, [%o0 + 0x60] %asi + stxa %o2, [%o0 + 0x68] %asi + stxa %o2, [%o0 + 0x70] %asi + stxa %o2, [%o0 + 0x78] %asi + stxa %o2, [%o0 + 0x80] %asi + stxa %o2, [%o0 + 0x88] %asi + stxa %o2, [%o0 + 0x90] %asi + stxa %o2, [%o0 + 0x98] %asi + 
stxa %o2, [%o0 + 0xa0] %asi + stxa %o2, [%o0 + 0xa8] %asi + stxa %o2, [%o0 + 0xb0] %asi + stxa %o2, [%o0 + 0xb8] %asi + stxa %o2, [%o0 + 0xc0] %asi + stxa %o2, [%o0 + 0xc8] %asi + stxa %o2, [%o0 + 0xd0] %asi + stxa %o2, [%o0 + 0xd8] %asi + stxa %o2, [%o0 + 0xe0] %asi + stxa %o2, [%o0 + 0xe8] %asi + stxa %o2, [%o0 + 0xf0] %asi + stxa %o2, [%o0 + 0xf8] %asi + subcc %g2, 256, %g2 + bne,pt %XCC, 45b + add %o0, 256, %o0 + + brz,pn %g1, 55f + nop + +50: + stxa %o2, [%o0 + 0x00] %asi + stxa %o2, [%o0 + 0x08] %asi + stxa %o2, [%o0 + 0x10] %asi + stxa %o2, [%o0 + 0x18] %asi + stxa %o2, [%o0 + 0x20] %asi + stxa %o2, [%o0 + 0x28] %asi + stxa %o2, [%o0 + 0x30] %asi + stxa %o2, [%o0 + 0x38] %asi + subcc %g1, 64, %g1 + bne,pt %XCC, 50b + add %o0, 64, %o0 + +55: + wr %g0, ASI_P, %asi + brz,pn %o1, 80f +60: + andncc %o1, 0x7, %g1 + be,pn %XCC, 2f + sub %o1, %g1, %o1 +1: stxa %o2, [%o0 + 0x00] %asi + subcc %g1, 8, %g1 + bne,pt %XCC, 1b + add %o0, 8, %o0 +2: brz,pt %o1, 80f + nop + +70: +1: stba %o2, [%o0 + 0x00] %asi + subcc %o1, 1, %o1 + bne,pt %XCC, 1b + add %o0, 1, %o0 + + /* fallthrough */ + +80: + wr %g0, ASI_PNF, %asi + +90: + retl + mov %o3, %o0 /* delay slot: return the original buf pointer saved in %o3 */ +END(__bzero_niagara1) + +#endif diff --git a/REORG.TODO/sysdeps/sparc/sparc64/multiarch/memset-niagara4.S b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/memset-niagara4.S new file mode 100644 index 0000000000..c04a07a7f9 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/memset-niagara4.S @@ -0,0 +1,124 @@ +/* Set a block of memory to some byte value. For SUN4V Niagara-4. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller (davem@davemloft.net) + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +#define ASI_BLK_INIT_QUAD_LDD_P 0xe2 + +#if IS_IN (libc) + + .register %g2, #scratch + .register %g3, #scratch + + .text + .align 32 + +ENTRY(__memset_niagara4) + andcc %o1, 0xff, %o4 + be,pt %icc, 1f + mov %o2, %o1 /* delay slot: len moves to %o1, where the shared code at 1: expects it */ + sllx %o4, 8, %g1 + or %g1, %o4, %o2 + sllx %o2, 16, %g1 + or %g1, %o2, %o2 + sllx %o2, 32, %g1 + ba,pt %icc, 1f + or %g1, %o2, %o4 /* delay slot: %o4 = fill byte replicated into all 8 bytes */ +END(__memset_niagara4) + + .align 32 +ENTRY(__bzero_niagara4) + clr %o4 +1: cmp %o1, 16 + ble %icc, .Ltiny + mov %o0, %o3 /* delay slot: %o3 = original buf, returned at .Lexit */ + sub %g0, %o0, %g1 + and %g1, 0x7, %g1 + brz,pt %g1, .Laligned8 + sub %o1, %g1, %o1 +1: stb %o4, [%o0 + 0x00] + subcc %g1, 1, %g1 + bne,pt %icc, 1b + add %o0, 1, %o0 +.Laligned8: + cmp %o1, 64 + (64 - 8) + ble .Lmedium + sub %g0, %o0, %g1 + andcc %g1, (64 - 1), %g1 + brz,pn %g1, .Laligned64 + sub %o1, %g1, %o1 +1: stx %o4, [%o0 + 0x00] + subcc %g1, 8, %g1 + bne,pt %icc, 1b + add %o0, 0x8, %o0 +.Laligned64: + andn %o1, 64 - 1, %g1 + sub %o1, %g1, %o1 + brnz,pn %o4, .Lnon_bzero_loop + mov 0x20, %g2 +1: stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P + subcc %g1, 0x40, %g1 + stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P + bne,pt %icc, 1b + add %o0, 0x40, %o0 +.Lpostloop: + cmp %o1, 8 + bl,pn %icc, .Ltiny + membar #StoreStore|#StoreLoad +.Lmedium: + andn %o1, 0x7, %g1 + sub %o1, %g1, %o1 +1: stx %o4, [%o0 + 0x00] + subcc %g1, 0x8, %g1 + bne,pt %icc, 1b + add %o0, 0x08, %o0 + andcc %o1, 0x4, %g1 + be,pt %icc, .Ltiny + sub %o1, %g1, %o1 + stw %o4, [%o0 + 0x00] + add %o0, 0x4, %o0 +.Ltiny: + cmp %o1, 0 + be,pn %icc, .Lexit +1: subcc %o1, 1, %o1 + stb %o4, [%o0 + 0x00] + bne,pt 
%icc, 1b + add %o0, 1, %o0 +.Lexit: + retl + mov %o3, %o0 /* delay slot: return the original buf pointer saved in %o3 */ +.Lnon_bzero_loop: + mov 0x08, %g3 + mov 0x28, %o5 +1: stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P + subcc %g1, 0x40, %g1 + stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P + stxa %o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P + stxa %o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P + add %o0, 0x10, %o0 + stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P + stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P + stxa %o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P + stxa %o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P + bne,pt %icc, 1b + add %o0, 0x30, %o0 + ba,a,pt %icc, .Lpostloop +END(__bzero_niagara4) + +#endif diff --git a/REORG.TODO/sysdeps/sparc/sparc64/multiarch/memset.S b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/memset.S new file mode 100644 index 0000000000..9469d5e7ce --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/memset.S @@ -0,0 +1,124 @@ +/* Multiple versions of memset and bzero + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2010-2017 Free Software Foundation, Inc. + Contributed by David S. Miller (davem@davemloft.net) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#if IS_IN (libc) + .text +ENTRY(memset) + .type memset, @gnu_indirect_function /* IFUNC resolver: tests the hwcap bits passed in %o0 and returns the address of the selected implementation */ +# ifdef SHARED + SETUP_PIC_REG_LEAF(o3, o5) +# endif + set HWCAP_SPARC_CRYPTO, %o1 + andcc %o0, %o1, %g0 + be 1f + andcc %o0, HWCAP_SPARC_BLKINIT, %g0 /* delay slot: sets the condition codes tested by the be at 1: */ +# ifdef SHARED + sethi %gdop_hix22(__memset_niagara4), %o1 + xor %o1, %gdop_lox10(__memset_niagara4), %o1 +# else + set __memset_niagara4, %o1 +# endif + ba 10f + nop +1: be 9f + nop +# ifdef SHARED + sethi %gdop_hix22(__memset_niagara1), %o1 + xor %o1, %gdop_lox10(__memset_niagara1), %o1 +# else + set __memset_niagara1, %o1 +# endif + ba 10f + nop +9: +# ifdef SHARED + sethi %gdop_hix22(__memset_ultra1), %o1 + xor %o1, %gdop_lox10(__memset_ultra1), %o1 +# else + set __memset_ultra1, %o1 +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 +# endif + retl + mov %o1, %o0 /* delay slot: return the selected address */ +END(memset) + +ENTRY(__bzero) + .type bzero, @gnu_indirect_function /* NOTE(review): names bzero although the entry symbol is __bzero; the other resolvers name their own entry symbol - confirm this is intended */ +# ifdef SHARED + SETUP_PIC_REG_LEAF(o3, o5) +# endif + set HWCAP_SPARC_CRYPTO, %o1 + andcc %o0, %o1, %g0 + be 1f + andcc %o0, HWCAP_SPARC_BLKINIT, %g0 +# ifdef SHARED + sethi %gdop_hix22(__bzero_niagara4), %o1 + xor %o1, %gdop_lox10(__bzero_niagara4), %o1 +# else + set __bzero_niagara4, %o1 +# endif + ba 10f + nop +1: be 9f + nop +# ifdef SHARED + sethi %gdop_hix22(__bzero_niagara1), %o1 + xor %o1, %gdop_lox10(__bzero_niagara1), %o1 +# else + set __bzero_niagara1, %o1 +# endif + ba 10f + nop +9: +# ifdef SHARED + sethi %gdop_hix22(__bzero_ultra1), %o1 + xor %o1, %gdop_lox10(__bzero_ultra1), %o1 +# else + set __bzero_ultra1, %o1 +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 +# endif + retl + mov %o1, %o0 /* delay slot: return the selected address */ +END(__bzero) + +weak_alias (__bzero, bzero) + +# undef weak_alias +# define weak_alias(a, b) + +libc_hidden_builtin_def (memset) + +#undef libc_hidden_builtin_def +#define libc_hidden_builtin_def(name) + +#define memset __memset_ultra1 /* the include at the end of this file is assembled under the _ultra1 names */ +#define __bzero __bzero_ultra1 + +#endif + +#include "../memset.S" diff --git a/REORG.TODO/sysdeps/sparc/sparc64/multiarch/mul_1-vis3.S 
b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/mul_1-vis3.S new file mode 100644 index 0000000000..7c4fa49ce4 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/mul_1-vis3.S @@ -0,0 +1,73 @@ +! SPARC v9 64-bit VIS3 __mpn_mul_1 -- Multiply a limb vector with a single +! limb and store the product in a second limb vector. +! +! Copyright (C) 2013-2017 Free Software Foundation, Inc. +! This file is part of the GNU C Library. +! Contributed by David S. Miller <davem@davemloft.net> +! +! The GNU C Library is free software; you can redistribute it and/or +! modify it under the terms of the GNU Lesser General Public +! License as published by the Free Software Foundation; either +! version 2.1 of the License, or (at your option) any later version. +! +! The GNU C Library is distributed in the hope that it will be useful, +! but WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +! Lesser General Public License for more details. +! +! You should have received a copy of the GNU Lesser General Public +! License along with the GNU C Library; if not, see +! <http://www.gnu.org/licenses/>. 
+ +#include <sysdep.h> + +#define res_ptr %o0 +#define s1_ptr %o1 +#define sz %o2 +#define s2_limb %o3 +#define carry %o5 +#define tmp1 %g1 +#define tmp2 %g2 +#define tmp3 %g3 +#define tmp4 %o4 + + .register %g2,#scratch + .register %g3,#scratch +ENTRY(__mpn_mul_1_vis3) + subcc sz, 1, sz + be .Lfinal_limb + clr carry /* delay slot: carry starts at zero on both paths */ + +.Lloop: /* each iteration multiplies two limbs of s1 by s2_limb */ + ldx [s1_ptr + 0x00], tmp1 + ldx [s1_ptr + 0x08], tmp4 + mulx tmp1, s2_limb, tmp3 + add s1_ptr, 0x10, s1_ptr + umulxhi tmp1, s2_limb, tmp2 + sub sz, 2, sz + mulx tmp4, s2_limb, tmp1 + add res_ptr, 0x10, res_ptr + umulxhi tmp4, s2_limb, tmp4 + addcc carry, tmp3, tmp3 + stx tmp3, [res_ptr - 0x10] + addxc %g0, tmp2, carry + addcc carry, tmp1, tmp1 + addxc %g0, tmp4, carry + brgz sz, .Lloop + stx tmp1, [res_ptr - 0x08] + + brlz,pt sz, .Lfinish + nop + +.Lfinal_limb: /* reached when exactly one limb is left */ + ldx [s1_ptr + 0x00], tmp1 + mulx tmp1, s2_limb, tmp3 + umulxhi tmp1, s2_limb, tmp2 + addcc carry, tmp3, tmp3 + addxc %g0, tmp2, carry + stx tmp3, [res_ptr + 0x00] + +.Lfinish: + retl + mov carry, %o0 /* delay slot: return the final carry limb */ +END(__mpn_mul_1_vis3) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/multiarch/mul_1.S b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/mul_1.S new file mode 100644 index 0000000000..75fca932b7 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/mul_1.S @@ -0,0 +1,56 @@ +/* Multiple versions of mul_1 + + Copyright (C) 2013-2017 Free Software Foundation, Inc. + Contributed by David S. Miller (davem@davemloft.net) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY(__mpn_mul_1) + .type __mpn_mul_1, @gnu_indirect_function /* IFUNC resolver: returns __mpn_mul_1_vis3 when HWCAP_SPARC_VIS3 is set in %o0, otherwise __mpn_mul_1_generic */ +# ifdef SHARED + SETUP_PIC_REG_LEAF(o3, o5) +# endif + set HWCAP_SPARC_VIS3, %o1 + andcc %o0, %o1, %g0 + be 1f + nop +# ifdef SHARED + sethi %gdop_hix22(__mpn_mul_1_vis3), %o1 + xor %o1, %gdop_lox10(__mpn_mul_1_vis3), %o1 +# else + set __mpn_mul_1_vis3, %o1 +# endif + ba 10f + nop +1: +# ifdef SHARED + sethi %gdop_hix22(__mpn_mul_1_generic), %o1 + xor %o1, %gdop_lox10(__mpn_mul_1_generic), %o1 +# else + set __mpn_mul_1_generic, %o1 +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 +# endif + retl + mov %o1, %o0 /* delay slot: return the selected address */ +END(__mpn_mul_1) + +#define __mpn_mul_1 __mpn_mul_1_generic /* the include below is assembled under the _generic name */ +#include "../mul_1.S" diff --git a/REORG.TODO/sysdeps/sparc/sparc64/multiarch/rtld-memcpy.c b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/rtld-memcpy.c new file mode 100644 index 0000000000..2452575343 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/rtld-memcpy.c @@ -0,0 +1 @@ +#include "../rtld-memcpy.c" diff --git a/REORG.TODO/sysdeps/sparc/sparc64/multiarch/rtld-memset.c b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/rtld-memset.c new file mode 100644 index 0000000000..c01eb0beb9 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/rtld-memset.c @@ -0,0 +1 @@ +#include "../rtld-memset.c" diff --git a/REORG.TODO/sysdeps/sparc/sparc64/multiarch/sha256-block.c b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/sha256-block.c new file mode 100644 index 0000000000..9d65315a5a --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/sha256-block.c @@ -0,0 +1,32 @@ +#include <sparc-ifunc.h> + +#define __sha256_process_block __sha256_process_block_generic +extern void __sha256_process_block_generic (const void *buffer, size_t len, + struct sha256_ctx *ctx); + +#include <crypt/sha256-block.c> + +#undef __sha256_process_block + +extern 
void __sha256_process_block_crop (const void *buffer, size_t len, + struct sha256_ctx *ctx); + +/* Return true when hwcap has HWCAP_SPARC_CRYPTO set and bit 6 of %asr26 is set. */ +static bool cpu_supports_sha256(int hwcap) +{ + unsigned long cfr; + + if (!(hwcap & HWCAP_SPARC_CRYPTO)) + return false; + + /* Read ancillary state register 26 into cfr; only done after the hwcap check above passed. */ + __asm__ ("rd %%asr26, %0" : "=r" (cfr)); + /* Bit 6 is presumably the SHA256 opcode capability flag - confirm against the CPU documentation. */ + if (cfr & (1 << 6)) + return true; + + return false; +} + +extern void __sha256_process_block (const void *buffer, size_t len, + struct sha256_ctx *ctx); +sparc_libc_ifunc (__sha256_process_block, + cpu_supports_sha256(hwcap) ? __sha256_process_block_crop + : __sha256_process_block_generic); diff --git a/REORG.TODO/sysdeps/sparc/sparc64/multiarch/sha256-crop.S b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/sha256-crop.S new file mode 100644 index 0000000000..8f07e4245a --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/sha256-crop.S @@ -0,0 +1,101 @@ +/* SHA256 using sparc crypto opcodes. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller (davem@davemloft.net) + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#define SHA256 \ + .word 0x81b02840; + + .text + .align 32 +ENTRY(__sha256_process_block_crop) + /* %o0=buffer, %o1=len, %o2=CTX */ + ldx [%o2 + 0x20], %g1 /* 64-bit counter kept at CTX + 0x20 ... */ + add %g1, %o1, %g1 /* ... is advanced by len */ + stx %g1, [%o2 + 0x20] + + ld [%o2 + 0x00], %f0 /* load the eight 32-bit state words into %f0-%f7 */ + ld [%o2 + 0x04], %f1 + ld [%o2 + 0x08], %f2 + ld [%o2 + 0x0c], %f3 + ld [%o2 + 0x10], %f4 + ld [%o2 + 0x14], %f5 + andcc %o0, 0x7, %g0 /* test the buffer pointer (not len) for 8-byte alignment; ldd below requires it */ + ld [%o2 + 0x18], %f6 + bne,pn %xcc, 10f /* misaligned buffer: use the alignaddr/faligndata loop */ + ld [%o2 + 0x1c], %f7 + +1: + ldd [%o0 + 0x00], %f8 + ldd [%o0 + 0x08], %f10 + ldd [%o0 + 0x10], %f12 + ldd [%o0 + 0x18], %f14 + ldd [%o0 + 0x20], %f16 + ldd [%o0 + 0x28], %f18 + ldd [%o0 + 0x30], %f20 + ldd [%o0 + 0x38], %f22 + + SHA256 + + subcc %o1, 0x40, %o1 /* one 64-byte block consumed */ + bne,pt %xcc, 1b + add %o0, 0x40, %o0 + +5: + st %f0, [%o2 + 0x00] /* write the updated state back to CTX */ + st %f1, [%o2 + 0x04] + st %f2, [%o2 + 0x08] + st %f3, [%o2 + 0x0c] + st %f4, [%o2 + 0x10] + st %f5, [%o2 + 0x14] + st %f6, [%o2 + 0x18] + retl + st %f7, [%o2 + 0x1c] +10: + alignaddr %o0, %g0, %o0 /* round %o0 down to 8 bytes and record the byte offset for faligndata */ + + ldd [%o0 + 0x00], %f10 +1: + ldd [%o0 + 0x08], %f12 + ldd [%o0 + 0x10], %f14 + ldd [%o0 + 0x18], %f16 + ldd [%o0 + 0x20], %f18 + ldd [%o0 + 0x28], %f20 + ldd [%o0 + 0x30], %f22 + ldd [%o0 + 0x38], %f24 + ldd [%o0 + 0x40], %f26 + + faligndata %f10, %f12, %f8 /* extract the block's eight doublewords from nine aligned ones */ + faligndata %f12, %f14, %f10 + faligndata %f14, %f16, %f12 + faligndata %f16, %f18, %f14 + faligndata %f18, %f20, %f16 + faligndata %f20, %f22, %f18 + faligndata %f22, %f24, %f20 + faligndata %f24, %f26, %f22 + + SHA256 + + subcc %o1, 0x40, %o1 + fsrc2 %f26, %f10 /* last doubleword loaded becomes the first one of the next block */ + bne,pt %xcc, 1b + add %o0, 0x40, %o0 + + ba,a,pt %xcc, 5b +END(__sha256_process_block_crop) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/multiarch/sha512-block.c b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/sha512-block.c new file mode 100644 index 0000000000..2863e05d09 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/sha512-block.c @@ -0,0 +1,32 @@ +#include <sparc-ifunc.h> + +#define __sha512_process_block __sha512_process_block_generic +extern void __sha512_process_block_generic 
(const void *buffer, size_t len, + struct sha512_ctx *ctx); + +#include <crypt/sha512-block.c> + +#undef __sha512_process_block + +extern void __sha512_process_block_crop (const void *buffer, size_t len, + struct sha512_ctx *ctx); + +static bool cpu_supports_sha512(int hwcap) /* true when the crypto hwcap is set and the CPU's CFR advertises the SHA512 opcode */ +{ + unsigned long cfr; + + if (!(hwcap & HWCAP_SPARC_CRYPTO)) + return false; + + __asm__ ("rd %%asr26, %0" : "=r" (cfr)); /* read the CFR capability bits from %asr26 */ + if (cfr & (1 << 7)) /* CFR bit 7 (0x80) is SHA512; bit 6 (0x40) is SHA256 */ + return true; + + return false; +} + +extern void __sha512_process_block (const void *buffer, size_t len, + struct sha512_ctx *ctx); +sparc_libc_ifunc (__sha512_process_block, + cpu_supports_sha512(hwcap) ? __sha512_process_block_crop + : __sha512_process_block_generic); diff --git a/REORG.TODO/sysdeps/sparc/sparc64/multiarch/sha512-crop.S b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/sha512-crop.S new file mode 100644 index 0000000000..f78354c485 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/sha512-crop.S @@ -0,0 +1,131 @@ +/* SHA512 using sparc crypto opcodes. + Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller (davem@davemloft.net) + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +#define SHA512 \ + .word 0x81b02860; + + .text + .align 32 +ENTRY(__sha512_process_block_crop) + /* %o0=buffer, %o1=len, %o2=CTX */ + ldx [%o2 + 0x48], %g1 /* 64-bit word at CTX + 0x48 is advanced by len ... */ + add %g1, %o1, %o4 + stx %o4, [%o2 + 0x48] + cmp %o4, %g1 + bgeu,pt %xcc, 1f /* ... and if the sum wrapped, carry into the word at CTX + 0x40 */ + nop + ldx [%o2 + 0x40], %g1 + add %g1, 1, %g1 + stx %g1, [%o2 + 0x40] + +1: ldd [%o2 + 0x00], %f0 /* load the eight 64-bit state words into %f0-%f14 */ + ldd [%o2 + 0x08], %f2 + ldd [%o2 + 0x10], %f4 + ldd [%o2 + 0x18], %f6 + ldd [%o2 + 0x20], %f8 + ldd [%o2 + 0x28], %f10 + andcc %o0, 0x7, %g0 /* test the buffer pointer (not len) for 8-byte alignment; ldd below requires it */ + ldd [%o2 + 0x30], %f12 + bne,pn %xcc, 10f /* misaligned buffer: use the alignaddr/faligndata loop */ + ldd [%o2 + 0x38], %f14 + +1: + ldd [%o0 + 0x00], %f16 + ldd [%o0 + 0x08], %f18 + ldd [%o0 + 0x10], %f20 + ldd [%o0 + 0x18], %f22 + ldd [%o0 + 0x20], %f24 + ldd [%o0 + 0x28], %f26 + ldd [%o0 + 0x30], %f28 + ldd [%o0 + 0x38], %f30 + ldd [%o0 + 0x40], %f32 + ldd [%o0 + 0x48], %f34 + ldd [%o0 + 0x50], %f36 + ldd [%o0 + 0x58], %f38 + ldd [%o0 + 0x60], %f40 + ldd [%o0 + 0x68], %f42 + ldd [%o0 + 0x70], %f44 + ldd [%o0 + 0x78], %f46 + + SHA512 + + subcc %o1, 0x80, %o1 /* one 128-byte block consumed */ + bne,pt %xcc, 1b + add %o0, 0x80, %o0 + +5: + std %f0, [%o2 + 0x00] /* write the updated state back to CTX */ + std %f2, [%o2 + 0x08] + std %f4, [%o2 + 0x10] + std %f6, [%o2 + 0x18] + std %f8, [%o2 + 0x20] + std %f10, [%o2 + 0x28] + std %f12, [%o2 + 0x30] + retl + std %f14, [%o2 + 0x38] +10: + alignaddr %o0, %g0, %o0 /* round %o0 down to 8 bytes and record the byte offset for faligndata */ + + ldd [%o0 + 0x00], %f18 +1: + ldd [%o0 + 0x08], %f20 + ldd [%o0 + 0x10], %f22 + ldd [%o0 + 0x18], %f24 + ldd [%o0 + 0x20], %f26 + ldd [%o0 + 0x28], %f28 + ldd [%o0 + 0x30], %f30 + ldd [%o0 + 0x38], %f32 + ldd [%o0 + 0x40], %f34 + ldd [%o0 + 0x48], %f36 + ldd [%o0 + 0x50], %f38 + ldd [%o0 + 0x58], %f40 + ldd [%o0 + 0x60], %f42 + ldd [%o0 + 0x68], %f44 + ldd [%o0 + 0x70], %f46 + ldd [%o0 + 0x78], %f48 + ldd [%o0 + 0x80], %f50 + + faligndata %f18, %f20, %f16 /* extract the block's sixteen doublewords from seventeen aligned ones */ + faligndata %f20, %f22, %f18 + faligndata %f22, %f24, %f20 + faligndata %f24, %f26, %f22 + faligndata %f26, %f28, %f24 + faligndata %f28, %f30, %f26 + faligndata %f30, %f32, %f28 + faligndata %f32, %f34, %f30 + faligndata %f34, %f36, %f32 + 
faligndata %f36, %f38, %f34 + faligndata %f38, %f40, %f36 + faligndata %f40, %f42, %f38 + faligndata %f42, %f44, %f40 + faligndata %f44, %f46, %f42 + faligndata %f46, %f48, %f44 + faligndata %f48, %f50, %f46 + + SHA512 + + subcc %o1, 0x80, %o1 + fsrc2 %f50, %f18 /* last doubleword loaded becomes the first one of the next block */ + bne,pt %xcc, 1b + add %o0, 0x80, %o0 + + ba,a,pt %xcc, 5b +END(__sha512_process_block_crop) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/multiarch/sub_n-vis3.S b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/sub_n-vis3.S new file mode 100644 index 0000000000..2d2a75dff8 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/sub_n-vis3.S @@ -0,0 +1,71 @@ +! SPARC v9 64-bit VIS3 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 +! and store difference in a third limb vector. +! +! Copyright (C) 2013-2017 Free Software Foundation, Inc. +! This file is part of the GNU C Library. +! Contributed by David S. Miller <davem@davemloft.net> +! +! The GNU C Library is free software; you can redistribute it and/or +! modify it under the terms of the GNU Lesser General Public +! License as published by the Free Software Foundation; either +! version 2.1 of the License, or (at your option) any later version. +! +! The GNU C Library is distributed in the hope that it will be useful, +! but WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +! Lesser General Public License for more details. +! +! You should have received a copy of the GNU Lesser General Public +! License along with the GNU C Library; if not, see +! <http://www.gnu.org/licenses/>. 
+ +#include <sysdep.h> + +#define res_ptr %o0 +#define s1_ptr %o1 +#define s2_ptr %o2 +#define sz %o3 +#define tmp1 %g1 +#define tmp2 %g2 +#define tmp3 %g3 +#define tmp4 %o4 + + .register %g2,#scratch + .register %g3,#scratch +ENTRY(__mpn_sub_n_vis3) /* res_ptr[] = s1_ptr[] - s2_ptr[] over sz limbs, computed as s1 + ~s2 + carry; returns 1 in %o0 on borrow out, else 0 */ + subcc sz, 1, sz + be .Lfinal_limb /* exactly one limb: skip the two-limb loop */ + cmp %g0, 1 /* delay slot: 0 - 1 sets the carry flag, giving the initial +1 of the two's-complement subtract */ + +.Lloop: + ldx [s2_ptr + 0x00], tmp1 + add s2_ptr, 0x10, s2_ptr + ldx [s1_ptr + 0x00], tmp2 + add s1_ptr, 0x10, s1_ptr + ldx [s2_ptr - 0x08], tmp3 + add res_ptr, 0x10, res_ptr + ldx [s1_ptr - 0x08], tmp4 + sub sz, 2, sz /* plain sub: leaves the carry flag untouched */ + xnor tmp1, %g0, tmp1 /* tmp1 = ~tmp1 (the s2 limb) */ + addxccc tmp1, tmp2, tmp1 /* s1 + ~s2 + carry, carry updated for the next limb */ + stx tmp1, [res_ptr - 0x10] + xnor tmp3, %g0, tmp3 + addxccc tmp3, tmp4, tmp3 + brgz sz, .Lloop /* register-value branch: does not depend on the condition codes */ + stx tmp3, [res_ptr - 0x08] + + brlz,pt sz, .Lfinish /* sz < 0: nothing left; sz == 0 falls through to one last limb */ + nop + +.Lfinal_limb: + ldx [s2_ptr + 0x00], tmp1 + ldx [s1_ptr + 0x00], tmp2 + xnor tmp1, %g0, tmp1 + addxccc tmp1, tmp2, tmp1 + stx tmp1, [res_ptr + 0x00] + +.Lfinish: + clr %o0 + retl + movcc %xcc, 1, %o0 /* delay slot: carry clear after s1 + ~s2 + carry means a borrow, so return 1 */ +END(__mpn_sub_n_vis3) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/multiarch/sub_n.S b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/sub_n.S new file mode 100644 index 0000000000..d20a286df1 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/sub_n.S @@ -0,0 +1,56 @@ +/* Multiple versions of sub_n + + Copyright (C) 2013-2017 Free Software Foundation, Inc. + Contributed by David S. Miller (davem@davemloft.net) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY(__mpn_sub_n) + .type __mpn_sub_n, @gnu_indirect_function +# ifdef SHARED + SETUP_PIC_REG_LEAF(o3, o5) +# endif + set HWCAP_SPARC_VIS3, %o1 + andcc %o0, %o1, %g0 + be 1f + nop +# ifdef SHARED + sethi %gdop_hix22(__mpn_sub_n_vis3), %o1 + xor %o1, %gdop_lox10(__mpn_sub_n_vis3), %o1 +# else + set __mpn_sub_n_vis3, %o1 +# endif + ba 10f + nop +1: +# ifdef SHARED + sethi %gdop_hix22(__mpn_sub_n_generic), %o1 + xor %o1, %gdop_lox10(__mpn_sub_n_generic), %o1 +# else + set __mpn_sub_n_generic, %o1 +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 +# endif + retl + mov %o1, %o0 +END(__mpn_sub_n) + +#define __mpn_sub_n __mpn_sub_n_generic +#include "../sub_n.S" diff --git a/REORG.TODO/sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S new file mode 100644 index 0000000000..99644491e7 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/submul_1-vis3.S @@ -0,0 +1,87 @@ +! SPARC v9 64-bit VIS3 __mpn_submul_1 -- Multiply a limb vector with a +! limb and subtract the result from a second limb vector. +! +! Copyright (C) 2013-2017 Free Software Foundation, Inc. +! This file is part of the GNU C Library. +! Contributed by David S. Miller <davem@davemloft.net> +! +! The GNU C Library is free software; you can redistribute it and/or +! modify it under the terms of the GNU Lesser General Public +! License as published by the Free Software Foundation; either +! version 2.1 of the License, or (at your option) any later version. +! +! The GNU C Library is distributed in the hope that it will be useful, +! but WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +! Lesser General Public License for more details. +! +! 
You should have received a copy of the GNU Lesser General Public +! License along with the GNU C Library; if not, see +! <http://www.gnu.org/licenses/>. + +#include <sysdep.h> + +#define res_ptr %i0 +#define s1_ptr %i1 +#define sz %i2 +#define s2_limb %i3 +#define carry %o5 +#define tmp1 %g1 +#define tmp2 %g2 +#define tmp3 %g3 +#define tmp4 %o4 +#define tmp5 %l0 +#define tmp6 %l1 +#define tmp7 %l2 +#define tmp8 %l3 + + .register %g2,#scratch + .register %g3,#scratch +ENTRY(__mpn_submul_1_vis3) + save %sp, -176, %sp + subcc sz, 1, sz + be .Lfinal_limb + clr carry + +.Lloop: + ldx [s1_ptr + 0x00], tmp1 + ldx [res_ptr + 0x00], tmp3 + ldx [s1_ptr + 0x08], tmp2 + ldx [res_ptr + 0x08], tmp4 + mulx tmp1, s2_limb, tmp5 + add s1_ptr, 0x10, s1_ptr + umulxhi tmp1, s2_limb, tmp6 + add res_ptr, 0x10, res_ptr + mulx tmp2, s2_limb, tmp7 + sub sz, 2, sz + umulxhi tmp2, s2_limb, tmp8 + addcc carry, tmp5, tmp5 + addxc %g0, tmp6, carry + subcc tmp3, tmp5, tmp5 + addxc %g0, carry, carry + stx tmp5, [res_ptr - 0x10] + addcc carry, tmp7, tmp7 + addxc %g0, tmp8, carry + subcc tmp4, tmp7, tmp7 + addxc %g0, carry, carry + brgz sz, .Lloop + stx tmp7, [res_ptr - 0x08] + + brlz,pt sz, .Lfinish + nop + +.Lfinal_limb: + ldx [s1_ptr + 0x00], tmp1 + ldx [res_ptr + 0x00], tmp3 + mulx tmp1, s2_limb, tmp5 + umulxhi tmp1, s2_limb, tmp6 + addcc carry, tmp5, tmp5 + addxc %g0, tmp6, carry + subcc tmp3, tmp5, tmp5 + addxc %g0, carry, carry + stx tmp5, [res_ptr + 0x00] + +.Lfinish: + jmpl %i7 + 8, %g0 + restore carry, 0, %o0 +END(__mpn_submul_1_vis3) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/multiarch/submul_1.S b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/submul_1.S new file mode 100644 index 0000000000..3c297d989b --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/multiarch/submul_1.S @@ -0,0 +1,56 @@ +/* Multiple versions of submul_1 + + Copyright (C) 2013-2017 Free Software Foundation, Inc. + Contributed by David S. Miller (davem@davemloft.net) + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +ENTRY(__mpn_submul_1) + .type __mpn_submul_1, @gnu_indirect_function +# ifdef SHARED + SETUP_PIC_REG_LEAF(o3, o5) +# endif + set HWCAP_SPARC_VIS3, %o1 + andcc %o0, %o1, %g0 + be 1f + nop +# ifdef SHARED + sethi %gdop_hix22(__mpn_submul_1_vis3), %o1 + xor %o1, %gdop_lox10(__mpn_submul_1_vis3), %o1 +# else + set __mpn_submul_1_vis3, %o1 +# endif + ba 10f + nop +1: +# ifdef SHARED + sethi %gdop_hix22(__mpn_submul_1_generic), %o1 + xor %o1, %gdop_lox10(__mpn_submul_1_generic), %o1 +# else + set __mpn_submul_1_generic, %o1 +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 +# endif + retl + mov %o1, %o0 +END(__mpn_submul_1) + +#define __mpn_submul_1 __mpn_submul_1_generic +#include "../submul_1.S" diff --git a/REORG.TODO/sysdeps/sparc/sparc64/pthread_spin_init.c b/REORG.TODO/sysdeps/sparc/sparc64/pthread_spin_init.c new file mode 100644 index 0000000000..58319ab62d --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/pthread_spin_init.c @@ -0,0 +1 @@ +/* pthread_spin_init is in pthread_spin_unlock.S */ diff --git a/REORG.TODO/sysdeps/sparc/sparc64/pthread_spin_lock.S b/REORG.TODO/sysdeps/sparc/sparc64/pthread_spin_lock.S new file mode 100644 index 0000000000..0f41ccf4d5 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/pthread_spin_lock.S @@ -0,0 +1,31 @@ +/* Copyright (C) 
2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .text +ENTRY(pthread_spin_lock) /* %o0 = address of the lock byte; spins until acquired, then returns 0 */ +1: ldstub [%o0], %g1 /* atomically fetch the old lock byte and store 0xff into it */ + brnz,pn %g1, 2f /* old byte non-zero: lock was already held, go spin */ + membar #StoreLoad | #StoreStore /* delay slot: executed on both paths */ + retl + mov 0, %o0 /* delay slot: return 0 */ +2: ldub [%o0], %g1 /* wait with plain loads until the byte reads zero */ + brnz,pt %g1, 2b + membar #LoadLoad + ba,a,pt %xcc, 1b /* byte read as zero: retry the atomic ldstub */ +END(pthread_spin_lock) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/pthread_spin_trylock.S b/REORG.TODO/sysdeps/sparc/sparc64/pthread_spin_trylock.S new file mode 100644 index 0000000000..fa05cf8fdb --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/pthread_spin_trylock.S @@ -0,0 +1,27 @@ +/* Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <pthread-errnos.h> + + .text +ENTRY(pthread_spin_trylock) /* %o0 = address of the lock byte; one attempt, returns 0 on success or EBUSY */ + ldstub [%o0], %o0 /* atomically fetch the old lock byte into %o0 and store 0xff */ + membar #StoreLoad | #StoreStore + retl + movrnz %o0, EBUSY, %o0 /* delay slot: old byte non-zero -> EBUSY; otherwise %o0 is already 0 */ +END(pthread_spin_trylock) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/pthread_spin_unlock.S b/REORG.TODO/sysdeps/sparc/sparc64/pthread_spin_unlock.S new file mode 100644 index 0000000000..c82f05d538 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/pthread_spin_unlock.S @@ -0,0 +1,28 @@ +/* Copyright (C) 2012-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + .text +ENTRY(pthread_spin_unlock) /* %o0 = address of the lock byte; clears it and returns 0 */ + membar #StoreStore | #LoadStore /* order earlier loads and stores before the releasing store */ + stb %g0, [%o0] /* store zero into the lock byte */ + retl + clr %o0 /* delay slot: return 0 */ +END(pthread_spin_unlock) + +strong_alias (pthread_spin_unlock, pthread_spin_init) /* pthread_spin_init is the same code: it stores zero into the byte at %o0 and returns 0 */ diff --git a/REORG.TODO/sysdeps/sparc/sparc64/pthreaddef.h b/REORG.TODO/sysdeps/sparc/sparc64/pthreaddef.h new file mode 100644 index 0000000000..df81791a65 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/pthreaddef.h @@ -0,0 +1,33 @@ +/* Copyright (C) 2003-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* Default stack size. */ +#define ARCH_STACK_DEFAULT_SIZE (4 * 1024 * 1024) + +/* Required stack pointer alignment at beginning. */ +#define STACK_ALIGN 16 + +/* Minimal stack size after allocating thread descriptor and guard size. */ +#define MINIMAL_REST_STACK 4096 + +/* Alignment requirement for TCB. */ +#define TCB_ALIGNMENT 16 + + +/* Location of current stack frame. */ +#define CURRENT_STACK_FRAME (stack_pointer + (2 * 128)) +register char *stack_pointer __asm__("%sp"); diff --git a/REORG.TODO/sysdeps/sparc/sparc64/rawmemchr.S b/REORG.TODO/sysdeps/sparc/sparc64/rawmemchr.S new file mode 100644 index 0000000000..d3e7b5d4b4 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/rawmemchr.S @@ -0,0 +1,178 @@ +/* rawmemchr (str, ch) -- Return pointer to first occurrence of CH in STR. + For SPARC v9. + Copyright (C) 1999-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jj@ultra.linux.cz>. 
+ This version is developed using the same algorithm as the fast C + version which carries the following introduction: + Based on strlen implementation by Torbjorn Granlund (tege@sics.se), + with help from Dan Sahlin (dan@sics.se) and + commentary by Jim Blandy (jimb@ai.mit.edu); + adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu), + and implemented by Roland McGrath (roland@ai.mit.edu). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <asm/asi.h> +#ifndef XCC +#define XCC xcc +#define USE_BPR + .register %g2, #scratch + .register %g3, #scratch +#endif + + /* Normally, this uses + ((xword - 0x0101010101010101) & 0x8080808080808080) test + to find out if any byte in xword could be zero. This is fast, but + also gives false alarm for any byte in range 0x81-0xff. It does + not matter for correctness, as if this test tells us there could + be some zero byte, we check it byte by byte, but if bytes with + high bits set are common in the strings, then this will give poor + performance. You can #define EIGHTBIT_NOT_RARE and the algorithm + will use one tick slower, but more precise test + ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080), + which does not give any false alarms (but if some bits are set, + one cannot assume from it which bytes are zero and which are not). 
+ It is yet to be measured, what is the correct default for glibc + in these days for an average user. + */ + + .text + .align 32 +ENTRY(__rawmemchr) + and %o1, 0xff, %o1 /* IEU0 Group */ + sethi %hi(0x01010101), %g1 /* IEU1 */ + ldub [%o0], %o3 /* Load */ + sll %o1, 8, %o4 /* IEU0 Group */ + + or %g1, %lo(0x01010101), %g1 /* IEU1 */ + sllx %g1, 32, %g2 /* IEU0 Group */ + or %o4, %o1, %o4 /* IEU1 */ + andcc %o0, 7, %g0 /* IEU1 Group */ + + sll %o4, 16, %g5 /* IEU0 */ + or %o4, %g5, %o4 /* IEU0 Group */ + or %g1, %g2, %g1 /* IEU1 */ + bne,pn %icc, 32f /* CTI */ + + sllx %o4, 32, %g5 /* IEU0 Group */ + cmp %o3, %o1 /* IEU1 */ + be,pn %icc, 30f /* CTI */ + sllx %g1, 7, %g2 /* IEU0 Group */ + +18: ldx [%o0], %o3 /* Load */ + or %o4, %g5, %o4 /* IEU1 */ + add %o0, 8, %o0 /* IEU0 Group */ +19: xor %o3, %o4, %o3 /* IEU0 Group */ + + sub %o3, %g1, %o2 /* IEU0 Group */ +#ifdef EIGHTBIT_NOT_RARE + andn %o2, %o3, %o5 /* IEU0 Group */ + ldxa [%o0] ASI_PNF, %o3 /* Load */ + andcc %o5, %g2, %g0 /* IEU1 Group */ +#else + ldxa [%o0] ASI_PNF, %o3 /* Load */ + andcc %o2, %g2, %g0 /* IEU1 Group */ +#endif + be,pt %xcc, 19b /* CTI */ + + add %o0, 8, %o0 /* IEU0 */ + addcc %o2, %g1, %g3 /* IEU1 Group */ + srlx %o2, 32, %o2 /* IEU0 */ +20: andcc %o2, %g2, %g0 /* IEU1 Group */ + + be,pn %xcc, 21f /* CTI */ + srlx %g3, 56, %o2 /* IEU0 */ + andcc %o2, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 29f /* CTI */ + + srlx %g3, 48, %o2 /* IEU0 */ + andcc %o2, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 28f /* CTI */ + srlx %g3, 40, %o2 /* IEU0 */ + + andcc %o2, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 27f /* CTI */ + srlx %g3, 32, %o2 /* IEU0 */ + andcc %o2, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 26f /* CTI */ +21: srlx %g3, 24, %o2 /* IEU0 */ + andcc %o2, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 25f /* CTI */ + + srlx %g3, 16, %o2 /* IEU0 */ + andcc %o2, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 24f /* CTI */ + srlx %g3, 8, %o2 /* IEU0 */ + + andcc %o2, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 23f 
/* CTI */ + xor %o3, %o4, %o3 /* IEU0 */ + andcc %g3, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 22f /* CTI */ + sub %o3, %g1, %o2 /* IEU0 */ + ldxa [%o0] ASI_PNF, %o3 /* Load */ + andcc %o2, %g2, %g0 /* IEU1 Group */ + + be,pt %xcc, 19b /* CTI */ + add %o0, 8, %o0 /* IEU0 */ + addcc %o2, %g1, %g3 /* IEU1 Group */ + ba,pt %xcc, 20b /* CTI */ + + srlx %o2, 32, %o2 /* IEU0 */ + + .align 16 +22: retl /* CTI+IEU1 Group */ + add %o0, -9, %o0 /* IEU0 */ +23: retl /* CTI+IEU1 Group */ + add %o0, -10, %o0 /* IEU0 */ + +24: retl /* CTI+IEU1 Group */ + add %o0, -11, %o0 /* IEU0 */ +25: retl /* CTI+IEU1 Group */ + add %o0, -12, %o0 /* IEU0 */ + +26: retl /* CTI+IEU1 Group */ + add %o0, -13, %o0 /* IEU0 */ +27: retl /* CTI+IEU1 Group */ + add %o0, -14, %o0 /* IEU0 */ + +28: retl /* CTI+IEU1 Group */ + add %o0, -15, %o0 /* IEU0 */ +29: retl /* CTI+IEU1 Group */ + add %o0, -16, %o0 /* IEU0 */ + +30: retl /* CTI+IEU1 Group */ + nop /* IEU0 */ + + .align 16 +32: andcc %o0, 7, %g0 /* IEU1 Group */ + be,a,pn %icc, 18b /* CTI */ + sllx %g1, 7, %g2 /* IEU0 */ + add %o0, 1, %o0 /* IEU0 Group */ + + cmp %o3, %o1 /* IEU1 */ + bne,a,pt %icc, 32b /* CTI */ + lduba [%o0] ASI_PNF, %o3 /* Load */ + retl /* CTI+IEU1 Group */ + + add %o0, -1, %o0 /* IEU0 */ +END(__rawmemchr) + +libc_hidden_def (__rawmemchr) +weak_alias (__rawmemchr, rawmemchr) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/rshift.S b/REORG.TODO/sysdeps/sparc/sparc64/rshift.S new file mode 100644 index 0000000000..f9319f2ec7 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/rshift.S @@ -0,0 +1,92 @@ +/* SPARC v9 __mpn_rshift -- + + Copyright (C) 1996-2017 Free Software Foundation, Inc. + + This file is part of the GNU MP Library. + + The GNU MP Library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or (at your + option) any later version. 
+ + The GNU MP Library is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with the GNU MP Library; see the file COPYING.LIB. If not, + see <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* INPUT PARAMETERS + res_ptr %o0 + src_ptr %o1 + size %o2 + cnt %o3 */ + + .register %g2, #scratch + .register %g3, #scratch + +ENTRY(__mpn_rshift) + ldx [%o1],%g2 ! load first limb + sub %g0,%o3,%o5 ! negate shift count + add %o2,-1,%o2 + andcc %o2,4-1,%g4 ! number of limbs in first loop + sllx %g2,%o5,%g1 ! compute function result + be,pn %xcc,.L0 ! if multiple of 4 limbs, skip first loop + mov %g1,%g5 + + sub %o2,%g4,%o2 ! adjust count for main loop + +.Loop0: ldx [%o1+8],%g3 + add %o0,8,%o0 + add %o1,8,%o1 + srlx %g2,%o3,%o4 + addcc %g4,-1,%g4 + sllx %g3,%o5,%g1 + mov %g3,%g2 + or %o4,%g1,%o4 + bne,pt %xcc,.Loop0 + stx %o4,[%o0-8] + +.L0: brz,pn %o2,.Lend + nop + +.Loop: ldx [%o1+8],%g3 + add %o0,32,%o0 + srlx %g2,%o3,%o4 + addcc %o2,-4,%o2 + sllx %g3,%o5,%g1 + + ldx [%o1+16],%g2 + srlx %g3,%o3,%g4 + or %o4,%g1,%o4 + stx %o4,[%o0-32] + sllx %g2,%o5,%g1 + + ldx [%o1+24],%g3 + srlx %g2,%o3,%o4 + or %g4,%g1,%g4 + stx %g4,[%o0-24] + sllx %g3,%o5,%g1 + + ldx [%o1+32],%g2 + srlx %g3,%o3,%g4 + or %o4,%g1,%o4 + stx %o4,[%o0-16] + sllx %g2,%o5,%g1 + + add %o1,32,%o1 + or %g4,%g1,%g4 + bne,pt %xcc,.Loop + stx %g4,[%o0-8] + +.Lend: srlx %g2,%o3,%g2 + stx %g2,[%o0-0] + + jmpl %o7+8,%g0 + mov %g5,%o0 + +END(__mpn_rshift) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/rtld-memcpy.c b/REORG.TODO/sysdeps/sparc/sparc64/rtld-memcpy.c new file mode 100644 index 0000000000..52f8302f08 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/rtld-memcpy.c @@ -0,0 +1,3 @@ +#define NO_MEMPCPY_STPCPY_REDIRECT +#include 
<string/memcpy.c> +#include <string/mempcpy.c> diff --git a/REORG.TODO/sysdeps/sparc/sparc64/rtld-memset.c b/REORG.TODO/sysdeps/sparc/sparc64/rtld-memset.c new file mode 100644 index 0000000000..55f3835790 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/rtld-memset.c @@ -0,0 +1 @@ +#include <string/memset.c> diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/Makefile b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/Makefile new file mode 100644 index 0000000000..b145df283b --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/Makefile @@ -0,0 +1,33 @@ +# Software floating-point emulation. +# Makefile for SPARC v9 ABI mandated long double utility +# functions (_Qp_*). +# Copyright (C) 1999-2017 Free Software Foundation, Inc. +# This file is part of the GNU C Library. +# Contributed by Jakub Jelinek (jj@ultra.linux.cz). +# + +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <http://www.gnu.org/licenses/>. 
+ +ifeq ($(subdir),soft-fp) +sparc64-quad-routines := qp_add qp_cmp qp_cmpe qp_div qp_dtoq qp_feq qp_fge \ + qp_fgt qp_fle qp_flt qp_fne qp_itoq qp_mul qp_neg qp_qtod qp_qtoi \ + qp_qtos qp_qtoui qp_qtoux qp_qtox qp_sqrt qp_stoq qp_sub qp_uitoq \ + qp_uxtoq qp_xtoq qp_util +sysdep_routines += $(sparc64-quad-routines) +endif + +ifeq ($(subdir),math) +CPPFLAGS += -I../soft-fp/ +endif diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/Versions b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/Versions new file mode 100644 index 0000000000..9e89c3c3ef --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/Versions @@ -0,0 +1,8 @@ +libc { + GLIBC_2.2 { + _Qp_add; _Qp_cmp; _Qp_cmpe; _Qp_div; _Qp_dtoq; _Qp_feq; _Qp_fge; _Qp_fgt; + _Qp_fle; _Qp_flt; _Qp_fne; _Qp_itoq; _Qp_mul; _Qp_neg; _Qp_qtod; _Qp_qtoi; + _Qp_qtos; _Qp_qtoui; _Qp_qtoux; _Qp_qtox; _Qp_sqrt; _Qp_stoq; _Qp_sub; + _Qp_uitoq; _Qp_uxtoq; _Qp_xtoq; + } +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/e_ilogbl.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/e_ilogbl.c new file mode 100644 index 0000000000..5b19d12a4e --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/e_ilogbl.c @@ -0,0 +1,79 @@ +/* Software floating-point emulation. + ilogbl(x, exp) + Copyright (C) 1999-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* ilogbl(long double x) + * return the binary exponent of non-zero x + * ilogbl(0) = 0x80000001 + * ilogbl(inf/NaN) = 0x7fffffff (no signal is raised) + */ + +#include "soft-fp.h" +#include "quad.h" +#include <math.h> +/* Return the unbiased binary exponent of x as an int. The raw (biased) + exponent field X_e selects the case: 0 means zero or subnormal, + _FP_EXPMAX_Q (all ones) means infinity or NaN, and anything else is a + normal number whose exponent is X_e minus the bias. For a subnormal + the exponent is lowered by the number of leading zero fraction bits, + using the same adjustment soft-fp's canonical unpack applies + (X_e -= _FP_EXPBIAS - 1 + shift). Returns FP_ILOGB0 for zero and + FP_ILOGBNAN for infinity or NaN. */ +int __ieee754_ilogbl (long double x) +{ + FP_DECL_EX; + FP_DECL_Q(X); + +/* + FP_UNPACK_Q(X, x); + switch (X_c) + { + case FP_CLS_ZERO: + return FP_ILOGB0; + case FP_CLS_NAN: + case FP_CLS_INF: + return FP_ILOGBNAN; + default: + return X_e; + } + */ + FP_UNPACK_RAW_Q(X, x); + switch (X_e) + { + default: /* Normal number: just remove the bias. */ + return X_e - _FP_EXPBIAS_Q; + case 0: /* Zero or subnormal, decided by the fraction. */ +#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q + if (_FP_FRAC_ZEROP_4(X)) + return FP_ILOGB0; + else + { + _FP_I_TYPE shift; + _FP_FRAC_CLZ_4(shift, X); + shift -= _FP_FRACXBITS_Q; /* Leading zeros within the fraction field. */ + return X_e - (_FP_EXPBIAS_Q - 1 + shift); + } +#else + if (_FP_FRAC_ZEROP_2(X)) + return FP_ILOGB0; + else + { + _FP_I_TYPE shift; + _FP_FRAC_CLZ_2(shift, X); + shift -= _FP_FRACXBITS_Q; /* Leading zeros within the fraction field. */ + return X_e - (_FP_EXPBIAS_Q - 1 + shift); + } +#endif + case _FP_EXPMAX_Q: /* All-ones exponent: infinity or NaN. */ + return FP_ILOGBNAN; + } +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_add.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_add.c new file mode 100644 index 0000000000..a67deab315 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_add.c @@ -0,0 +1,44 @@ +/* Software floating-point emulation. + (*c) = (*a) + (*b) + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version.
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "quad.h" +/* Store the quad sum (*a) + (*b) in *c. The operands are unpacked, + added and packed with the soft-fp macros after FP_INIT_ROUNDMODE. + NOTE(review): QP_HANDLE_EXCEPTIONS is defined outside this file; the + faddq sequence passed to it looks like a hardware replay of the same + addition for the case where an exception must be raised -- confirm. */ +void _Qp_add(long double *c, const long double *a, const long double *b) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(C); + + FP_INIT_ROUNDMODE; + FP_UNPACK_SEMIRAW_QP(A, a); + FP_UNPACK_SEMIRAW_QP(B, b); + FP_ADD_Q(C, A, B); + FP_PACK_SEMIRAW_QP(c, C); + QP_HANDLE_EXCEPTIONS(__asm ( +" ldd [%1], %%f52\n" +" ldd [%1+8], %%f54\n" +" ldd [%2], %%f56\n" +" ldd [%2+8], %%f58\n" +" faddq %%f52, %%f56, %%f60\n" +" std %%f60, [%0]\n" +" std %%f62, [%0+8]\n" +" " : : "r" (c), "r" (a), "r" (b) : QP_CLOBBER)); +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_cmp.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_cmp.c new file mode 100644 index 0000000000..5316157ec7 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_cmp.c @@ -0,0 +1,48 @@ +/* Software floating-point emulation. + Compare (*a) and (*b), return float condition code. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "quad.h" +/* Compare (*a) with (*b) and return a two-bit condition code. + FP_CMP_Q is called with the operands swapped (B, A) and 3 as the + unordered result, and -1 is then mapped to 2, so r ends up in 0..3. + NOTE(review): presumably 0 = equal, 1 = less, 2 = greater, + 3 = unordered, as left in %fcc3 by fcmpq -- confirm. The statements + handed to QP_HANDLE_EXCEPTIONS (defined elsewhere) redo the compare + with fcmpq and reload r from bits 36-37 of the word read by + _FPU_GETCW. */ +int _Qp_cmp(const long double *a, const long double *b) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(B); + int r; + + FP_INIT_ROUNDMODE; + FP_UNPACK_RAW_QP(A, a); + FP_UNPACK_RAW_QP(B, b); + FP_CMP_Q(r, B, A, 3, 1); + if (r == -1) r = 2; + QP_HANDLE_EXCEPTIONS( + __asm ( +" ldd [%0], %%f52\n" +" ldd [%0+8], %%f54\n" +" ldd [%1], %%f56\n" +" ldd [%1+8], %%f58\n" +" fcmpq %%fcc3, %%f52, %%f56\n" +" " : : "r" (a), "r" (b) : QP_CLOBBER_CC); + _FPU_GETCW(_fcw); + r = ((_fcw >> 36) & 3)); + + return r; +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_cmpe.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_cmpe.c new file mode 100644 index 0000000000..e0a834c721 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_cmpe.c @@ -0,0 +1,49 @@ +/* Software floating-point emulation. + Compare (*a) and (*b), return float condition code. + Signal exception (unless masked) if unordered. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details.
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "quad.h" +/* Compare (*a) with (*b) and return a two-bit condition code; per the + file header an exception is signalled (unless masked) when the operands + are unordered. FP_CMP_Q is called with the operands swapped (B, A), 3 + as the unordered result and 2 as its last argument, and -1 is then + mapped to 2, so r ends up in 0..3. The statements handed to + QP_HANDLE_EXCEPTIONS (defined elsewhere) redo the compare with fcmpeq + and reload r from bits 36-37 of the word read by _FPU_GETCW. */ +int _Qp_cmpe(const long double *a, const long double *b) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(B); + int r; + + FP_INIT_ROUNDMODE; + FP_UNPACK_RAW_QP(A, a); + FP_UNPACK_RAW_QP(B, b); + FP_CMP_Q(r, B, A, 3, 2); + if (r == -1) r = 2; + QP_HANDLE_EXCEPTIONS( + __asm ( +" ldd [%0], %%f52\n" +" ldd [%0+8], %%f54\n" +" ldd [%1], %%f56\n" +" ldd [%1+8], %%f58\n" +" fcmpeq %%fcc3, %%f52, %%f56\n" +" " : : "r" (a), "r" (b) : QP_CLOBBER_CC); + _FPU_GETCW(_fcw); + r = ((_fcw >> 36) & 3)); + + return r; +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_div.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_div.c new file mode 100644 index 0000000000..27d08f94dc --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_div.c @@ -0,0 +1,44 @@ +/* Software floating-point emulation. + (*c) = (*a) / (*b) + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#include "soft-fp.h" +#include "quad.h" +/* Store the quad quotient (*a) / (*b) in *c. The operands are + unpacked, divided and packed with the soft-fp macros after + FP_INIT_ROUNDMODE. NOTE(review): QP_HANDLE_EXCEPTIONS is defined + outside this file; the fdivq sequence passed to it looks like a + hardware replay of the same division for the case where an exception + must be raised -- confirm. */ +void _Qp_div(long double *c, const long double *a, const long double *b) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(C); + + FP_INIT_ROUNDMODE; + FP_UNPACK_QP(A, a); + FP_UNPACK_QP(B, b); + FP_DIV_Q(C, A, B); + FP_PACK_QP(c, C); + QP_HANDLE_EXCEPTIONS(__asm ( +" ldd [%1], %%f52\n" +" ldd [%1+8], %%f54\n" +" ldd [%2], %%f56\n" +" ldd [%2+8], %%f58\n" +" fdivq %%f52, %%f56, %%f60\n" +" std %%f60, [%0]\n" +" std %%f62, [%0+8]\n" +" " : : "r" (c), "r" (a), "r" (b) : QP_CLOBBER)); +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_dtoq.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_dtoq.c new file mode 100644 index 0000000000..5a5c43b13c --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_dtoq.c @@ -0,0 +1,45 @@ +/* Software floating-point emulation. + (*c) = (long double)(a) + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#include "soft-fp.h" +#include "double.h" +#include "quad.h" +/* Convert the double a to quad precision and store it in *c. + FP_EXTEND widens the raw-unpacked double; its word-count arguments + (4,2 or 2,1) are chosen by whether two _FP_W_TYPE words can hold + _FP_FRACBITS_Q bits. NOTE(review): QP_HANDLE_EXCEPTIONS is defined + outside this file; the fdtoq sequence passed to it looks like a + hardware replay of the conversion -- confirm. */ +void _Qp_dtoq(long double *c, const double a) +{ + FP_DECL_EX; + FP_DECL_D(A); + FP_DECL_Q(C); + + FP_INIT_ROUNDMODE; + FP_UNPACK_RAW_D(A, a); +#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q + FP_EXTEND(Q,D,4,2,C,A); +#else + FP_EXTEND(Q,D,2,1,C,A); +#endif + FP_PACK_RAW_QP(c, C); + QP_HANDLE_EXCEPTIONS(__asm ( +" fdtoq %1, %%f60\n" +" std %%f60, [%0]\n" +" std %%f62, [%0+8]\n" +" " : : "r" (c), "e" (a) : QP_CLOBBER)); +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_feq.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_feq.c new file mode 100644 index 0000000000..c7c6263782 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_feq.c @@ -0,0 +1,48 @@ +/* Software floating-point emulation. + Return 1 if (*a) == (*b) + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#include "soft-fp.h" +#include "quad.h" +/* Return 1 if (*a) == (*b), otherwise 0. r is produced by + FP_CMP_EQ_Q, or by the fcmpq replay passed to QP_HANDLE_EXCEPTIONS + (defined elsewhere), where it is reloaded from bits 36-37 of the word + read by _FPU_GETCW; either way the function returns !r, so r == 0 + stands for "equal". */ +int _Qp_feq(const long double *a, const long double *b) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(B); + int r; + + FP_INIT_ROUNDMODE; + FP_UNPACK_RAW_QP(A, a); + FP_UNPACK_RAW_QP(B, b); + FP_CMP_EQ_Q(r, A, B, 1); + + QP_HANDLE_EXCEPTIONS( + __asm ( +" ldd [%0], %%f52\n" +" ldd [%0+8], %%f54\n" +" ldd [%1], %%f56\n" +" ldd [%1+8], %%f58\n" +" fcmpq %%fcc3, %%f52, %%f56\n" +" " : : "r" (a), "r" (b) : QP_CLOBBER_CC); + _FPU_GETCW(_fcw); + r = ((_fcw >> 36) & 3)); + + return !r; +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_fge.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_fge.c new file mode 100644 index 0000000000..19cacbb342 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_fge.c @@ -0,0 +1,48 @@ +/* Software floating-point emulation. + Return 1 if (*a) >= (*b) + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#include "soft-fp.h" +#include "quad.h" +/* Return 1 if (*a) >= (*b), otherwise 0. FP_CMP_Q compares with the + operands swapped (B, A) and is given 3 as the unordered result, so the + r <= 0 test below corresponds to *b being less than or equal to *a + (presumably FP_CMP_Q yields -1/0/1 for ordered operands -- confirm). + In the fcmpeq replay passed to QP_HANDLE_EXCEPTIONS (defined elsewhere) + only bit 36 of the word read by _FPU_GETCW is kept, so r is 0 or 1 + there. */ +int _Qp_fge(const long double *a, const long double *b) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(B); + int r; + + FP_INIT_ROUNDMODE; + FP_UNPACK_RAW_QP(A, a); + FP_UNPACK_RAW_QP(B, b); + FP_CMP_Q(r, B, A, 3, 2); + + QP_HANDLE_EXCEPTIONS( + __asm ( +" ldd [%0], %%f52\n" +" ldd [%0+8], %%f54\n" +" ldd [%1], %%f56\n" +" ldd [%1+8], %%f58\n" +" fcmpeq %%fcc3, %%f52, %%f56\n" +" " : : "r" (a), "r" (b) : QP_CLOBBER_CC); + _FPU_GETCW(_fcw); + r = ((_fcw >> 36) & 1)); + + return (r <= 0); +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_fgt.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_fgt.c new file mode 100644 index 0000000000..70645d1cba --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_fgt.c @@ -0,0 +1,48 @@ +/* Software floating-point emulation. + Return 1 if (*a) > (*b) + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#include "soft-fp.h" +#include "quad.h" +/* Return 1 if (*a) > (*b), otherwise 0. FP_CMP_Q compares with the + operands swapped (B, A), so the r == -1 test below corresponds to + *b < *a (presumably FP_CMP_Q yields -1/0/1 for ordered operands and + the supplied 3 for unordered -- confirm). In the fcmpeq replay passed + to QP_HANDLE_EXCEPTIONS (defined elsewhere) r becomes the two-bit field + at bit 36 of the word read by _FPU_GETCW minus 3, so only a field + value of 2 gives -1. */ +int _Qp_fgt(const long double *a, const long double *b) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(B); + int r; + + FP_INIT_ROUNDMODE; + FP_UNPACK_RAW_QP(A, a); + FP_UNPACK_RAW_QP(B, b); + FP_CMP_Q(r, B, A, 3, 2); + + QP_HANDLE_EXCEPTIONS( + __asm ( +" ldd [%0], %%f52\n" +" ldd [%0+8], %%f54\n" +" ldd [%1], %%f56\n" +" ldd [%1+8], %%f58\n" +" fcmpeq %%fcc3, %%f52, %%f56\n" +" " : : "r" (a), "r" (b) : QP_CLOBBER_CC); + _FPU_GETCW(_fcw); + r = ((_fcw >> 36) & 3) - 3); + + return (r == -1); +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_fle.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_fle.c new file mode 100644 index 0000000000..6293fcbcda --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_fle.c @@ -0,0 +1,48 @@ +/* Software floating-point emulation. + Return 1 if (*a) <= (*b) + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#include "soft-fp.h" +#include "quad.h" +/* Return 1 if (*a) <= (*b), otherwise 0. FP_CMP_Q compares with the + operands swapped (B, A) and is given -2 as the unordered result, so the + r >= 0 test below rejects unordered operands and corresponds to *b + being greater than or equal to *a (presumably FP_CMP_Q yields -1/0/1 + for ordered operands -- confirm). In the fcmpeq replay passed to + QP_HANDLE_EXCEPTIONS (defined elsewhere) r is -1 when bit 37 of the + word read by _FPU_GETCW is set and 0 otherwise. */ +int _Qp_fle(const long double *a, const long double *b) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(B); + int r; + + FP_INIT_ROUNDMODE; + FP_UNPACK_RAW_QP(A, a); + FP_UNPACK_RAW_QP(B, b); + FP_CMP_Q(r, B, A, -2, 2); + + QP_HANDLE_EXCEPTIONS( + __asm ( +" ldd [%0], %%f52\n" +" ldd [%0+8], %%f54\n" +" ldd [%1], %%f56\n" +" ldd [%1+8], %%f58\n" +" fcmpeq %%fcc3, %%f52, %%f56\n" +" " : : "r" (a), "r" (b) : QP_CLOBBER_CC); + _FPU_GETCW(_fcw); + r = ((_fcw >> 36) & 2) ? -1 : 0); + + return (r >= 0); +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_flt.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_flt.c new file mode 100644 index 0000000000..7aa054697a --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_flt.c @@ -0,0 +1,48 @@ +/* Software floating-point emulation. + Return 1 if (*a) < (*b) + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#include "soft-fp.h" +#include "quad.h" +/* Return 1 if (*a) < (*b), otherwise 0. FP_CMP_Q compares with the + operands swapped (B, A), so the r == 1 test below corresponds to + *b > *a (presumably FP_CMP_Q yields -1/0/1 for ordered operands and + the supplied 3 for unordered -- confirm). In the fcmpeq replay passed + to QP_HANDLE_EXCEPTIONS (defined elsewhere) r is reloaded from bits + 36-37 of the word read by _FPU_GETCW. */ +int _Qp_flt(const long double *a, const long double *b) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(B); + int r; + + FP_INIT_ROUNDMODE; + FP_UNPACK_RAW_QP(A, a); + FP_UNPACK_RAW_QP(B, b); + FP_CMP_Q(r, B, A, 3, 2); + + QP_HANDLE_EXCEPTIONS( + __asm ( +" ldd [%0], %%f52\n" +" ldd [%0+8], %%f54\n" +" ldd [%1], %%f56\n" +" ldd [%1+8], %%f58\n" +" fcmpeq %%fcc3, %%f52, %%f56\n" +" " : : "r" (a), "r" (b) : QP_CLOBBER_CC); + _FPU_GETCW(_fcw); + r = ((_fcw >> 36) & 3)); + + return (r == 1); +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_fne.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_fne.c new file mode 100644 index 0000000000..dd358eda0a --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_fne.c @@ -0,0 +1,49 @@ +/* Software floating-point emulation. + Return 1 if (*a) != (*b) + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#include "soft-fp.h" +#include "quad.h" +/* Return 1 if (*a) != (*b), otherwise 0. r is returned as produced by + FP_CMP_EQ_Q (presumably 0 for equal, non-zero otherwise -- confirm), or + by the fcmpq replay passed to QP_HANDLE_EXCEPTIONS (defined elsewhere), + where r is 1 exactly when bits 36-37 of the word read by _FPU_GETCW + are not both zero. */ +int _Qp_fne(const long double *a, const long double *b) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(B); + int r; + + FP_INIT_ROUNDMODE; + FP_UNPACK_RAW_QP(A, a); + FP_UNPACK_RAW_QP(B, b); + FP_CMP_EQ_Q(r, A, B, 1); + + QP_HANDLE_EXCEPTIONS( + __asm ( +" ldd [%0], %%f52\n" +" ldd [%0+8], %%f54\n" +" ldd [%1], %%f56\n" +" ldd [%1+8], %%f58\n" +" fcmpq %%fcc3, %%f52, %%f56\n" +" " : : "r" (a), "r" (b) : QP_CLOBBER_CC); + _FPU_GETCW(_fcw); + r = ((_fcw >> 36) & 3) != 0); + + + return r; +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_itoq.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_itoq.c new file mode 100644 index 0000000000..230fde365f --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_itoq.c @@ -0,0 +1,34 @@ +/* Software floating-point emulation. + (*c) = (long double)(a) + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#include "soft-fp.h" +#include "quad.h" +/* Convert the int a to quad precision and store it in *c. a is first + copied into the non-const local b, which is what FP_FROM_INT_Q is given + (presumably because the macro modifies its argument -- confirm). This + routine ends with QP_NO_EXCEPTIONS rather than a QP_HANDLE_EXCEPTIONS + replay and does not call FP_INIT_ROUNDMODE. */ +void _Qp_itoq(long double *c, const int a) +{ + FP_DECL_EX; + FP_DECL_Q(C); + int b = a; + + FP_FROM_INT_Q(C, b, 32, unsigned int); + FP_PACK_RAW_QP(c, C); + QP_NO_EXCEPTIONS; +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_mul.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_mul.c new file mode 100644 index 0000000000..49a290af93 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_mul.c @@ -0,0 +1,49 @@ +/* Software floating-point emulation. + (*c) = (*a) * (*b) + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* As QP_HANDLE_EXCEPTIONS reloads FPU control word anyway, + avoid doing it twice.
*/ +#define _FP_MUL_MEAT_RESET_FE do {} while (0) +#include "soft-fp.h" +#include "quad.h" +/* Store the quad product (*a) * (*b) in *c. The operands are + unpacked, multiplied and packed with the soft-fp macros after + FP_INIT_ROUNDMODE. The block passed to QP_HANDLE_EXCEPTIONS (defined + elsewhere) calls _FPU_SETCW(_fcw) before the fmulq sequence, which + looks like a hardware replay of the multiplication -- confirm. + _FP_MUL_MEAT_RESET_FE is defined empty before soft-fp.h is included so + the control word is not reloaded twice. */ +void _Qp_mul(long double *c, const long double *a, const long double *b) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(C); + + FP_INIT_ROUNDMODE; + FP_UNPACK_QP(A, a); + FP_UNPACK_QP(B, b); + FP_MUL_Q(C, A, B); + FP_PACK_QP(c, C); + QP_HANDLE_EXCEPTIONS( + _FPU_SETCW(_fcw); + __asm ( +" ldd [%1], %%f52\n" +" ldd [%1+8], %%f54\n" +" ldd [%2], %%f56\n" +" ldd [%2+8], %%f58\n" +" fmulq %%f52, %%f56, %%f60\n" +" std %%f60, [%0]\n" +" std %%f62, [%0+8]\n" +" " : : "r" (c), "r" (a), "r" (b) : QP_CLOBBER)); +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_neg.S b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_neg.S new file mode 100644 index 0000000000..d2fd7f286f --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_neg.S @@ -0,0 +1,30 @@ +/* Quad floating-point emulation. + (*c) = !(*a) + Copyright (C) 1999-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#include <sysdep.h> +/* _Qp_neg: copy the 16-byte quad at [%o1] to [%o0] with its first + doubleword negated, i.e. (*c) = -(*a) (assuming the usual _Qp_* + convention of result pointer first, operand pointer second -- confirm). + The operand is loaded as two doubles into %f60/%f62; fnegd changes + only %f60, presumably the half that holds the sign bit, and both halves + are stored back. The final std follows the jmpl, so it executes in + the return's delay slot. */ +ENTRY(_Qp_neg) + ldd [%o1], %f60 + ldd [%o1 + 8], %f62 + fnegd %f60, %f60 + std %f60, [%o0] + jmpl %o7 + 8, %g0 + std %f62, [%o0 + 8] +END(_Qp_neg) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_qtod.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_qtod.c new file mode 100644 index 0000000000..7c3889da97 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_qtod.c @@ -0,0 +1,48 @@ +/* Software floating-point emulation. + Return (double)(*a) + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#include "soft-fp.h" +#include "double.h" +#include "quad.h" +/* Return (*a) converted to double. FP_TRUNC narrows the unpacked quad + to double after FP_INIT_ROUNDMODE; its word-count arguments (2,4 or + 1,2) are chosen by whether two _FP_W_TYPE words can hold + _FP_FRACBITS_Q bits. NOTE(review): QP_HANDLE_EXCEPTIONS is defined + outside this file; the fqtod sequence passed to it, which writes r + directly, looks like a hardware replay of the conversion -- confirm. */ +double _Qp_qtod(const long double *a) +{ + FP_DECL_EX; + FP_DECL_Q(A); + FP_DECL_D(R); + double r; + + FP_INIT_ROUNDMODE; + FP_UNPACK_SEMIRAW_QP(A, a); +#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q + FP_TRUNC(D,Q,2,4,R,A); +#else + FP_TRUNC(D,Q,1,2,R,A); +#endif + FP_PACK_SEMIRAW_D(r, R); + QP_HANDLE_EXCEPTIONS(__asm ( +" ldd [%1], %%f52\n" +" ldd [%1+8], %%f54\n" +" fqtod %%f52, %0\n" +" " : "=&e" (r) : "r" (a) : QP_CLOBBER)); + + return r; +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_qtoi.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_qtoi.c new file mode 100644 index 0000000000..99cd760acd --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_qtoi.c @@ -0,0 +1,46 @@ +/* Software floating-point emulation. + Return (int)(*a) + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#define FP_ROUNDMODE FP_RND_ZERO +#include "soft-fp.h" +#include "quad.h" +/* Return (*a) converted to int. FP_ROUNDMODE is fixed to FP_RND_ZERO + before soft-fp.h is included, and FP_TO_INT_Q is asked for a 32-bit + result with 1 as its last argument (presumably "signed" -- confirm). + The result is built in the unsigned local r and converted on return. + The block passed to QP_HANDLE_EXCEPTIONS (defined elsewhere) redoes the + conversion with fqtoi into %f31, stores it to the local rx and copies + that to r. */ +int _Qp_qtoi(const long double *a) +{ + FP_DECL_EX; + FP_DECL_Q(A); + unsigned int r; + + FP_INIT_ROUNDMODE; + FP_UNPACK_RAW_QP(A, a); + FP_TO_INT_Q(r, A, 32, 1); + QP_HANDLE_EXCEPTIONS( + int rx; + __asm ( +" ldd [%1], %%f52\n" +" ldd [%1+8], %%f54\n" +" fqtoi %%f52, %%f31\n" +" st %%f31, [%0]\n" +" " : : "r" (&rx), "r" (a) : QP_CLOBBER, "f31"); + r = rx); + + return r; +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_qtos.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_qtos.c new file mode 100644 index 0000000000..dacd6c0620 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_qtos.c @@ -0,0 +1,49 @@ +/* Software floating-point emulation. + Return (float)(*a) + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#include "soft-fp.h" +#include "single.h" +#include "quad.h" +/* Return (*a) converted to float. FP_TRUNC narrows the unpacked quad + to single precision after FP_INIT_ROUNDMODE; its word-count arguments + (1,4 or 1,2) are chosen by whether two _FP_W_TYPE words can hold + _FP_FRACBITS_Q bits. NOTE(review): QP_HANDLE_EXCEPTIONS is defined + outside this file; the fqtos sequence passed to it, which writes r + directly, looks like a hardware replay of the conversion -- confirm. */ +float _Qp_qtos(const long double *a) +{ + FP_DECL_EX; + FP_DECL_Q(A); + FP_DECL_S(R); + float r; + + FP_INIT_ROUNDMODE; + FP_UNPACK_SEMIRAW_QP(A, a); +#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q + FP_TRUNC(S,Q,1,4,R,A); +#else + FP_TRUNC(S,Q,1,2,R,A); +#endif + FP_PACK_SEMIRAW_S(r, R); + + QP_HANDLE_EXCEPTIONS(__asm ( +" ldd [%1], %%f52\n" +" ldd [%1+8], %%f54\n" +" fqtos %%f52, %0\n" +" " : "=&f" (r) : "r" (a) : QP_CLOBBER)); + + return r; +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_qtoui.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_qtoui.c new file mode 100644 index 0000000000..2d8fb52530 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_qtoui.c @@ -0,0 +1,46 @@ +/* Software floating-point emulation. + Return (unsigned int)(*a) + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ + +#define FP_ROUNDMODE FP_RND_ZERO +#include "soft-fp.h" +#include "quad.h" +/* Return (*a) converted to unsigned int. FP_ROUNDMODE is fixed to + FP_RND_ZERO before soft-fp.h is included, and FP_TO_INT_Q is asked for + a 32-bit result with -1 as its last argument (presumably selecting + unsigned conversion -- confirm). NOTE(review): the block passed to + QP_HANDLE_EXCEPTIONS (defined elsewhere) redoes the conversion with + fqtoi through a signed int rx, so on that path the value comes from a + signed conversion -- confirm this is intended. */ +unsigned int _Qp_qtoui(const long double *a) +{ + FP_DECL_EX; + FP_DECL_Q(A); + unsigned int r; + + FP_INIT_ROUNDMODE; + FP_UNPACK_RAW_QP(A, a); + FP_TO_INT_Q(r, A, 32, -1); + QP_HANDLE_EXCEPTIONS( + int rx; + __asm ( +" ldd [%1], %%f52\n" +" ldd [%1+8], %%f54\n" +" fqtoi %%f52, %%f31\n" +" st %%f31, [%0]\n" +" " : : "r" (&rx), "r" (a) : QP_CLOBBER, "f31"); + r = rx); + + return r; +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_qtoux.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_qtoux.c new file mode 100644 index 0000000000..2d74a6ee15 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_qtoux.c @@ -0,0 +1,46 @@ +/* Software floating-point emulation. + Return (unsigned long)(*a) + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>.
*/ +/* FP_ROUNDMODE is fixed here before soft-fp.h is included; sfp-machine.h only supplies its own definition (taken from _fcw) under #ifndef FP_ROUNDMODE. */ +#define FP_ROUNDMODE FP_RND_ZERO +#include "soft-fp.h" +#include "quad.h" +/* Return the quad-precision value at *a converted to unsigned long. */ +unsigned long _Qp_qtoux(const long double *a) +{ + FP_DECL_EX; + FP_DECL_Q(A); + unsigned long r; + + FP_INIT_ROUNDMODE; /* Expands to _FPU_GETCW(_fcw), see sfp-machine.h. */ + FP_UNPACK_RAW_QP(A, a); + FP_TO_INT_Q(r, A, 64, -1); /* Software result; replaced by r = rx below only when QP_HANDLE_EXCEPTIONS runs its argument, i.e. when (_fcw >> 23) & _fex is nonzero. NOTE(review): that asm uses fqtox, the same instruction the signed _Qp_qtox uses; confirm this is intended for the unsigned case. */ + QP_HANDLE_EXCEPTIONS( + unsigned long rx; + __asm ( +" ldd [%1], %%f52\n" +" ldd [%1+8], %%f54\n" +" fqtox %%f52, %%f60\n" +" std %%f60, [%0]\n" +" " : : "r" (&rx), "r" (a) : QP_CLOBBER); + r = rx); + + return r; +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_qtox.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_qtox.c new file mode 100644 index 0000000000..abfc666cde --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_qtox.c @@ -0,0 +1,46 @@ +/* Software floating-point emulation. + Return (long)(*a) + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ +/* FP_ROUNDMODE is fixed here before soft-fp.h is included; sfp-machine.h only supplies its own definition (taken from _fcw) under #ifndef FP_ROUNDMODE. */ +#define FP_ROUNDMODE FP_RND_ZERO +#include "soft-fp.h" +#include "quad.h" +/* Return the quad-precision value at *a converted to long. */ +long _Qp_qtox(const long double *a) +{ + FP_DECL_EX; + FP_DECL_Q(A); + unsigned long r; /* Unsigned temporary; it is converted to the long return type at the return statement. */ + + FP_INIT_ROUNDMODE; /* Expands to _FPU_GETCW(_fcw), see sfp-machine.h. */ + FP_UNPACK_RAW_QP(A, a); + FP_TO_INT_Q(r, A, 64, 1); /* Software result; replaced by r = rx below only when QP_HANDLE_EXCEPTIONS runs its argument, i.e. when (_fcw >> 23) & _fex is nonzero. */ + QP_HANDLE_EXCEPTIONS( + long rx; + __asm ( +" ldd [%1], %%f52\n" +" ldd [%1+8], %%f54\n" +" fqtox %%f52, %%f60\n" +" std %%f60, [%0]\n" +" " : : "r" (&rx), "r" (a) : QP_CLOBBER); + r = rx); + + return r; +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_sqrt.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_sqrt.c new file mode 100644 index 0000000000..3d78b1fdbb --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_sqrt.c @@ -0,0 +1,41 @@ +/* Software floating-point emulation. + (*c) = sqrtl(*a) + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include "soft-fp.h" +#include "quad.h" +/* Store the square root of the quad-precision value at *a into *c. */ +void _Qp_sqrt(long double *c, const long double *a) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(C); + + FP_INIT_ROUNDMODE; /* Expands to _FPU_GETCW(_fcw), see sfp-machine.h. */ + FP_UNPACK_QP(A, a); + FP_SQRT_Q(C, A); + FP_PACK_QP(c, C); /* Software result is stored first; the asm below rewrites *c with fsqrtq only when QP_HANDLE_EXCEPTIONS runs its argument, i.e. when (_fcw >> 23) & _fex is nonzero. */ + QP_HANDLE_EXCEPTIONS(__asm ( +" ldd [%1], %%f52\n" +" ldd [%1+8], %%f54\n" +" fsqrtq %%f52, %%f60\n" +" std %%f60, [%0]\n" +" std %%f62, [%0+8]\n" +" " : : "r" (c), "r" (a) : QP_CLOBBER)); +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_stoq.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_stoq.c new file mode 100644 index 0000000000..9202a7269b --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_stoq.c @@ -0,0 +1,45 @@ +/* Software floating-point emulation. + (*c) = (long double)(a) + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include "soft-fp.h" +#include "single.h" +#include "quad.h" +/* Widen the float a to quad precision and store the result in *c. */ +void _Qp_stoq(long double *c, const float a) +{ + FP_DECL_EX; + FP_DECL_S(A); + FP_DECL_Q(C); + + FP_INIT_ROUNDMODE; /* Expands to _FPU_GETCW(_fcw), see sfp-machine.h. */ + FP_UNPACK_RAW_S(A, a); +#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q + FP_EXTEND(Q,S,4,1,C,A); +#else + FP_EXTEND(Q,S,2,1,C,A); +#endif + FP_PACK_RAW_QP(c, C); /* Software result is stored first; the asm below rewrites *c with fstoq only when QP_HANDLE_EXCEPTIONS runs its argument, i.e. when (_fcw >> 23) & _fex is nonzero. */ + QP_HANDLE_EXCEPTIONS(__asm ( +" fstoq %1, %%f60\n" +" std %%f60, [%0]\n" +" std %%f62, [%0+8]\n" +" " : : "r" (c), "f" (a) : QP_CLOBBER)); +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_sub.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_sub.c new file mode 100644 index 0000000000..71b9890743 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_sub.c @@ -0,0 +1,44 @@ +/* Software floating-point emulation. + (*c) = (*a) - (*b) + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include "soft-fp.h" +#include "quad.h" +/* Store the quad-precision difference (*a) - (*b) into *c. */ +void _Qp_sub(long double *c, const long double *a, const long double *b) +{ + FP_DECL_EX; + FP_DECL_Q(A); FP_DECL_Q(B); FP_DECL_Q(C); + + FP_INIT_ROUNDMODE; /* Expands to _FPU_GETCW(_fcw), see sfp-machine.h. */ + FP_UNPACK_SEMIRAW_QP(A, a); + FP_UNPACK_SEMIRAW_QP(B, b); + FP_SUB_Q(C, A, B); + FP_PACK_SEMIRAW_QP(c, C); /* Software result is stored first; the asm below rewrites *c with fsubq only when QP_HANDLE_EXCEPTIONS runs its argument, i.e. when (_fcw >> 23) & _fex is nonzero. */ + QP_HANDLE_EXCEPTIONS(__asm ( +" ldd [%1], %%f52\n" +" ldd [%1+8], %%f54\n" +" ldd [%2], %%f56\n" +" ldd [%2+8], %%f58\n" +" fsubq %%f52, %%f56, %%f60\n" +" std %%f60, [%0]\n" +" std %%f62, [%0+8]\n" +" " : : "r" (c), "r" (a), "r" (b) : QP_CLOBBER)); +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_uitoq.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_uitoq.c new file mode 100644 index 0000000000..cc8603ad41 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_uitoq.c @@ -0,0 +1,34 @@ +/* Software floating-point emulation. + (*c) = (long double)(a) + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include "soft-fp.h" +#include "quad.h" +/* Convert the unsigned int a to quad precision and store the result in *c. */ +void _Qp_uitoq(long double *c, const unsigned int a) +{ + FP_DECL_EX; + FP_DECL_Q(C); + unsigned int b = a; /* Non-const copy handed to FP_FROM_INT_Q; presumably the macro modifies its argument. */ + + FP_FROM_INT_Q(C, b, 32, unsigned int); + FP_PACK_RAW_QP(c, C); + QP_NO_EXCEPTIONS; /* Only runs the fzero/faddd sequence from sfp-machine.h; _fex is not consulted here. */ +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_util.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_util.c new file mode 100644 index 0000000000..7e1f2511e2 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_util.c @@ -0,0 +1,60 @@ +/* Software floating-point emulation. + Helper routine for _Qp_* routines. + Simulate exceptions using single and double precision arithmetic. + Copyright (C) 1999-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <float.h> +#include <math.h> +#include <assert.h> +#include "soft-fp.h" +/* For each FP_EX_* bit set in exceptions, execute one hardware floating-point instruction on operands chosen to raise that condition. The blocks run in the fixed order invalid, divide by zero, overflow, underflow, inexact. */ +void __Qp_handle_exceptions(int exceptions) +{ + if (exceptions & FP_EX_INVALID) + { + float f = 0.0; /* 0.0 / 0.0 */ + __asm__ __volatile__ ("fdivs %0, %0, %0" : "+f" (f)); + } + if (exceptions & FP_EX_DIVZERO) + { + float f = 1.0, g = 0.0; /* 1.0 / 0.0 */ + __asm__ __volatile__ ("fdivs %0, %1, %0" + : "+f" (f) + : "f" (g)); + } + if (exceptions & FP_EX_OVERFLOW) + { + float f = FLT_MAX; /* FLT_MAX * FLT_MAX */ + __asm__ __volatile__("fmuls %0, %0, %0" : "+f" (f)); + exceptions &= ~FP_EX_INEXACT; /* Skip the separate inexact block below; presumably the multiply above already raised inexact. */ + } + if (exceptions & FP_EX_UNDERFLOW) + { + float f = FLT_MIN; /* FLT_MIN * FLT_MIN */ + __asm__ __volatile__("fmuls %0, %0, %0" : "+f" (f)); + exceptions &= ~FP_EX_INEXACT; /* Same reasoning as in the overflow block. */ + } + if (exceptions & FP_EX_INEXACT) + { + double d = 1.0, e = M_PI; /* 1.0 / M_PI */ + __asm__ __volatile__ ("fdivd %0, %1, %0" + : "+f" (d) + : "f" (e)); + } +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_uxtoq.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_uxtoq.c new file mode 100644 index 0000000000..766ca78872 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_uxtoq.c @@ -0,0 +1,34 @@ +/* Software floating-point emulation. + (*c) = (long double)(a) + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include "soft-fp.h" +#include "quad.h" +/* Convert the unsigned long a to quad precision and store the result in *c. */ +void _Qp_uxtoq(long double *c, const unsigned long a) +{ + FP_DECL_EX; + FP_DECL_Q(C); + unsigned long b = a; /* Non-const copy handed to FP_FROM_INT_Q; presumably the macro modifies its argument. */ + + FP_FROM_INT_Q(C, b, 64, unsigned long); + FP_PACK_RAW_QP(c, C); + QP_NO_EXCEPTIONS; /* Only runs the fzero/faddd sequence from sfp-machine.h; _fex is not consulted here. */ +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_xtoq.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_xtoq.c new file mode 100644 index 0000000000..42c0bf1044 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/qp_xtoq.c @@ -0,0 +1,34 @@ +/* Software floating-point emulation. + (*c) = (long double)(a) + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com) and + Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include "soft-fp.h" +#include "quad.h" +/* Convert the signed long a to quad precision and store the result in *c. */ +void _Qp_xtoq(long double *c, const long a) +{ + FP_DECL_EX; + FP_DECL_Q(C); + long b = a; /* Non-const copy handed to FP_FROM_INT_Q; presumably the macro modifies its argument. */ + + FP_FROM_INT_Q(C, b, 64, unsigned long); /* The last argument names the unsigned counterpart of the input type, as in the other _Qp_*toq routines. */ + FP_PACK_RAW_QP(c, C); + QP_NO_EXCEPTIONS; /* Only runs the fzero/faddd sequence from sfp-machine.h; _fex is not consulted here. */ +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/s_frexpl.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/s_frexpl.c new file mode 100644 index 0000000000..6f0baeb3fb --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/s_frexpl.c @@ -0,0 +1,51 @@ +/* Software floating-point emulation. + frexpl(x, exp) + Copyright (C) 1999-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* + * for non-zero x + * x = frexpl(arg,&exp); + * return a long double fp quantity x such that 0.5 <= |x| <1.0 + * and the corresponding binary exponent "exp". That is + * arg = x*2^exp. + * If arg is inf, 0.0, or NaN, then frexpl(arg,&exp) returns arg + * with *exp=0. 
+ */ + +#include "soft-fp.h" +#include "quad.h" +/* Split arg into a fraction and a power of two. When FP_UNPACK_Q classifies arg as FP_CLS_NORMAL, store A_e + 1 in *exp and return the value repacked with its exponent field set to -1; otherwise return arg unchanged with *exp left at 0. */ +long double __frexpl(long double arg, int *exp) +{ + FP_DECL_EX; + FP_DECL_Q(A); + long double r; + + *exp = 0; /* Default for the early return below. */ + FP_UNPACK_Q(A, arg); + if (A_c != FP_CLS_NORMAL) + return arg; + *exp = A_e + 1; /* Presumably A_e is the exponent of a significand in [1, 2), so adding one matches a fraction in [0.5, 1). */ + A_e = -1; + FP_PACK_Q(r, A); + + return r; +} + +weak_alias (__frexpl, frexpl) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/s_scalblnl.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/s_scalblnl.c new file mode 100644 index 0000000000..250a8f4f0f --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/s_scalblnl.c @@ -0,0 +1,52 @@ +/* Software floating-point emulation. + scalblnl(x, exp) + Copyright (C) 1999-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* + * scalblnl (long double x, long int n) + * scalblnl(x,n) returns x* 2**n computed by exponent + * manipulation rather than by actually performing an + * exponentiation or a multiplication. 
+ */ + +#include "soft-fp.h" +#include "quad.h" +/* Return arg scaled by 2**exp by adjusting the unpacked exponent. Zero, NaN and infinity are returned unchanged. exp is a long int, as in the scalblnl prototype quoted in the file comment. */ +long double __scalblnl(long double arg, long int exp) +{ + FP_DECL_EX; + FP_DECL_Q(A); + long double r; + + FP_UNPACK_Q(A, arg); + switch (A_c) + { + case FP_CLS_ZERO: + return arg; + case FP_CLS_NAN: + case FP_CLS_INF: + FP_HANDLE_EXCEPTIONS; + return arg; + } + A_e += (exp > 0x10000L ? 0x10000L : exp < -0x10000L ? -0x10000L : exp); /* Clamp first: a shift of more than 2**16 in either direction already overflows or underflows every quad value, and clamping keeps this addition from overflowing A_e for extreme exp. */ + FP_PACK_Q(r, A); + FP_HANDLE_EXCEPTIONS; + + return r; +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/s_scalbnl.c b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/s_scalbnl.c new file mode 100644 index 0000000000..c686175e97 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/s_scalbnl.c @@ -0,0 +1,52 @@ +/* Software floating-point emulation. + scalbnl(x, exp) + Copyright (C) 1999-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek (jj@ultra.linux.cz). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +/* + * scalbnl (long double x, int n) + * scalbnl(x,n) returns x* 2**n computed by exponent + * manipulation rather than by actually performing an + * exponentiation or a multiplication. 
+ */ + +#include "soft-fp.h" +#include "quad.h" +/* Return arg scaled by 2**exp by adding exp to the unpacked exponent and repacking. Zero, NaN and infinity are returned unchanged. */ +long double __scalbnl(long double arg, int exp) +{ + FP_DECL_EX; + FP_DECL_Q(A); + long double r; + + FP_UNPACK_Q(A, arg); + switch (A_c) + { + case FP_CLS_ZERO: + return arg; + case FP_CLS_NAN: + case FP_CLS_INF: + FP_HANDLE_EXCEPTIONS; + return arg; + } + A_e += exp; /* Out-of-range results are left to FP_PACK_Q; presumably it produces the overflow or underflow result and records the flags in _fex. */ + FP_PACK_Q(r, A); + FP_HANDLE_EXCEPTIONS; + + return r; +} diff --git a/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/sfp-machine.h b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/sfp-machine.h new file mode 100644 index 0000000000..c03a6d3764 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/soft-fp/sfp-machine.h @@ -0,0 +1,147 @@ +/* Machine-dependent software floating-point definitions. + Sparc64 userland (_Q_* and _Qp_*) version. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson (rth@cygnus.com), + Jakub Jelinek (jj@ultra.linux.cz) and + David S. Miller (davem@redhat.com). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <fpu_control.h> +#include <fenv.h> +#include <stdlib.h> +/* Word configuration handed to soft-fp: 64-bit words, with unsigned long, signed long and long as the word, signed-word and integer types. */ +#define _FP_W_TYPE_SIZE 64 +#define _FP_W_TYPE unsigned long +#define _FP_WS_TYPE signed long +#define _FP_I_TYPE long + +/* Helper macros for _FP_MUL_MEAT_2_120_240_double. 
*/ +#define _FP_MUL_MEAT_SET_FE_TZ \ +do { \ + static fpu_control_t _fetz = _FPU_RC_DOWN; \ + _FPU_SETCW(_fetz); \ +} while (0) /* NOTE(review): the name says TZ but the control word loaded is _FPU_RC_DOWN; presumably equivalent for the operands the multiply helper uses. Confirm. */ +#ifndef _FP_MUL_MEAT_RESET_FE +#define _FP_MUL_MEAT_RESET_FE _FPU_SETCW(_fcw) +#endif +/* Fraction multiply primitives for single, double and quad; the quad one is handed the two control-word hooks defined above. */ +#define _FP_MUL_MEAT_S(R,X,Y) \ + _FP_MUL_MEAT_1_imm(_FP_WFRACBITS_S,R,X,Y) +#define _FP_MUL_MEAT_D(R,X,Y) \ + _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_Q(R,X,Y) \ + _FP_MUL_MEAT_2_120_240_double(_FP_WFRACBITS_Q,R,X,Y, \ + _FP_MUL_MEAT_SET_FE_TZ, \ + _FP_MUL_MEAT_RESET_FE) +/* Fraction divide primitives for single, double and quad. */ +#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_imm(S,R,X,Y,_FP_DIV_HELP_imm) +#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_1_udiv_norm(D,R,X,Y) +#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_2_udiv(Q,R,X,Y) +/* Fraction and sign for generated NaNs: every bit up to and including the quiet bit is set and the sign is 0. _FP_NANFRAC_Q expands to two comma-separated values, presumably one per fraction word. */ +#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) +#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1) +#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1 +#define _FP_NANSIGN_S 0 +#define _FP_NANSIGN_D 0 +#define _FP_NANSIGN_Q 0 + +#define _FP_KEEPNANFRACP 1 +#define _FP_QNANNEGATEDP 0 + +/* If one NaN is signaling and the other is not, + * we choose that one, otherwise we choose Y. + * Concretely: X is taken only when Y has the quiet bit set and X does not. + */ +#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ + do { \ + if ((_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs) \ + && !(_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs)) \ + { \ + R##_s = X##_s; \ + _FP_FRAC_COPY_##wc(R,X); \ + } \ + else \ + { \ + R##_s = Y##_s; \ + _FP_FRAC_COPY_##wc(R,Y); \ + } \ + R##_c = FP_CLS_NAN; \ + } while (0) + +/* Obtain the current rounding mode from the two bits at position 30 of _fcw. A file may define FP_ROUNDMODE before including this header to force a fixed mode instead. */ +#ifndef FP_ROUNDMODE +#define FP_ROUNDMODE ((_fcw >> 30) & 0x3) +#endif + +/* Exception flags. 
*/ +#define FP_EX_INVALID (1 << 4) +#define FP_EX_OVERFLOW (1 << 3) +#define FP_EX_UNDERFLOW (1 << 2) +#define FP_EX_DIVZERO (1 << 1) +#define FP_EX_INEXACT (1 << 0) + +#define _FP_TININESS_AFTER_ROUNDING 0 +/* Declares the local copy of the FPU control word, initialised with FP_RND_NEAREST shifted into bit 30 and up; FP_INIT_ROUNDMODE below overwrites it with the value read by _FPU_GETCW. */ +#define _FP_DECL_EX \ + fpu_control_t _fcw __attribute__ ((unused)) = (FP_RND_NEAREST << 30) + +#define FP_INIT_ROUNDMODE \ +do { \ + _FPU_GETCW(_fcw); \ +} while (0) +/* Bits 23..27 of _fcw select, as the macro name says, the exceptions that trap; FP_INHIBIT_RESULTS is nonzero when an exception recorded in _fex is one of them. */ +#define FP_TRAPPING_EXCEPTIONS ((_fcw >> 23) & 0x1f) +#define FP_INHIBIT_RESULTS ((_fcw >> 23) & _fex) + +/* Simulate exceptions using single and double precision arithmetic. */ +extern void __Qp_handle_exceptions(int exc); + +#define FP_HANDLE_EXCEPTIONS \ +do { \ + if (!_fex) \ + { \ + /* This is the common case, so we do it inline. \ + * We need to clear cexc bits if any. \ + */ \ + __asm__ __volatile__("fzero %%f62\n\t" \ + "faddd %%f62, %%f62, %%f62" \ + : : : "f62"); \ + } \ + else \ + { \ + __Qp_handle_exceptions (_fex); \ + } \ +} while (0) +/* Run _a when an exception recorded in _fex has its bit set in (_fcw >> 23). Otherwise replace the low five bits of _fcw with _fex, OR _fex into bits 5..9 as well, and write the result back with _FPU_SETCW. NOTE(review): bits 0..4 and 5..9 are presumably the current and accrued exception fields of the FSR; confirm against the SPARC manual. */ +#define QP_HANDLE_EXCEPTIONS(_a) \ +do { \ + if ((_fcw >> 23) & _fex) \ + { \ + _a; \ + } \ + else \ + { \ + _fcw = (_fcw & ~0x1fL) | (_fex << 5) | _fex; \ + _FPU_SETCW(_fcw); \ + } \ +} while (0) +/* Same fzero/faddd sequence as the !_fex branch of FP_HANDLE_EXCEPTIONS, whose comment says it clears the cexc bits. */ +#define QP_NO_EXCEPTIONS \ + __asm ("fzero %%f62\n\t" \ + "faddd %%f62, %%f62, %%f62" : : : "f62") +/* Clobber list for the _Qp_* inline asm: memory plus the even-numbered registers f52..f62 that those asm blocks use. */ +#define QP_CLOBBER "memory", "f52", "f54", "f56", "f58", "f60", "f62" +#define QP_CLOBBER_CC QP_CLOBBER , "cc" diff --git a/REORG.TODO/sysdeps/sparc/sparc64/stackguard-macros.h b/REORG.TODO/sysdeps/sparc/sparc64/stackguard-macros.h new file mode 100644 index 0000000000..cc0c12c041 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/stackguard-macros.h @@ -0,0 +1,7 @@ +#include <stdint.h> +/* Each macro yields the 64-bit word loaded from a fixed offset off %g7: 0x28 for the stack guard, 0x30 for the pointer guard. NOTE(review): %g7 is presumably the thread pointer; confirm. */ +#define STACK_CHK_GUARD \ + ({ uintptr_t x; asm ("ldx [%%g7+0x28], %0" : "=r" (x)); x; }) + +#define POINTER_CHK_GUARD \ + ({ uintptr_t x; asm ("ldx [%%g7+0x30], %0" : "=r" (x)); x; }) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/start.S b/REORG.TODO/sysdeps/sparc/sparc64/start.S new file mode 100644 index 0000000000..fcd4721463 --- /dev/null +++ 
b/REORG.TODO/sysdeps/sparc/sparc64/start.S @@ -0,0 +1,100 @@ +/* Startup code for elf64-sparc + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Richard Henderson <richard@gnu.ai.mit.edu>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + In addition to the permissions in the GNU Lesser General Public + License, the Free Software Foundation gives you unlimited + permission to link the compiled version of this file with other + programs, and to distribute those programs without any restriction + coming from the use of this file. (The GNU Lesser General Public + License restrictions do apply in other respects; for example, they + cover modification of the file, and distribution when not linked + into another program.) + + Note that people who make modified versions of this file are not + obligated to grant this special exception for their modified + versions; it is their choice whether to do so. The GNU Lesser + General Public License gives permission to release a modified + version without this exception; this exception also makes it + possible to release a modified version which carries forward this + exception. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + +/* Program entry point. Fills %o0..%o5 with main, the word loaded from the stack, a pointer to the slot just after it, __libc_csu_init, __libc_csu_fini and the incoming %g1, then calls __libc_start_main. */ + + .section ".text" + .align 4 + .global _start + .type _start,#function +_start: +#ifdef SHARED + SETUP_PIC_REG(l7) +#endif + + /* Terminate the stack frame, and reserve space for functions to + drop their arguments. */ + mov %g0, %fp + sub %sp, 6*8, %sp + + /* Extract the arguments and environment as encoded on the stack. The + argument info starts after one register window (16 words) past the SP, + plus the bias we added, plus the magic v9 STACK_BIAS. The 16 words and + the 6 reserved just above give the 22*8 offset used here. */ + ldx [%sp+STACK_BIAS+22*8], %o1 + add %sp, STACK_BIAS+23*8, %o2 + + /* Load the addresses of the user entry points. */ +#ifndef SHARED + sethi %hi(main), %o0 + sethi %hi(__libc_csu_init), %o3 + sethi %hi(__libc_csu_fini), %o4 + or %o0, %lo(main), %o0 + or %o3, %lo(__libc_csu_init), %o3 + or %o4, %lo(__libc_csu_fini), %o4 +#else + sethi %gdop_hix22(main), %o0 + sethi %gdop_hix22(__libc_csu_init), %o3 + sethi %gdop_hix22(__libc_csu_fini), %o4 + xor %o0, %gdop_lox10(main), %o0 + xor %o3, %gdop_lox10(__libc_csu_init), %o3 + xor %o4, %gdop_lox10(__libc_csu_fini), %o4 + ldx [%l7 + %o0], %o0, %gdop(main) + ldx [%l7 + %o3], %o3, %gdop(__libc_csu_init) + ldx [%l7 + %o4], %o4, %gdop(__libc_csu_fini) +#endif + + /* When starting a binary via the dynamic linker, %g1 contains the + address of the shared library termination function, which will be + registered with atexit(). If we are statically linked, this will + be NULL. */ + mov %g1, %o5 + + /* Let libc do the rest of the initialization, and call main. */ + call __libc_start_main + nop + + /* Die very horribly if exit returns. */ + illtrap 0 + + .size _start, .-_start + +/* Define a symbol for the first piece of initialized data. 
*/ + .data + .globl __data_start +__data_start: + .long 0 /* One zero word, so the symbol labels real initialized data. */ +weak_alias (__data_start, data_start) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/stpcpy.S b/REORG.TODO/sysdeps/sparc/sparc64/stpcpy.S new file mode 100644 index 0000000000..b1593563a9 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/stpcpy.S @@ -0,0 +1,274 @@ +/* Copy SRC to DEST returning the address of the terminating '\0' in DEST. + For SPARC v9. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and + Jakub Jelinek <jj@ultra.linux.cz>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <asm/asi.h> +#ifndef XCC + .register %g2, #scratch + .register %g3, #scratch + .register %g6, #scratch +#endif +/* %g2, %g3 and %g6 are used as temporaries by the code below, hence the #scratch declarations above. */ + /* Normally, this uses + ((xword - 0x0101010101010101) & 0x8080808080808080) test + to find out if any byte in xword could be zero. This is fast, but + also gives false alarm for any byte in range 0x81-0xff. It does + not matter for correctness, as if this test tells us there could + be some zero byte, we check it byte by byte, but if bytes with + high bits set are common in the strings, then this will give poor + performance. 
You can #define EIGHTBIT_NOT_RARE and the algorithm + will use one tick slower, but more precise test + ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080), + which does not give any false alarms (but if some bits are set, + one cannot assume from it which bytes are zero and which are not). + It is yet to be measured, what is the correct default for glibc + in these days for an average user. + */ +/* In: %o0 = DEST, %o1 = SRC (the code stores through %o0 and loads through %o1). Out: %o0 = address of the terminating zero byte in DEST. Once set up, %g1 = 0x0101010101010101 and %g2 = 0x8080808080808080. Labels 1-3 are the main loop for 8-byte aligned pointers, 5-11 store the final partial doubleword, 12-13 copy single bytes until DEST is aligned, and 14 onwards handles a misaligned SRC by shifting and merging aligned loads. The read-ahead loads use ASI_PNF, presumably so that reading past the terminator cannot fault. */ + .text + .align 32 +ENTRY(__stpcpy) + sethi %hi(0x01010101), %g1 /* IEU0 Group */ + or %g1, %lo(0x01010101), %g1 /* IEU0 Group */ + andcc %o0, 7, %g0 /* IEU1 */ + sllx %g1, 32, %g2 /* IEU0 Group */ + + bne,pn %icc, 12f /* CTI */ + andcc %o1, 7, %g3 /* IEU1 */ + or %g1, %g2, %g1 /* IEU0 Group */ + bne,pn %icc, 14f /* CTI */ + + sllx %g1, 7, %g2 /* IEU0 Group */ +1: ldx [%o1], %o3 /* Load */ + add %o1, 8, %o1 /* IEU1 */ +2: mov %o3, %g3 /* IEU0 Group */ + + sub %o3, %g1, %o2 /* IEU1 */ +3: ldxa [%o1] ASI_PNF, %o3 /* Load */ +#ifdef EIGHTBIT_NOT_RARE + andn %o2, %g3, %o2 /* IEU0 Group */ +#endif + add %o0, 8, %o0 /* IEU0 Group */ + andcc %o2, %g2, %g0 /* IEU1 */ + + add %o1, 8, %o1 /* IEU0 Group */ + be,a,pt %xcc, 2b /* CTI */ + stx %g3, [%o0 - 8] /* Store */ + srlx %g3, 56, %g5 /* IEU0 Group */ + + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 11f /* CTI */ + srlx %g3, 48, %g4 /* IEU0 */ + andcc %g4, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 10f /* CTI */ + srlx %g3, 40, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 9f /* CTI */ + + srlx %g3, 32, %g4 /* IEU0 */ + andcc %g4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 8f /* CTI */ + srlx %g3, 24, %g5 /* IEU0 */ + + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 7f /* CTI */ + srlx %g3, 16, %g4 /* IEU0 */ + andcc %g4, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 6f /* CTI */ + srlx %g3, 8, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 5f /* CTI */ + + sub %o3, %g1, %o2 /* IEU0 */ + stx %g3, [%o0 - 8] /* Store Group */ + andcc %g3, 0xff, %g0 /* IEU1 */ + bne,pt 
%icc, 3b /* CTI */ + + mov %o3, %g3 /* IEU0 Group */ +4: retl /* CTI+IEU1 Group */ + sub %o0, 1, %o0 /* IEU0 */ +/* Tails for the aligned loop: the zero byte was found inside %g3, so store only the bytes up to and including it and return its address. */ + .align 16 +6: ba,pt %xcc, 23f /* CTI Group */ + sub %o0, 3, %g6 /* IEU0 */ +5: sub %o0, 2, %g6 /* IEU0 Group */ + stb %g5, [%o0 - 2] /* Store */ + + srlx %g3, 16, %g4 /* IEU0 Group */ +23: sth %g4, [%o0 - 4] /* Store */ + srlx %g3, 32, %g4 /* IEU0 Group */ + stw %g4, [%o0 - 8] /* Store */ + + retl /* CTI+IEU1 Group */ + mov %g6, %o0 /* IEU0 */ +8: ba,pt %xcc, 24f /* CTI Group */ + sub %o0, 5, %g6 /* IEU0 */ + +7: sub %o0, 4, %g6 /* IEU0 Group */ + stb %g5, [%o0 - 4] /* Store */ + srlx %g3, 32, %g4 /* IEU0 Group */ +24: stw %g4, [%o0 - 8] /* Store */ + + retl /* CTI+IEU1 Group */ + mov %g6, %o0 /* IEU0 */ +10: ba,pt %xcc, 25f /* CTI Group */ + sub %o0, 7, %g6 /* IEU0 */ + +9: sub %o0, 6, %g6 /* IEU0 Group */ + stb %g5, [%o0 - 6] /* Store */ + srlx %g3, 48, %g4 /* IEU0 */ +25: sth %g4, [%o0 - 8] /* Store Group */ + + retl /* CTI+IEU1 Group */ + mov %g6, %o0 /* IEU0 */ +11: stb %g5, [%o0 - 8] /* Store Group */ + retl /* CTI+IEU1 Group */ + + sub %o0, 8, %o0 /* IEU0 */ +/* DEST is not 8-byte aligned: copy single bytes until it is, returning through label 4 if the terminator is copied first. */ + .align 16 +12: or %g1, %g2, %g1 /* IEU0 Group */ + ldub [%o1], %o3 /* Load */ + sllx %g1, 7, %g2 /* IEU0 Group */ + stb %o3, [%o0] /* Store Group */ + +13: add %o0, 1, %o0 /* IEU0 */ + add %o1, 1, %o1 /* IEU1 */ + andcc %o3, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 4b /* CTI */ + + lduba [%o1] ASI_PNF, %o3 /* Load */ + andcc %o0, 7, %g0 /* IEU1 Group */ + bne,a,pt %icc, 13b /* CTI */ + stb %o3, [%o0] /* Store */ + + andcc %o1, 7, %g3 /* IEU1 Group */ + be,a,pt %icc, 1b /* CTI */ + ldx [%o1], %o3 /* Load */ +14: orcc %g0, 64, %g4 /* IEU1 Group */ + + sllx %g3, 3, %g5 /* IEU0 */ + sub %o1, %g3, %o1 /* IEU0 Group */ + sub %g4, %g5, %g4 /* IEU1 */ + /* %g1 = 0101010101010101 * + * %g2 = 8080808080808080 * + * %g3 = source alignment * + * %g5 = number of bits to shift left * + * %g4 = number of bits to shift right */ + ldxa [%o1] ASI_PNF, %o5 /* Load Group */ + + addcc %o1, 8, %o1 /* IEU1 */ 
+15: sllx %o5, %g5, %o3 /* IEU0 Group */ + ldxa [%o1] ASI_PNF, %o5 /* Load */ + srlx %o5, %g4, %o4 /* IEU0 Group */ + + add %o0, 8, %o0 /* IEU1 */ + or %o3, %o4, %o3 /* IEU0 Group */ + add %o1, 8, %o1 /* IEU1 */ + sub %o3, %g1, %o4 /* IEU0 Group */ + +#ifdef EIGHTBIT_NOT_RARE + andn %o4, %o3, %o4 /* IEU0 Group */ +#endif + andcc %o4, %g2, %g0 /* IEU1 Group */ + be,a,pt %xcc, 15b /* CTI */ + stx %o3, [%o0 - 8] /* Store */ + srlx %o3, 56, %o4 /* IEU0 Group */ + + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 22f /* CTI */ + srlx %o3, 48, %o4 /* IEU0 */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 21f /* CTI */ + srlx %o3, 40, %o4 /* IEU0 */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 20f /* CTI */ + + srlx %o3, 32, %o4 /* IEU0 */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 19f /* CTI */ + srlx %o3, 24, %o4 /* IEU0 */ + + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 18f /* CTI */ + srlx %o3, 16, %o4 /* IEU0 */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 17f /* CTI */ + srlx %o3, 8, %o4 /* IEU0 */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 16f /* CTI */ + + andcc %o3, 0xff, %g0 /* IEU1 Group */ + bne,pn %icc, 15b /* CTI */ + stx %o3, [%o0 - 8] /* Store */ + retl /* CTI+IEU1 Group */ + + sub %o0, 1, %o0 /* IEU0 */ +/* Tails for the misaligned-source loop: store the bytes of %o3 up to and including the terminator, then return its address from %g6. */ + .align 16 +17: ba,pt %xcc, 26f /* CTI Group */ + subcc %o0, 3, %g6 /* IEU1 */ +18: ba,pt %xcc, 27f /* CTI Group */ + subcc %o0, 4, %g6 /* IEU1 */ + +19: ba,pt %xcc, 28f /* CTI Group */ + subcc %o0, 5, %g6 /* IEU1 */ +16: subcc %o0, 2, %g6 /* IEU1 Group */ + srlx %o3, 8, %o4 /* IEU0 */ + + stb %o4, [%o0 - 2] /* Store */ +26: srlx %o3, 16, %o4 /* IEU0 Group */ + stb %o4, [%o0 - 3] /* Store */ +27: srlx %o3, 24, %o4 /* IEU0 Group */ + + stb %o4, [%o0 - 4] /* Store */ +28: srlx %o3, 32, %o4 /* IEU0 Group */ + stw %o4, [%o0 - 8] /* Store */ + retl /* CTI+IEU1 Group */ + + mov %g6, %o0 /* IEU0 */ + + .align 16 +21: ba,pt %xcc, 29f /* CTI Group */ + subcc %o0, 7, %g6 /* IEU1 */ +22: ba,pt %xcc, 
30f /* CTI Group */ + subcc %o0, 8, %g6 /* IEU1 */ + +20: subcc %o0, 6, %g6 /* IEU1 Group */ + srlx %o3, 40, %o4 /* IEU0 */ + stb %o4, [%o0 - 6] /* Store */ +29: srlx %o3, 48, %o4 /* IEU0 Group */ + + stb %o4, [%o0 - 7] /* Store */ +30: srlx %o3, 56, %o4 /* IEU0 Group */ + stb %o4, [%o0 - 8] /* Store */ + retl /* CTI+IEU1 Group */ + + mov %g6, %o0 /* IEU0 */ +END(__stpcpy) + +weak_alias (__stpcpy, stpcpy) +libc_hidden_def (__stpcpy) +libc_hidden_builtin_def (stpcpy) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/stpncpy.S b/REORG.TODO/sysdeps/sparc/sparc64/stpncpy.S new file mode 100644 index 0000000000..537d29257f --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/stpncpy.S @@ -0,0 +1,424 @@ +/* stpncpy(DST, SRC, COUNT) - Copy no more than N characters of + SRC to DEST, returning the address of the terminating '\0' in + DEST, if any, or else DEST + N. + For SPARC v9. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jj@ultra.linux.cz> and + Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> +#include <asm/asi.h> +#ifndef XCC +#define XCC xcc +#define USE_BPR + .register %g2, #scratch + .register %g3, #scratch + .register %g6, #scratch +#endif + + /* Normally, this uses + ((xword - 0x0101010101010101) & 0x8080808080808080) test + to find out if any byte in xword could be zero. This is fast, but + also gives false alarm for any byte in range 0x81-0xff. It does + not matter for correctness, as if this test tells us there could + be some zero byte, we check it byte by byte, but if bytes with + high bits set are common in the strings, then this will give poor + performance. You can #define EIGHTBIT_NOT_RARE and the algorithm + will use one tick slower, but more precise test + ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080), + which does not give any false alarms (but if some bits are set, + one cannot assume from it which bytes are zero and which are not). + It is yet to be measured, what is the correct default for glibc + in these days for an average user.
+ */ + + .text + .align 32 +ENTRY(__stpncpy) /* In: %o0 = DST, %o1 = SRC, %o2 = COUNT; COUNT == 0 returns at once through 19: */ + sethi %hi(0x01010101), %g1 /* IEU0 Group */ +#ifdef USE_BPR + brz,pn %o2, 19f /* CTI+IEU1 */ +#else + tst %o2 /* IEU1 */ + be,pn %XCC, 19f /* CTI */ +#endif + or %g1, %lo(0x01010101), %g1 /* IEU1 */ + andcc %o0, 7, %g0 /* IEU1 Group */ + + sllx %g1, 32, %g2 /* IEU0 */ + bne,pn %icc, 26f /* CTI */ + or %g1, %g2, %g1 /* IEU0 Group */ + andcc %o1, 7, %g3 /* IEU1 */ + + bne,pn %icc, 28f /* CTI */ + sllx %g1, 7, %g2 /* IEU0 Group */ + ldx [%o1], %o3 /* Load */ +1: add %o1, 8, %o1 /* IEU1 */ + +2: subcc %o2, 8, %o2 /* IEU1 Group */ + bl,pn %XCC, 18f /* CTI */ + sub %o3, %g1, %o4 /* IEU0 */ + add %o0, 8, %o0 /* IEU0 Group */ + +#ifdef EIGHTBIT_NOT_RARE + andn %o4, %o3, %o4 /* IEU1: precise test, also mask with ~xword */ +#endif + mov %o3, %g3 /* IEU1 */ + ldxa [%o1] ASI_PNF, %o3 /* Load */ + add %o1, 8, %o1 /* IEU0 Group */ + andcc %o4, %g2, %g0 /* IEU1 */ + + be,a,pt %xcc, 2b /* CTI */ + stx %g3, [%o0-8] /* Store Group */ + srlx %g3, 56, %g5 /* IEU0 Group */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 16f /* CTI */ + srlx %g3, 48, %g4 /* IEU0 */ + andcc %g4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 15f /* CTI */ + + srlx %g3, 40, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 14f /* CTI */ + srlx %g3, 32, %g4 /* IEU0 */ + + andcc %g4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 13f /* CTI */ + srlx %g3, 24, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 12f /* CTI */ + srlx %g3, 16, %g4 /* IEU0 */ + andcc %g4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 11f /* CTI */ + + srlx %g3, 8, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 10f /* CTI */ + sub %o0, 1, %g6 /* IEU0 */ + + andcc %g3, 0xff, %g0 /* IEU1 Group */ + bne,pt %icc, 2b /* CTI */ +3: stx %g3, [%o0-8] /* Store */ + andncc %o2, 31, %g3 /* IEU1 Group */ + +4: be,pn %XCC, 41f /* CTI */ + and %o2, 31, %o2 /* IEU1 Group */ +40: stx %g0, [%o0] /* Store: zero-fill DST, 32 bytes per pass */ + stx %g0, [%o0 + 8] /* Store Group */ + + subcc %g3, 32, %g3 /* IEU1 */ + stx
%g0, [%o0 + 16] /* Store Group */ + stx %g0, [%o0 + 24] /* Store Group */ + bne,pt %XCC, 40b /* CTI */ + + add %o0, 32, %o0 /* IEU0 */ +41: subcc %o2, 8, %o2 /* IEU1 Group */ + bl,a,pn %XCC, 6f /* CTI */ + andcc %o2, 4, %g0 /* IEU1 Group */ + +5: stx %g0, [%o0] /* Store */ + subcc %o2, 8, %o2 /* IEU1 Group */ + bge,pt %XCC, 5b /* CTI */ + add %o0, 8, %o0 /* IEU0 */ + + andcc %o2, 4, %g0 /* IEU1 Group */ +6: be,a,pn %icc, 7f /* CTI */ + andcc %o2, 2, %g0 /* IEU1 Group */ + stw %g0, [%o0] /* Store */ + + add %o0, 4, %o0 /* IEU0 */ + andcc %o2, 2, %g0 /* IEU1 Group */ +7: be,a,pn %icc, 8f /* CTI */ + andcc %o2, 1, %g0 /* IEU1 Group */ + + sth %g0, [%o0] /* Store */ + add %o0, 2, %o0 /* IEU0 */ + andcc %o2, 1, %g0 /* IEU1 Group */ +8: bne,a,pn %icc, 9f /* CTI */ + + stb %g0, [%o0] /* Store */ +9: retl /* CTI+IEU1 Group */ + mov %g6, %o0 /* IEU0 */ +10: subcc %o0, 2, %g6 /* IEU1 Group */ + + ba,pt %xcc, 3b /* CTI */ + sllx %g5, 8, %g3 /* IEU0 */ +11: subcc %o0, 3, %g6 /* IEU1 Group */ + ba,pt %xcc, 3b /* CTI */ + + sllx %g4, 16, %g3 /* IEU0 */ +12: subcc %o0, 4, %g6 /* IEU1 Group */ + ba,pt %xcc, 3b /* CTI */ + sllx %g5, 24, %g3 /* IEU0 */ + +13: subcc %o0, 5, %g6 /* IEU1 Group */ + ba,pt %xcc, 3b /* CTI */ + sllx %g4, 32, %g3 /* IEU0 */ +14: subcc %o0, 6, %g6 /* IEU1 Group */ + + ba,pt %xcc, 3b /* CTI */ + sllx %g5, 40, %g3 /* IEU0 */ +15: subcc %o0, 7, %g6 /* IEU1 Group */ + ba,pt %xcc, 3b /* CTI */ + + sllx %g4, 48, %g3 /* IEU0 */ +16: subcc %o0, 8, %g6 /* IEU1 Group */ + ba,pt %xcc, 3b /* CTI */ + clr %g3 /* IEU0 */ + + .align 16 +17: or %o3, %o4, %o3 /* IEU0 Group */ + sub %o3, %g1, %o4 /* IEU1 */ +18: addcc %o2, 8, %o2 /* IEU1 Group */ + be,pn %XCC, 19f /* CTI */ + + andcc %o4, %g2, %g0 /* IEU1 Group */ + be,pt %xcc, 21f /* CTI */ + srlx %o3, 56, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 20f /* CTI */ + stb %g5, [%o0] /* Store */ + add %o0, 1, %o0 /* IEU0 Group */ + subcc %o2, 1, %o2 /* IEU1 */ + + be,pn %XCC, 19f /* CTI */ + srlx %o3,
48, %g5 /* IEU0 Group */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 20f /* CTI */ + + stb %g5, [%o0] /* Store */ + add %o0, 1, %o0 /* IEU0 Group */ + subcc %o2, 1, %o2 /* IEU1 */ + be,pn %XCC, 19f /* CTI */ + + srlx %o3, 40, %g5 /* IEU0 Group */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 20f /* CTI */ + stb %g5, [%o0] /* Store */ + + add %o0, 1, %o0 /* IEU0 Group */ + subcc %o2, 1, %o2 /* IEU1 */ + be,pn %XCC, 19f /* CTI */ + srlx %o3, 32, %g5 /* IEU0 Group */ + + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 20f /* CTI */ + stb %g5, [%o0] /* Store */ + add %o0, 1, %o0 /* IEU0 Group */ + + subcc %o2, 1, %o2 /* IEU1 */ + be,pn %XCC, 19f /* CTI */ + srlx %o3, 24, %g5 /* IEU0 Group */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 20f /* CTI */ + stb %g5, [%o0] /* Store */ + add %o0, 1, %o0 /* IEU0 Group */ + subcc %o2, 1, %o2 /* IEU1 */ + + be,pn %XCC, 19f /* CTI */ + srlx %o3, 16, %g5 /* IEU0 Group */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 20f /* CTI */ + + stb %g5, [%o0] /* Store */ + add %o0, 1, %o0 /* IEU0 Group */ + subcc %o2, 1, %o2 /* IEU1 */ + be,pn %XCC, 19f /* CTI */ + + srlx %o3, 8, %o3 /* IEU0 Group */ + stb %o3, [%o0] /* Store */ +59: add %o0, 1, %o2 /* IEU1 */ + andcc %o3, 0xff, %g0 /* IEU1 Group */ + + retl /* CTI+IEU1 Group */ + movne %icc, %o2, %o0 /* Single Group */ +19: retl /* CTI+IEU1 Group */ + nop /* IEU0 */ + +20: mov %o0, %g6 /* IEU0 Group */ + subcc %o2, 1, %o2 /* IEU1 */ + be,pn %XCC, 51f /* CTI */ + add %o0, 1, %o0 /* IEU0 Group */ + +50: stb %g0, [%o0] /* Store Group */ + subcc %o2, 1, %o2 /* IEU1 Group */ + bne,pt %XCC, 50b /* CTI */ + add %o0, 1, %o0 /* IEU0 */ + +51: retl /* CTI+IEU1 Group */ + mov %g6, %o0 /* IEU0 */ + + .align 16 +21: andcc %o2, 4, %g0 /* IEU1 Group */ + be,pn %icc, 22f /* CTI */ + srlx %o3, 32, %g5 /* IEU0 */ + stw %g5, [%o0] /* Store Group */ + + add %o0, 4, %o0 /* IEU0 */ + mov %o3, %g5 /* IEU1 */ +22: andcc %o2, 2, %g0 /* IEU1 Group */ + be,pn %icc, 23f /* CTI
*/ + + srlx %g5, 16, %g4 /* IEU0 */ + sth %g4, [%o0] /* Store Group */ + add %o0, 2, %o0 /* IEU0 */ + mov %g5, %g4 /* IEU1 */ + +23: srlx %g4, 8, %g4 /* IEU0 Group */ + andcc %o2, 1, %g0 /* IEU1 */ + bne,a,pn %icc, 24f /* CTI */ + stb %g4, [%o0] /* Store Group */ + + retl /* CTI+IEU1 Group */ + nop /* IEU0 */ +24: retl /* CTI+IEU1 Group */ + add %o0, 1, %o0 /* IEU0 */ + + .align 16 +55: sub %o0, 1, %g6 /* IEU0 Group */ +25: andcc %o0, 7, %g0 /* IEU1 */ + be,a,pn %icc, 4b /* CTI */ + andncc %o2, 31, %g3 /* IEU1 Group */ + + stb %g0, [%o0] /* Store Group */ + subcc %o2, 1, %o2 /* IEU1 */ + bne,pt %XCC, 25b /* CTI */ + add %o0, 1, %o0 /* IEU0 Group */ + + retl /* CTI+IEU1 Group */ + mov %g6, %o0 /* IEU0 */ + + .align 16 +26: ldub [%o1], %o3 /* Load */ + sllx %g1, 7, %g2 /* IEU0 Group */ + stb %o3, [%o0] /* Store */ +27: subcc %o2, 1, %o2 /* IEU1 */ + + be,pn %XCC, 59b /* CTI */ + add %o1, 1, %o1 /* IEU0 Group */ + add %o0, 1, %o0 /* IEU1 */ + andcc %o3, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 55b /* CTI */ + lduba [%o1] ASI_PNF, %o3 /* Load */ + andcc %o0, 7, %g0 /* IEU1 Group */ + bne,a,pt %icc, 27b /* CTI */ + + stb %o3, [%o0] /* Store */ + andcc %o1, 7, %g3 /* IEU1 Group */ + be,a,pt %icc, 1b /* CTI */ + ldx [%o1], %o3 /* Load */ + +28: orcc %g0, 64, %g4 /* IEU1 Group */ + sllx %g3, 3, %g5 /* IEU0 */ + sub %g4, %g5, %g4 /* IEU0 Group */ + sub %o1, %g3, %o1 /* IEU1 */ + /* %g1 = 0101010101010101 + %g2 = 8080808080808080 + %g3 = source alignment + %g5 = number of bits to shift left + %g4 = number of bits to shift right */ + + ldxa [%o1] ASI_PNF, %o5 /* Load Group */ + addcc %o1, 8, %o1 /* IEU1 */ +29: sllx %o5, %g5, %o3 /* IEU0 Group */ + ldxa [%o1] ASI_PNF, %o5 /* Load */ + + subcc %o2, 8, %o2 /* IEU1 */ + bl,pn %XCC, 17b /* CTI */ + srlx %o5, %g4, %o4 /* IEU0 Group */ + add %o1, 8, %o1 /* IEU1 */ + + or %o3, %o4, %o3 /* IEU0 Group */ + add %o0, 8, %o0 /* IEU1 */ + sub %o3, %g1, %o4 /* IEU0 Group */ +#ifdef EIGHTBIT_NOT_RARE + andn %o4, %o3, %o4 /* IEU0 Group */
+#endif + andcc %o4, %g2, %g0 /* IEU1 Group */ + + be,a,pt %xcc, 29b /* CTI */ + stx %o3, [%o0-8] /* Store */ + srlx %o3, 56, %o4 /* IEU0 Group */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 36f /* CTI */ + srlx %o3, 48, %g6 /* IEU0 */ + andcc %g6, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 35f /* CTI */ + + srlx %o3, 40, %o4 /* IEU0 */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 34f /* CTI */ + srlx %o3, 32, %g6 /* IEU0 */ + + andcc %g6, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 33f /* CTI */ + srlx %o3, 24, %o4 /* IEU0 */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 32f /* CTI */ + srlx %o3, 16, %g6 /* IEU0 */ + andcc %g6, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 31f /* CTI */ + + srlx %o3, 8, %o4 /* IEU0 */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 30f /* CTI */ + andcc %o3, 0xff, %g0 /* IEU1 Group */ + + bne,pn %icc, 29b /* CTI */ + stx %o3, [%o0-8] /* Store */ + sub %o0, 1, %g6 /* IEU0 Group */ + ba,pt %xcc, 4b /* CTI */ + + andncc %o2, 31, %g3 /* IEU1 */ +30: subcc %o0, 2, %g6 /* IEU0 */ + ba,pt %xcc, 3b /* CTI */ + sllx %o4, 8, %g3 /* IEU0 Group */ + +31: sllx %g6, 16, %g3 /* IEU0 Group */ + ba,pt %xcc, 3b /* CTI */ + sub %o0, 3, %g6 /* IEU1 */ +32: subcc %o0, 4, %g6 /* IEU1 Group */ + + ba,pt %xcc, 3b /* CTI */ + sllx %o4, 24, %g3 /* IEU0 */ +33: sllx %g6, 32, %g3 /* IEU0 Group */ + ba,pt %xcc, 3b /* CTI */ + + sub %o0, 5, %g6 /* IEU1 */ +34: subcc %o0, 6, %g6 /* IEU1 Group */ + ba,pt %xcc, 3b /* CTI */ + sllx %o4, 40, %g3 /* IEU0 */ + +35: sllx %g6, 48, %g3 /* IEU0 Group */ + ba,pt %xcc, 3b /* CTI */ + sub %o0, 7, %g6 /* IEU1 */ +36: subcc %o0, 8, %g6 /* IEU1 Group */ + + ba,pt %xcc, 3b /* CTI */ + sllx %o4, 56, %g3 /* IEU0 */ +END(__stpncpy) + +libc_hidden_def (__stpncpy) +weak_alias (__stpncpy, stpncpy) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/strcat.S b/REORG.TODO/sysdeps/sparc/sparc64/strcat.S new file mode 100644 index 0000000000..151a4e8853 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/strcat.S @@
-0,0 +1,339 @@ +/* strcat (dest, src) -- Append SRC on the end of DEST. + For SPARC v9. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jj@ultra.linux.cz> and + Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <asm/asi.h> +#ifndef XCC +#define XCC xcc +#define USE_BPR + .register %g2, #scratch + .register %g3, #scratch + .register %g6, #scratch +#endif + + /* Normally, this uses + ((xword - 0x0101010101010101) & 0x8080808080808080) test + to find out if any byte in xword could be zero. This is fast, but + also gives false alarm for any byte in range 0x81-0xff. It does + not matter for correctness, as if this test tells us there could + be some zero byte, we check it byte by byte, but if bytes with + high bits set are common in the strings, then this will give poor + performance. You can #define EIGHTBIT_NOT_RARE and the algorithm + will use one tick slower, but more precise test + ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080), + which does not give any false alarms (but if some bits are set, + one cannot assume from it which bytes are zero and which are not). + It is yet to be measured, what is the correct default for glibc + in these days for an average user.
+ */ + + .text + .align 32 +ENTRY(strcat) /* In: %o0 = dest, %o1 = src. %g6 keeps dest; every exit returns it in %o0 */ + sethi %hi(0x01010101), %g1 /* IEU0 Group */ + ldub [%o0], %o3 /* Load */ + or %g1, %lo(0x01010101), %g1 /* IEU0 Group */ + mov %o0, %g6 /* IEU1 */ + + sllx %g1, 32, %g2 /* IEU0 Group */ + andcc %o0, 7, %g0 /* IEU1 */ + or %g1, %g2, %g1 /* IEU0 Group */ + bne,pn %icc, 32f /* CTI */ + + sllx %g1, 7, %g2 /* IEU0 Group */ + brz,pn %o3, 30f /* CTI+IEU1 */ + ldx [%o0], %o3 /* Load */ +48: add %o0, 8, %o0 /* IEU0 Group */ + +49: sub %o3, %g1, %o2 /* IEU0 Group: scan dest 8 bytes at a time for its NUL */ +#ifdef EIGHTBIT_NOT_RARE + andn %o2, %o3, %g5 /* IEU0 Group */ + ldxa [%o0] ASI_PNF, %o3 /* Load */ + andcc %g5, %g2, %g0 /* IEU1 Group */ +#else + ldxa [%o0] ASI_PNF, %o3 /* Load */ + andcc %o2, %g2, %g0 /* IEU1 Group */ +#endif + be,pt %xcc, 49b /* CTI */ + + add %o0, 8, %o0 /* IEU0 */ + addcc %o2, %g1, %g3 /* IEU1 Group */ + srlx %o2, 32, %o2 /* IEU0 */ +50: andcc %o2, %g2, %g0 /* IEU1 Group */ + + be,pn %xcc, 51f /* CTI */ + srlx %g3, 56, %o2 /* IEU0 */ + andcc %o2, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 29f /* CTI */ + + srlx %g3, 48, %o2 /* IEU0 */ + andcc %o2, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 28f /* CTI */ + srlx %g3, 40, %o2 /* IEU0 */ + + andcc %o2, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 27f /* CTI */ + srlx %g3, 32, %o2 /* IEU0 */ + andcc %o2, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 26f /* CTI */ +51: srlx %g3, 24, %o2 /* IEU0 */ + andcc %o2, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 25f /* CTI */ + + srlx %g3, 16, %o2 /* IEU0 */ + andcc %o2, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 24f /* CTI */ + srlx %g3, 8, %o2 /* IEU0 */ + + andcc %o2, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 23f /* CTI */ + sub %o3, %g1, %o2 /* IEU0 */ + andcc %g3, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 52f /* CTI */ + ldxa [%o0] ASI_PNF, %o3 /* Load */ + andcc %o2, %g2, %g0 /* IEU1 Group */ + be,pt %xcc, 49b /* CTI */ + + add %o0, 8, %o0 /* IEU0 */ + addcc %o2, %g1, %g3 /* IEU1 Group */ + ba,pt %xcc, 50b /* CTI */ + srlx %o2, 32, %o2 /* IEU0 */ + + .align 16 +52:
ba,pt %xcc, 12f /* CTI Group */ + add %o0, -9, %o0 /* IEU0 */ +23: ba,pt %xcc, 12f /* CTI Group */ + add %o0, -10, %o0 /* IEU0 */ + +24: ba,pt %xcc, 12f /* CTI Group */ + add %o0, -11, %o0 /* IEU0 */ +25: ba,pt %xcc, 12f /* CTI Group */ + add %o0, -12, %o0 /* IEU0 */ + +26: ba,pt %xcc, 12f /* CTI Group */ + add %o0, -13, %o0 /* IEU0 */ +27: ba,pt %xcc, 12f /* CTI Group */ + add %o0, -14, %o0 /* IEU0 */ + +28: ba,pt %xcc, 12f /* CTI Group */ + add %o0, -15, %o0 /* IEU0 */ +29: add %o0, -16, %o0 /* IEU0 Group */ +30: andcc %o1, 7, %g3 /* IEU1 */ + +31: bne,pn %icc, 14f /* CTI */ + orcc %g0, 64, %g4 /* IEU1 Group */ +1: ldx [%o1], %o3 /* Load */ + add %o1, 8, %o1 /* IEU1 */ + +2: mov %o3, %g3 /* IEU0 Group */ +3: sub %o3, %g1, %o2 /* IEU1 */ + ldxa [%o1] ASI_PNF, %o3 /* Load */ +#ifdef EIGHTBIT_NOT_RARE + andn %o2, %g3, %o2 /* IEU0 Group */ +#endif + add %o0, 8, %o0 /* IEU0 Group */ + + andcc %o2, %g2, %g0 /* IEU1 */ + add %o1, 8, %o1 /* IEU0 Group */ + be,a,pt %xcc, 2b /* CTI */ + stx %g3, [%o0 - 8] /* Store */ + + srlx %g3, 56, %g5 /* IEU0 Group */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 11f /* CTI */ + srlx %g3, 48, %g4 /* IEU0 */ + + andcc %g4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 10f /* CTI */ + srlx %g3, 40, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 9f /* CTI */ + srlx %g3, 32, %g4 /* IEU0 */ + andcc %g4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 8f /* CTI */ + + srlx %g3, 24, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 7f /* CTI */ + srlx %g3, 16, %g4 /* IEU0 */ + + andcc %g4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 6f /* CTI */ + srlx %g3, 8, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 5f /* CTI */ + sub %o3, %g1, %o2 /* IEU0 */ + stx %g3, [%o0 - 8] /* Store Group */ + andcc %g3, 0xff, %g0 /* IEU1 */ + + bne,pt %icc, 3b /* CTI */ + mov %o3, %g3 /* IEU0 Group */ +4: retl /* CTI+IEU1 Group */ + mov %g6, %o0 /* IEU0 */ + + .align 16 +5: stb %g5, [%o0 - 2] /* Store
Group */ + srlx %g3, 16, %g4 /* IEU0 */ +6: sth %g4, [%o0 - 4] /* Store Group */ + srlx %g3, 32, %g4 /* IEU0 */ + + stw %g4, [%o0 - 8] /* Store Group */ + retl /* CTI+IEU1 Group */ + mov %g6, %o0 /* IEU0 */ +7: stb %g5, [%o0 - 4] /* Store Group */ + + srlx %g3, 32, %g4 /* IEU0 */ +8: stw %g4, [%o0 - 8] /* Store Group */ + retl /* CTI+IEU1 Group */ + mov %g6, %o0 /* IEU0 */ + +9: stb %g5, [%o0 - 6] /* Store Group */ + srlx %g3, 48, %g4 /* IEU0 */ +10: sth %g4, [%o0 - 8] /* Store Group */ + retl /* CTI+IEU1 Group */ + + mov %g6, %o0 /* IEU0 */ +11: stb %g5, [%o0 - 8] /* Store Group */ + retl /* CTI+IEU1 Group */ + mov %g6, %o0 /* IEU0 */ + + .align 16 +32: andcc %o0, 7, %g0 /* IEU1 Group */ + be,a,pn %icc, 48b /* CTI */ + ldx [%o0], %o3 /* Load */ + add %o0, 1, %o0 /* IEU0 Group */ + + brnz,a,pt %o3, 32b /* CTI+IEU1 */ + lduba [%o0] ASI_PNF, %o3 /* Load */ + add %o0, -1, %o0 /* IEU0 Group */ + andcc %o0, 7, %g0 /* IEU1 Group */ + + be,a,pn %icc, 31b /* CTI */ + andcc %o1, 7, %g3 /* IEU1 Group */ +12: ldub [%o1], %o3 /* Load */ + stb %o3, [%o0] /* Store Group */ + +13: add %o0, 1, %o0 /* IEU0 */ + add %o1, 1, %o1 /* IEU1 */ + andcc %o3, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 4b /* CTI */ + + lduba [%o1] ASI_PNF, %o3 /* Load */ + andcc %o0, 7, %g0 /* IEU1 Group */ + bne,a,pt %icc, 13b /* CTI */ + stb %o3, [%o0] /* Store */ + + andcc %o1, 7, %g3 /* IEU1 Group */ + be,a,pt %icc, 1b /* CTI */ + ldx [%o1], %o3 /* Load */ + orcc %g0, 64, %g4 /* IEU1 Group */ + +14: sllx %g3, 3, %g5 /* IEU0 */ + sub %o1, %g3, %o1 /* IEU0 Group */ + sub %g4, %g5, %g4 /* IEU1 */ + /* %g1 = 0101010101010101 * + * %g2 = 8080808080808080 * + * %g3 = source alignment * + * %g5 = number of bits to shift left * + * %g4 = number of bits to shift right */ + ldxa [%o1] ASI_PNF, %o5 /* Load Group */ + + addcc %o1, 8, %o1 /* IEU1 */ +15: sllx %o5, %g5, %o3 /* IEU0 Group */ + ldxa [%o1] ASI_PNF, %o5 /* Load */ + srlx %o5, %g4, %o4 /* IEU0 Group */ + + add %o0, 8, %o0 /* IEU1 */ + or %o3, %o4, %o3 /*
IEU0 Group */ + add %o1, 8, %o1 /* IEU1 */ + sub %o3, %g1, %o4 /* IEU0 Group */ + +#ifdef EIGHTBIT_NOT_RARE + andn %o4, %o3, %o4 /* IEU0 Group */ +#endif + andcc %o4, %g2, %g0 /* IEU1 Group */ + be,a,pt %xcc, 15b /* CTI */ + stx %o3, [%o0 - 8] /* Store */ + srlx %o3, 56, %o4 /* IEU0 Group */ + + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 22f /* CTI */ + srlx %o3, 48, %o4 /* IEU0 */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 21f /* CTI */ + srlx %o3, 40, %o4 /* IEU0 */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 20f /* CTI */ + + srlx %o3, 32, %o4 /* IEU0 */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 19f /* CTI */ + srlx %o3, 24, %o4 /* IEU0 */ + + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 18f /* CTI */ + srlx %o3, 16, %o4 /* IEU0 */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 17f /* CTI */ + srlx %o3, 8, %o4 /* IEU0 */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 16f /* CTI */ + + andcc %o3, 0xff, %g0 /* IEU1 Group */ + bne,pn %icc, 15b /* CTI */ + stx %o3, [%o0 - 8] /* Store */ + retl /* CTI+IEU1 Group */ + + mov %g6, %o0 /* IEU0 */ + + .align 16 +16: srlx %o3, 8, %o4 /* IEU0 Group */ + stb %o4, [%o0 - 2] /* Store */ +17: srlx %o3, 16, %o4 /* IEU0 Group */ + stb %o4, [%o0 - 3] /* Store */ + +18: srlx %o3, 24, %o4 /* IEU0 Group */ + stb %o4, [%o0 - 4] /* Store */ +19: srlx %o3, 32, %o4 /* IEU0 Group */ + stw %o4, [%o0 - 8] /* Store */ + + retl /* CTI+IEU1 Group */ + mov %g6, %o0 /* IEU0 */ + nop + nop + +20: srlx %o3, 40, %o4 /* IEU0 Group */ + stb %o4, [%o0 - 6] /* Store */ +21: srlx %o3, 48, %o4 /* IEU0 Group */ + stb %o4, [%o0 - 7] /* Store */ + +22: srlx %o3, 56, %o4 /* IEU0 Group */ + stb %o4, [%o0 - 8] /* Store */ + retl /* CTI+IEU1 Group */ + mov %g6, %o0 /* IEU0 */ +END(strcat) +libc_hidden_builtin_def (strcat) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/strchr.S b/REORG.TODO/sysdeps/sparc/sparc64/strchr.S new file mode 100644 index 0000000000..31b9e58d76 --- /dev/null +++
b/REORG.TODO/sysdeps/sparc/sparc64/strchr.S @@ -0,0 +1,482 @@ +/* strchr (str, ch) -- Return pointer to first occurrence of CH in STR. + For SPARC v9. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and + Jakub Jelinek <jj@ultra.linux.cz>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <asm/asi.h> +#ifndef XCC +#define XCC xcc +#define USE_BPR + .register %g2, #scratch + .register %g3, #scratch + .register %g6, #scratch +#endif + + /* Normally, this uses + ((xword - 0x0101010101010101) & 0x8080808080808080) test + to find out if any byte in xword could be zero. This is fast, but + also gives false alarm for any byte in range 0x81-0xff. It does + not matter for correctness, as if this test tells us there could + be some zero byte, we check it byte by byte, but if bytes with + high bits set are common in the strings, then this will give poor + performance. You can #define EIGHTBIT_NOT_RARE and the algorithm + will use one tick slower, but more precise test + ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080), + which does not give any false alarms (but if some bits are set, + one cannot assume from it which bytes are zero and which are not). 
+ It is yet to be measured, what is the correct default for glibc + in these days for an average user. + */ + + .text + .align 32 +ENTRY(strchr) + andcc %o1, 0xff, %o1 /* IEU1 Group */ + be,pn %icc, 17f /* CTI */ + sllx %o1, 8, %g3 /* IEU0 Group */ + sethi %hi(0x01010101), %g1 /* IEU1 */ + + or %g3, %o1, %g3 /* IEU0 Group */ + ldub [%o0], %o3 /* Load */ + sllx %g3, 16, %g5 /* IEU0 Group */ + or %g1, %lo(0x01010101), %g1 /* IEU1 */ + + sllx %g1, 32, %g2 /* IEU0 Group */ + brz,pn %o3, 5f /* CTI+IEU1 */ + orcc %g3, %g5, %g3 /* IEU1 Group */ + sllx %g3, 32, %g5 /* IEU0 */ + + cmp %o3, %o1 /* IEU1 Group */ + be,pn %xcc, 14f /* CTI */ + or %g1, %g2, %g1 /* IEU0 */ + andcc %o0, 7, %g0 /* IEU1 Group */ + + bne,a,pn %icc, 15f /* CTI */ + add %o0, 1, %o0 /* IEU0 */ + ldx [%o0], %o3 /* Load Group */ +1: sllx %g1, 7, %g2 /* IEU0 */ + + or %g3, %g5, %g3 /* IEU1 */ + add %o0, 8, %o0 /* IEU0 Group */ + xor %o3, %g3, %o4 /* IEU1 */ + /* %g1 = 0101010101010101 * + * %g2 = 8080088080808080 * + * %g3 = c c c c c c c c * + * %o3 = value * + * %o4 = value XOR c */ +2: sub %o3, %g1, %o2 /* IEU0 Group */ + + sub %o4, %g1, %o5 /* IEU1 */ +#ifdef EIGHTBIT_NOT_RARE + andn %o2, %o3, %g6 /* IEU0 Group */ + andn %o5, %o4, %o5 /* IEU1 */ + ldxa [%o0] ASI_PNF, %o3 /* Load */ + or %o5, %g6, %o5 /* IEU0 Group */ +#else + ldxa [%o0] ASI_PNF, %o3 /* Load */ + or %o5, %o2, %o5 /* IEU0 Group */ +#endif + add %o0, 8, %o0 /* IEU1 */ + + andcc %o5, %g2, %g0 /* IEU1 Group */ + be,a,pt %xcc, 2b /* CTI */ + xor %o3, %g3, %o4 /* IEU0 */ + srlx %o5, 32, %g5 /* IEU0 Group */ + + add %o2, %g1, %o2 /* IEU1 */ +3: andcc %g5, %g2, %g0 /* IEU1 Group */ + be,pn %xcc, 4f /* CTI */ + srlx %o2, 56, %g5 /* IEU0 */ + + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 5f /* CTI */ + srlx %o4, 56, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 6f /* CTI */ + srlx %o2, 48, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 5f /* CTI */ + + srlx %o4, 48, %g5 /* IEU0 */ + andcc 
%g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 7f /* CTI */ + srlx %o2, 40, %g5 /* IEU0 */ + + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 5f /* CTI */ + srlx %o4, 40, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 8f /* CTI */ + srlx %o2, 32, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 5f /* CTI */ + + srlx %o4, 32, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 9f /* CTI */ +4: srlx %o2, 24, %g5 /* IEU0 */ + + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 5f /* CTI */ + srlx %o4, 24, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 10f /* CTI */ + srlx %o2, 16, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 5f /* CTI */ + + srlx %o4, 16, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 11f /* CTI */ + srlx %o2, 8, %g5 /* IEU0 */ + + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 5f /* CTI */ + srlx %o4, 8, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 12f /* CTI */ + andcc %o2, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 5f /* CTI */ + sub %o3, %g1, %o2 /* IEU0 */ + + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 13f /* CTI */ + xor %o3, %g3, %o4 /* IEU0 */ + ldxa [%o0] ASI_PNF, %o3 /* Load Group */ + + sub %o4, %g1, %o5 /* IEU0 */ + or %o5, %o2, %o5 /* IEU1 */ + add %o0, 8, %o0 /* IEU0 Group */ + andcc %o5, %g2, %g0 /* IEU1 */ + + be,a,pt %xcc, 2b /* CTI */ + xor %o3, %g3, %o4 /* IEU0 Group */ + srlx %o5, 32, %g5 /* IEU0 Group */ + ba,pt %xcc, 3b /* CTI */ + + add %o2, %g1, %o2 /* IEU1 */ + + .align 16 +5: retl /* CTI+IEU1 Group */ + clr %o0 /* IEU0 */ +6: retl /* CTI+IEU1 Group */ + add %o0, -16, %o0 /* IEU0 */ + +7: retl /* CTI+IEU1 Group */ + add %o0, -15, %o0 /* IEU0 */ +8: retl /* CTI+IEU1 Group */ + add %o0, -14, %o0 /* IEU0 */ + +9: retl /* CTI+IEU1 Group */ + add %o0, -13, %o0 /* IEU0 */ +10: retl /* CTI+IEU1 Group */ + add %o0, -12, %o0 /* IEU0 */ + +11: retl /* CTI+IEU1 
Group */ + add %o0, -11, %o0 /* IEU0 */ +12: retl /* CTI+IEU1 Group */ + add %o0, -10, %o0 /* IEU0 */ + +13: retl /* CTI+IEU1 Group */ + add %o0, -9, %o0 /* IEU0 */ +14: retl /* CTI+IEU1 Group */ + nop /* IEU0 */ + + .align 16 +15: ldub [%o0], %o3 /* Load Group */ +16: andcc %o0, 7, %g0 /* IEU1 */ + be,a,pn %icc, 1b /* CTI */ + ldx [%o0], %o3 /* Load Group */ + + andcc %o3, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 5b /* CTI */ + add %o0, 1, %o0 /* IEU0 */ + cmp %o3, %o1 /* IEU1 Group */ + + bne,a,pn %icc, 16b /* CTI */ + ldub [%o0], %o3 /* Load */ + retl /* CTI+IEU1 Group */ + add %o0, -1, %o0 /* IEU0 */ + + /* strchr (str, 0) */ + .align 32 + nop + .align 16 +17: sethi %hi(0x01010101), %g1 /* IEU0 Group */ + ldub [%o0], %o3 /* Load */ + or %g1, %lo(0x01010101), %g1 /* IEU0 Group */ + sllx %g1, 32, %g2 /* IEU0 Group */ + + andcc %o0, 7, %g0 /* IEU1 */ + or %g1, %g2, %g1 /* IEU0 Group */ + bne,pn %icc, 32f /* CTI */ + sllx %g1, 7, %g2 /* IEU0 Group */ + + brz,pn %o3, 30f /* CTI+IEU1 */ + ldx [%o0], %o3 /* Load */ +18: add %o0, 8, %o0 /* IEU0 Group */ +19: sub %o3, %g1, %o2 /* IEU0 Group */ + +#ifdef EIGHTBIT_NOT_RARE + andn %o2, %o3, %g6 /* IEU0 Group */ + ldxa [%o0] ASI_PNF, %o3 /* Load */ + andcc %g6, %g2, %g0 /* IEU1 Group */ +#else + ldxa [%o0] ASI_PNF, %o3 /* Load */ + andcc %o2, %g2, %g0 /* IEU1 Group */ +#endif + be,pt %xcc, 19b /* CTI */ + add %o0, 8, %o0 /* IEU0 */ + + addcc %o2, %g1, %g3 /* IEU1 Group */ + srlx %o2, 32, %o2 /* IEU0 */ +20: andcc %o2, %g2, %g0 /* IEU1 Group */ + be,pn %xcc, 21f /* CTI */ + + srlx %g3, 56, %o2 /* IEU0 */ + andcc %o2, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 29f /* CTI */ + srlx %g3, 48, %o2 /* IEU0 */ + + andcc %o2, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 28f /* CTI */ + srlx %g3, 40, %o2 /* IEU0 */ + andcc %o2, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 27f /* CTI */ + srlx %g3, 32, %o2 /* IEU0 */ + andcc %o2, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 26f /* CTI */ + +21: srlx %g3, 24, %o2 /* IEU0 */ + andcc %o2, 0xff, %g0 
/* IEU1 Group */ + be,pn %icc, 25f /* CTI */ + srlx %g3, 16, %o2 /* IEU0 */ + + andcc %o2, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 24f /* CTI */ + srlx %g3, 8, %o2 /* IEU0 */ + andcc %o2, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 23f /* CTI */ + sub %o3, %g1, %o2 /* IEU0 */ + andcc %g3, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 22f /* CTI */ + + ldxa [%o0] ASI_PNF, %o3 /* Load */ + andcc %o2, %g2, %g0 /* IEU1 Group */ + be,pt %xcc, 19b /* CTI */ + add %o0, 8, %o0 /* IEU0 */ + + addcc %o2, %g1, %g3 /* IEU1 Group */ + ba,pt %xcc, 20b /* CTI */ + srlx %o2, 32, %o2 /* IEU0 */ + + .align 16 +22: retl /* CTI+IEU1 Group */ + add %o0, -9, %o0 /* IEU0 */ +23: retl /* CTI+IEU1 Group */ + add %o0, -10, %o0 /* IEU0 */ + +24: retl /* CTI+IEU1 Group */ + add %o0, -11, %o0 /* IEU0 */ +25: retl /* CTI+IEU1 Group */ + add %o0, -12, %o0 /* IEU0 */ + +26: retl /* CTI+IEU1 Group */ + add %o0, -13, %o0 /* IEU0 */ +27: retl /* CTI+IEU1 Group */ + add %o0, -14, %o0 /* IEU0 */ + +28: retl /* CTI+IEU1 Group */ + add %o0, -15, %o0 /* IEU0 */ +29: retl /* CTI+IEU1 Group */ + add %o0, -16, %o0 /* IEU0 */ + +30: retl /* CTI+IEU1 Group */ + nop /* IEU0 */ + + .align 16 +32: andcc %o0, 7, %g0 /* IEU1 Group */ + be,a,pn %icc, 18b /* CTI */ + ldx [%o0], %o3 /* Load */ + add %o0, 1, %o0 /* IEU0 Group */ + + brnz,a,pt %o3, 32b /* CTI+IEU1 */ + lduba [%o0] ASI_PNF, %o3 /* Load */ + retl /* CTI+IEU1 Group */ + add %o0, -1, %o0 /* IEU0 */ +END(strchr) + + .align 32 + /* strrchr (str, c): %g4 holds the address of the most recent match + (cleared on entry) and is what label 12 returns in %o0. + c == 0 is handed to the strchr (str, 0) code at label 17. */ +ENTRY(strrchr) + andcc %o1, 0xff, %o1 /* IEU1 Group */ + be,pn %icc, 17b /* CTI */ + clr %g4 /* IEU0 */ + andcc %o0, 7, %g0 /* IEU1 Group */ + + bne,pn %icc, 13f /* CTI */ + sllx %o1, 8, %g3 /* IEU0 */ + ldx [%o0], %o3 /* Load Group */ +1: sethi %hi(0x01010101), %g1 /* IEU0 */ + + or %g3, %o1, %g3 /* IEU1 */ + sllx %g3, 16, %g5 /* IEU0 Group */ + or %g1, %lo(0x01010101), %g1 /* IEU1 */ + sllx %g1, 32, %g2 /* IEU0 Group */ + + or %g3, %g5, %g3 /* IEU1 */ + sllx %g3, 32, %g5 /* IEU0 Group */ + or %g1, %g2, %g1 /* IEU1 */ + sllx %g1, 7, %g2 
/* IEU0 Group */ + + or %g3, %g5, %g3 /* IEU1 */ + add %o0, 8, %o0 /* IEU0 Group */ + xor %o3, %g3, %o4 /* IEU1 */ + /* %g1 = 0101010101010101 * + * %g2 = 8080808080808080 * + * %g3 = c c c c c c c c * + * %o3 = value * + * %o4 = value XOR c */ +2: sub %o3, %g1, %o2 /* IEU0 Group */ + +3: sub %o4, %g1, %o5 /* IEU1 */ +#ifdef EIGHTBIT_NOT_RARE + andn %o2, %o3, %g6 /* IEU0 Group */ + andn %o5, %o4, %o5 /* IEU1 */ + ldxa [%o0] ASI_PNF, %o3 /* Load */ + + or %o5, %g6, %o5 /* IEU0 Group */ +#else + ldxa [%o0] ASI_PNF, %o3 /* Load */ + + or %o5, %o2, %o5 /* IEU0 Group */ +#endif + add %o0, 8, %o0 /* IEU1 */ + andcc %o5, %g2, %g0 /* IEU1 Group */ + be,a,pt %xcc, 2b /* CTI */ + + xor %o3, %g3, %o4 /* IEU0 */ + srlx %o5, 32, %g5 /* IEU0 Group */ + add %o2, %g1, %o2 /* IEU1 */ + andcc %g5, %g2, %g0 /* IEU1 Group */ + + be,pn %xcc, 7f /* CTI */ + srlx %o2, 56, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 12f /* CTI */ + + srlx %o4, 56, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + srlx %o2, 48, %g5 /* IEU0 */ + be,a,pn %icc, 4f /* CTI */ + + add %o0, -16, %g4 /* IEU0 Group */ +4: andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 12f /* CTI */ + srlx %o4, 48, %g5 /* IEU0 */ + + andcc %g5, 0xff, %g0 /* IEU1 Group */ + srlx %o2, 40, %g5 /* IEU0 */ + be,a,pn %icc, 5f /* CTI */ + add %o0, -15, %g4 /* IEU0 Group */ + +5: andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 12f /* CTI */ + srlx %o4, 40, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + + srlx %o2, 32, %g5 /* IEU0 */ + be,a,pn %icc, 6f /* CTI */ + add %o0, -14, %g4 /* IEU0 Group */ +6: andcc %g5, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 12f /* CTI */ + srlx %o4, 32, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,a,pn %icc, 7f /* CTI */ + + add %o0, -13, %g4 /* IEU0 */ +7: srlx %o2, 24, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 12f /* CTI */ + + srlx %o4, 24, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + srlx %o2, 16, %g5 
/* IEU0 */ + be,a,pn %icc, 8f /* CTI */ + + add %o0, -12, %g4 /* IEU0 Group */ +8: andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 12f /* CTI */ + srlx %o4, 16, %g5 /* IEU0 */ + + andcc %g5, 0xff, %g0 /* IEU1 Group */ + srlx %o2, 8, %g5 /* IEU0 */ + be,a,pn %icc, 9f /* CTI */ + add %o0, -11, %g4 /* IEU0 Group */ + +9: andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 12f /* CTI */ + srlx %o4, 8, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + + be,a,pn %icc, 10f /* CTI */ + add %o0, -10, %g4 /* IEU0 */ +10: andcc %o2, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 12f /* CTI */ + + sub %o3, %g1, %o2 /* IEU0 */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,a,pn %icc, 11f /* CTI */ + add %o0, -9, %g4 /* IEU0 */ + +11: ba,pt %xcc, 3b /* CTI Group */ + xor %o3, %g3, %o4 /* IEU0 Group */ +12: retl /* CTI+IEU1 Group */ + mov %g4, %o0 /* IEU0 */ + + .align 16 +13: ldub [%o0], %o3 /* Load Group */ + add %o0, 1, %o0 /* IEU0 */ +14: andcc %o3, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 12b /* CTI */ + + cmp %o3, %o1 /* IEU1 Group */ + ldub [%o0], %o3 /* Load */ + be,a,pn %icc, 15f /* CTI */ + add %o0, -1, %g4 /* IEU0 Group */ + +15: andcc %o0, 7, %g0 /* IEU1 Group */ + bne,a,pt %icc, 14b /* CTI */ + add %o0, 1, %o0 /* IEU0 */ + ba,pt %xcc, 1b /* CTI Group */ + + ldx [%o0], %o3 /* Load */ +END(strrchr) + +weak_alias (strchr, index) +weak_alias (strrchr, rindex) +libc_hidden_builtin_def (strchr) +libc_hidden_builtin_def (strrchr) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/strcmp.S b/REORG.TODO/sysdeps/sparc/sparc64/strcmp.S new file mode 100644 index 0000000000..d4b0a22f18 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/strcmp.S @@ -0,0 +1,232 @@ +/* Compare two strings for differences. + For SPARC v9. + Copyright (C) 2011-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. 
Miller <davem@davemloft.net> + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <asm/asi.h> + +#ifndef XCC + .register %g2, #scratch + .register %g3, #scratch + .register %g6, #scratch +#endif + +#define rSTR1 %o0 +#define rSTR2 %o1 +#define r0101 %o2 /* 0x0101010101010101 */ +#define r8080 %o3 /* 0x8080808080808080 */ +#define rSTRXOR %o4 +#define rWORD1 %o5 +#define rTMP1 %g1 +#define rTMP2 %g2 +#define rWORD2 %g3 +#define rSLL %g4 +#define rSRL %g5 +#define rBARREL %g6 + + /* There are two cases, either the two pointers are aligned + * identically or they are not. If they have the same + * alignment we can use the normal full speed loop. Otherwise + * we have to use the barrel-shifter version. 
+ */ + + .text + .align 32 +ENTRY(strcmp) + or rSTR2, rSTR1, rTMP1 + sethi %hi(0x80808080), r8080 + + andcc rTMP1, 0x7, %g0 + bne,pn %icc, .Lmaybe_barrel_shift + or r8080, %lo(0x80808080), r8080 + ldx [rSTR1], rWORD1 + + sub rSTR2, rSTR1, rSTR2 + sllx r8080, 32, rTMP1 + + ldx [rSTR1 + rSTR2], rWORD2 + or r8080, rTMP1, r8080 + + ba,pt %xcc, .Laligned_loop_entry + srlx r8080, 7, r0101 + + .align 32 +.Laligned_loop_entry: +.Laligned_loop: + add rSTR1, 8, rSTR1 + + sub rWORD1, r0101, rTMP2 + xorcc rWORD1, rWORD2, rSTRXOR + bne,pn %xcc, .Lcommon_endstring + + andn r8080, rWORD1, rTMP1 + + ldxa [rSTR1] ASI_PNF, rWORD1 + andcc rTMP1, rTMP2, %g0 + be,a,pt %xcc, .Laligned_loop + + ldxa [rSTR1 + rSTR2] ASI_PNF, rWORD2 + +.Lcommon_equal: + retl + mov 0, %o0 + + /* All loops terminate here once they find an unequal word. + * If a zero byte appears in the word before the first unequal + * byte, we must report zero. Otherwise we report '1' or '-1' + * depending upon whether the first mis-matching byte is larger + * in the first string or the second, respectively. + * + * First we compute a 64-bit mask value that has "0x01" in + * each byte where a zero exists in rWORD1. rSTRXOR holds the + * value (rWORD1 ^ rWORD2). Therefore if, considered as an + * unsigned quantity, our "0x01" mask value is "greater than" + * rSTRXOR, then a zero terminating byte comes first and + * we report '0'. + * + * The formula for this mask is: + * + * mask_tmp1 = ~rWORD1 & 0x8080808080808080; + * mask_tmp2 = ((rWORD1 & 0x7f7f7f7f7f7f7f7f) + + * 0x7f7f7f7f7f7f7f7f); + * + * mask = ((mask_tmp1 & ~mask_tmp2) >> 7); + */ +.Lcommon_endstring: + andn rWORD1, r8080, rTMP2 + or r8080, 1, %o1 + + mov 1, %o0 + sub rTMP2, %o1, rTMP2 + + cmp rWORD1, rWORD2 + andn rTMP1, rTMP2, rTMP1 + + movleu %xcc, -1, %o0 + srlx rTMP1, 7, rTMP1 + + /* In order not to be influenced by bytes after the zero byte, we + * have to retain only the highest bit in the mask for the comparison + * with rSTRXOR to work properly. 
+ */ + mov 0, rTMP2 + andcc rTMP1, 0x0100, %g0 + + movne %xcc, 8, rTMP2 + sllx rTMP1, 63 - 16, %o1 + + movrlz %o1, 16, rTMP2 + sllx rTMP1, 63 - 24, %o1 + + movrlz %o1, 24, rTMP2 + sllx rTMP1, 63 - 32, %o1 + + movrlz %o1, 32, rTMP2 + sllx rTMP1, 63 - 40, %o1 + + movrlz %o1, 40, rTMP2 + sllx rTMP1, 63 - 48, %o1 + + movrlz %o1, 48, rTMP2 + sllx rTMP1, 63 - 56, %o1 + + movrlz %o1, 56, rTMP2 + + srlx rTMP1, rTMP2, rTMP1 + + sllx rTMP1, rTMP2, rTMP1 + + cmp rTMP1, rSTRXOR + retl + movgu %xcc, 0, %o0 + + /* At least one pointer is not 8-byte aligned. rSTR2 becomes the + * distance (s2 - s1) and rSTR1 is rounded down to an 8-byte boundary; + * the bytes in front of the real start (big-endian) are forced to + * 0xff in both words (orn with the shifted -1 mask) so that they + * compare equal and cannot look like a terminator. If the distance + * is a multiple of 8 the aligned loop is reused, otherwise rWORD2 is + * assembled from two neighbouring loads using the rSLL/rSRL counts. + */ +.Lmaybe_barrel_shift: + sub rSTR2, rSTR1, rSTR2 + sllx r8080, 32, rTMP1 + + or r8080, rTMP1, r8080 + and rSTR1, 0x7, rTMP2 + + srlx r8080, 7, r0101 + andn rSTR1, 0x7, rSTR1 + + ldxa [rSTR1] ASI_PNF, rWORD1 + andcc rSTR2, 0x7, rSLL + sll rTMP2, 3, rSTRXOR + + bne,pn %icc, .Lneed_barrel_shift + mov -1, rTMP1 + ldxa [rSTR1 + rSTR2] ASI_PNF, rBARREL + + srlx rTMP1, rSTRXOR, rTMP2 + + orn rWORD1, rTMP2, rWORD1 + ba,pt %xcc, .Laligned_loop_entry + orn rBARREL, rTMP2, rWORD2 + +.Lneed_barrel_shift: + sllx rSLL, 3, rSLL + andn rSTR2, 0x7, rSTR2 + + ldxa [rSTR1 + rSTR2] ASI_PNF, rBARREL + mov 64, rTMP2 + sub rTMP2, rSLL, rSRL + + srlx rTMP1, rSTRXOR, rTMP1 + add rSTR2, 8, rSTR2 + + orn rWORD1, rTMP1, rWORD1 + sllx rBARREL, rSLL, rWORD2 + ldxa [rSTR1 + rSTR2] ASI_PNF, rBARREL + + add rSTR1, 8, rSTR1 + sub rWORD1, r0101, rTMP2 + + srlx rBARREL, rSRL, rSTRXOR + + or rWORD2, rSTRXOR, rWORD2 + + orn rWORD2, rTMP1, rWORD2 + ba,pt %xcc, .Lbarrel_shift_loop_entry + andn r8080, rWORD1, rTMP1 + +.Lbarrel_shift_loop: + sllx rBARREL, rSLL, rWORD2 + ldxa [rSTR1 + rSTR2] ASI_PNF, rBARREL + + add rSTR1, 8, rSTR1 + sub rWORD1, r0101, rTMP2 + + srlx rBARREL, rSRL, rSTRXOR + andn r8080, rWORD1, rTMP1 + + or rWORD2, rSTRXOR, rWORD2 + +.Lbarrel_shift_loop_entry: + xorcc rWORD1, rWORD2, rSTRXOR + bne,pn %xcc, .Lcommon_endstring + + andcc rTMP1, rTMP2, %g0 + be,a,pt %xcc, .Lbarrel_shift_loop + ldxa [rSTR1] ASI_PNF, rWORD1 + + retl + mov 0, %o0 +END(strcmp) +libc_hidden_builtin_def (strcmp) diff --git 
a/REORG.TODO/sysdeps/sparc/sparc64/strcpy.S b/REORG.TODO/sysdeps/sparc/sparc64/strcpy.S new file mode 100644 index 0000000000..8732809f2b --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/strcpy.S @@ -0,0 +1,244 @@ +/* Copy SRC to DEST returning DEST. + For SPARC v9. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and + Jakub Jelinek <jj@ultra.linux.cz>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <asm/asi.h> +#ifndef XCC + .register %g2, #scratch + .register %g3, #scratch + .register %g6, #scratch +#endif + + /* Normally, this uses + ((xword - 0x0101010101010101) & 0x8080808080808080) test + to find out if any byte in xword could be zero. This is fast, but + also gives false alarm for any byte in range 0x81-0xff. It does + not matter for correctness, as if this test tells us there could + be some zero byte, we check it byte by byte, but if bytes with + high bits set are common in the strings, then this will give poor + performance. 
You can #define EIGHTBIT_NOT_RARE and the algorithm + will use one tick slower, but more precise test + ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080), + which does not give any false alarms (but if some bits are set, + one cannot assume from it which bytes are zero and which are not). + It has yet to be measured what the correct default for glibc is + these days for an average user. + */ + + .text + .align 32 + /* %o0 = dest, %o1 = src. dest is saved in %g6 on entry and moved + back into %o0 in the delay slot of every retl. */ +ENTRY(strcpy) + sethi %hi(0x01010101), %g1 /* IEU0 Group */ + mov %o0, %g6 /* IEU1 */ + or %g1, %lo(0x01010101), %g1 /* IEU0 Group */ + andcc %o0, 7, %g0 /* IEU1 */ + + sllx %g1, 32, %g2 /* IEU0 Group */ + bne,pn %icc, 12f /* CTI */ + andcc %o1, 7, %g3 /* IEU1 */ + or %g1, %g2, %g1 /* IEU0 Group */ + + bne,pn %icc, 14f /* CTI */ + sllx %g1, 7, %g2 /* IEU0 Group */ +1: ldx [%o1], %o3 /* Load */ + add %o1, 8, %o1 /* IEU1 */ + +2: mov %o3, %g3 /* IEU0 Group */ +3: sub %o3, %g1, %o2 /* IEU1 */ + ldxa [%o1] ASI_PNF, %o3 /* Load */ +#ifdef EIGHTBIT_NOT_RARE + andn %o2, %g3, %o2 /* IEU0 Group */ +#endif + add %o0, 8, %o0 /* IEU0 Group */ + + andcc %o2, %g2, %g0 /* IEU1 */ + add %o1, 8, %o1 /* IEU0 Group */ + be,a,pt %xcc, 2b /* CTI */ + stx %g3, [%o0 - 8] /* Store */ + + srlx %g3, 56, %g5 /* IEU0 Group */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 11f /* CTI */ + srlx %g3, 48, %g4 /* IEU0 */ + + andcc %g4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 10f /* CTI */ + srlx %g3, 40, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 9f /* CTI */ + srlx %g3, 32, %g4 /* IEU0 */ + andcc %g4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 8f /* CTI */ + + srlx %g3, 24, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 7f /* CTI */ + srlx %g3, 16, %g4 /* IEU0 */ + + andcc %g4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 6f /* CTI */ + srlx %g3, 8, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 5f /* CTI */ + sub %o3, %g1, %o2 /* IEU0 */ + stx %g3, [%o0 - 8] /* Store Group */ + andcc %g3, 0xff, 
%g0 /* IEU1 */ + + bne,pt %icc, 3b /* CTI */ + mov %o3, %g3 /* IEU0 Group */ +4: retl /* CTI+IEU1 Group */ + mov %g6, %o0 /* IEU0 */ + + .align 16 +5: stb %g5, [%o0 - 2] /* Store Group */ + srlx %g3, 16, %g4 /* IEU0 */ +6: sth %g4, [%o0 - 4] /* Store Group */ + srlx %g3, 32, %g4 /* IEU0 */ + + stw %g4, [%o0 - 8] /* Store Group */ + retl /* CTI+IEU1 Group */ + mov %g6, %o0 /* IEU0 */ +7: stb %g5, [%o0 - 4] /* Store Group */ + + srlx %g3, 32, %g4 /* IEU0 */ +8: stw %g4, [%o0 - 8] /* Store Group */ + retl /* CTI+IEU1 Group */ + mov %g6, %o0 /* IEU0 */ + +9: stb %g5, [%o0 - 6] /* Store Group */ + srlx %g3, 48, %g4 /* IEU0 */ +10: sth %g4, [%o0 - 8] /* Store Group */ + retl /* CTI+IEU1 Group */ + + mov %g6, %o0 /* IEU0 */ +11: stb %g5, [%o0 - 8] /* Store Group */ + retl /* CTI+IEU1 Group */ + mov %g6, %o0 /* IEU0 */ + +12: or %g1, %g2, %g1 /* IEU0 Group */ + ldub [%o1], %o3 /* Load */ + sllx %g1, 7, %g2 /* IEU0 Group */ + stb %o3, [%o0] /* Store Group */ + +13: add %o0, 1, %o0 /* IEU0 */ + add %o1, 1, %o1 /* IEU1 */ + andcc %o3, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 4b /* CTI */ + + lduba [%o1] ASI_PNF, %o3 /* Load */ + andcc %o0, 7, %g0 /* IEU1 Group */ + bne,a,pt %icc, 13b /* CTI */ + stb %o3, [%o0] /* Store */ + + andcc %o1, 7, %g3 /* IEU1 Group */ + be,a,pt %icc, 1b /* CTI */ + ldx [%o1], %o3 /* Load */ +14: orcc %g0, 64, %g4 /* IEU1 Group */ + + sllx %g3, 3, %g5 /* IEU0 */ + sub %o1, %g3, %o1 /* IEU0 Group */ + sub %g4, %g5, %g4 /* IEU1 */ + /* %g1 = 0101010101010101 * + * %g2 = 8080808080808080 * + * %g3 = source alignment * + * %g5 = number of bits to shift left * + * %g4 = number of bits to shift right */ + ldxa [%o1] ASI_PNF, %o5 /* Load Group */ + + addcc %o1, 8, %o1 /* IEU1 */ +15: sllx %o5, %g5, %o3 /* IEU0 Group */ + ldxa [%o1] ASI_PNF, %o5 /* Load */ + srlx %o5, %g4, %o4 /* IEU0 Group */ + + add %o0, 8, %o0 /* IEU1 */ + or %o3, %o4, %o3 /* IEU0 Group */ + add %o1, 8, %o1 /* IEU1 */ + sub %o3, %g1, %o4 /* IEU0 Group */ + +#ifdef EIGHTBIT_NOT_RARE + andn 
%o4, %o3, %o4 /* IEU0 Group */ +#endif + andcc %o4, %g2, %g0 /* IEU1 Group */ + be,a,pt %xcc, 15b /* CTI */ + stx %o3, [%o0 - 8] /* Store */ + srlx %o3, 56, %o4 /* IEU0 Group */ + + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 22f /* CTI */ + srlx %o3, 48, %o4 /* IEU0 */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 21f /* CTI */ + srlx %o3, 40, %o4 /* IEU0 */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 20f /* CTI */ + + srlx %o3, 32, %o4 /* IEU0 */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 19f /* CTI */ + srlx %o3, 24, %o4 /* IEU0 */ + + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 18f /* CTI */ + srlx %o3, 16, %o4 /* IEU0 */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 17f /* CTI */ + srlx %o3, 8, %o4 /* IEU0 */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 16f /* CTI */ + + andcc %o3, 0xff, %g0 /* IEU1 Group */ + bne,pn %icc, 15b /* CTI */ + stx %o3, [%o0 - 8] /* Store */ + retl /* CTI+IEU1 Group */ + + mov %g6, %o0 /* IEU0 */ + + .align 16 +16: srlx %o3, 8, %o4 /* IEU0 Group */ + stb %o4, [%o0 - 2] /* Store */ +17: srlx %o3, 16, %o4 /* IEU0 Group */ + stb %o4, [%o0 - 3] /* Store */ + +18: srlx %o3, 24, %o4 /* IEU0 Group */ + stb %o4, [%o0 - 4] /* Store */ +19: srlx %o3, 32, %o4 /* IEU0 Group */ + stw %o4, [%o0 - 8] /* Store */ + + retl /* CTI+IEU1 Group */ + mov %g6, %o0 /* IEU0 */ + nop + nop + +20: srlx %o3, 40, %o4 /* IEU0 Group */ + stb %o4, [%o0 - 6] /* Store */ +21: srlx %o3, 48, %o4 /* IEU0 Group */ + stb %o4, [%o0 - 7] /* Store */ + +22: srlx %o3, 56, %o4 /* IEU0 Group */ + stb %o4, [%o0 - 8] /* Store */ + retl /* CTI+IEU1 Group */ + mov %g6, %o0 /* IEU0 */ +END(strcpy) +libc_hidden_builtin_def (strcpy) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/strcspn.S b/REORG.TODO/sysdeps/sparc/sparc64/strcspn.S new file mode 100644 index 0000000000..8b448d16e7 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/strcspn.S @@ -0,0 +1,212 @@ +/* strcspn (str, ss) -- Return the length of the initial 
segment of STR + which contains no characters from SS. + For SPARC v9. + Copyright (C) 1999-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jj@ultra.linux.cz> + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <asm/asi.h> +#ifndef XCC +#define XCC xcc +#define STACK_SIZE 128 +#define STACK_OFFSET 128+0x7ff + .register %g2, #scratch +#else +#define STACK_SIZE 64 +#define STACK_OFFSET 64 +#endif + + .text + .align 32 + /* %o0 = str, %o1 = ss. A 256-bit table (four 64-bit words at + %sp + STACK_OFFSET, base in %o5) gets one bit per byte value: + word offset (c >> 3) & 0x18, bit c & 0x3f. Bit 0 of word 0 + is preset so the terminating NUL always stops the scan. Loop 1 + sets the bits for SS; the rest scans STR and returns the offset + of the first byte whose bit is set (%g4 = original str). */ +ENTRY(strcspn) + sub %sp, STACK_SIZE+32, %sp /* IEU0 Group */ + mov 1, %o4 /* IEU1 */ + stx %o4, [%sp + STACK_OFFSET] /* Store Group */ + mov %o0, %g4 /* IEU0 */ + + stx %g0, [%sp + STACK_OFFSET + 8] /* Store Group */ + add %sp, STACK_OFFSET, %o5 /* IEU0 */ + stx %g0, [%sp + STACK_OFFSET + 16] /* Store Group */ + stx %g0, [%sp + STACK_OFFSET + 24] /* Store Group */ + +1: ldub [%o1], %o2 /* Load Group */ + brz,pn %o2, 2f /* CTI+IEU1 Group */ + srl %o2, 3, %o3 /* IEU0 */ + and %o3, 0x18, %o3 /* IEU0 Group */ + + and %o2, 0x3f, %o2 /* IEU1 */ + ldx [%o5 + %o3], %g2 /* Load Group */ + sllx %o4, %o2, %g1 /* IEU0 */ + add %o1, 1, %o1 /* IEU1 */ + + or %g2, %g1, %g2 /* IEU0 Group */ + ba,pt %xcc, 1b /* CTI */ + stx %g2, [%o5 + %o3] /* Store */ +2: andcc %o0, 7, %g0 /* IEU1 Group */ + + be,a,pt %xcc, 4f /* CTI */ + ldx [%o0], %o2 /* Load */ + ldub [%o0], 
%o2 /* Load Group */ +3: srl %o2, 3, %o3 /* IEU0 Group */ + + and %o2, 0x3f, %o2 /* IEU1 */ + and %o3, 0x18, %o3 /* IEU0 Group */ + ldx [%o5 + %o3], %g2 /* Load Group */ + sllx %o4, %o2, %g1 /* IEU0 */ + + add %o0, 1, %o0 /* IEU1 */ + andcc %g2, %g1, %g0 /* IEU1 Group */ + bne,pn %xcc, 12f /* CTI */ + andcc %o0, 7, %g0 /* IEU1 Group */ + + bne,a,pt %icc, 3b /* CTI */ + ldub [%o0], %o2 /* Load */ + ldx [%o0], %o2 /* Load Group */ +4: srlx %o2, 59, %o3 /* IEU0 Group */ + + srlx %o2, 56, %g5 /* IEU0 Group */ +5: and %o3, 0x18, %o3 /* IEU1 */ + andcc %g5, 0x3f, %g5 /* IEU1 Group */ + ldx [%o5 + %o3], %g2 /* Load */ + + srlx %o2, 51, %o3 /* IEU0 */ + sllx %o4, %g5, %g1 /* IEU0 Group */ + srlx %o2, 48, %g5 /* IEU0 Group */ + andcc %g2, %g1, %g2 /* IEU1 */ + + bne,pn %xcc, 13f /* CTI */ + and %o3, 0x18, %o3 /* IEU0 Group */ + and %g5, 0x3f, %g5 /* IEU1 */ + ldx [%o5 + %o3], %g2 /* Load Group */ + + srlx %o2, 43, %o3 /* IEU0 */ + sllx %o4, %g5, %g1 /* IEU0 Group */ + srlx %o2, 40, %g5 /* IEU0 Group */ + andcc %g2, %g1, %g2 /* IEU1 */ + + bne,pn %xcc, 14f /* CTI */ + and %o3, 0x18, %o3 /* IEU0 Group */ + and %g5, 0x3f, %g5 /* IEU1 */ + ldx [%o5 + %o3], %g2 /* Load Group */ + + srlx %o2, 35, %o3 /* IEU0 */ + sllx %o4, %g5, %g1 /* IEU0 Group */ + srlx %o2, 32, %g5 /* IEU0 Group */ + andcc %g2, %g1, %g2 /* IEU1 */ + + bne,pn %xcc, 15f /* CTI */ + and %o3, 0x18, %o3 /* IEU0 Group */ + and %g5, 0x3f, %g5 /* IEU1 */ + ldx [%o5 + %o3], %g2 /* Load Group */ + + srlx %o2, 27, %o3 /* IEU0 */ + sllx %o4, %g5, %g1 /* IEU0 Group */ + srlx %o2, 24, %g5 /* IEU0 Group */ + andcc %g2, %g1, %g2 /* IEU1 */ + + bne,pn %xcc, 16f /* CTI */ + and %o3, 0x18, %o3 /* IEU0 Group */ + and %g5, 0x3f, %g5 /* IEU1 */ + ldx [%o5 + %o3], %g2 /* Load Group */ + + srlx %o2, 19, %o3 /* IEU0 */ + sllx %o4, %g5, %g1 /* IEU0 Group */ + srlx %o2, 16, %g5 /* IEU0 Group */ + andcc %g2, %g1, %g2 /* IEU1 */ + + bne,pn %xcc, 17f /* CTI */ + and %o3, 0x18, %o3 /* IEU0 Group */ + and %g5, 0x3f, %g5 /* IEU1 */ + ldx [%o5 
+ %o3], %g2 /* Load Group */ + + srlx %o2, 11, %o3 /* IEU0 */ + sllx %o4, %g5, %g1 /* IEU0 Group */ + add %o0, 8, %o0 /* IEU1 */ + srlx %o2, 8, %g5 /* IEU0 Group */ + + andcc %g2, %g1, %g2 /* IEU1 */ + bne,pn %xcc, 18f /* CTI */ + and %o3, 0x18, %o3 /* IEU0 Group */ + and %g5, 0x3f, %g5 /* IEU1 */ + + ldx [%o5 + %o3], %g2 /* Load Group */ + sllx %o4, %g5, %g1 /* IEU0 */ + mov %o2, %g5 /* IEU1 */ + srlx %o2, 3, %o3 /* IEU0 Group */ + + ldxa [%o0] ASI_PNF, %o2 /* Load */ + andcc %g2, %g1, %g2 /* IEU1 Group */ + bne,pn %xcc, 19f /* CTI */ + and %o3, 0x18, %o3 /* IEU0 Group */ + + and %g5, 0x3f, %g5 /* IEU1 */ + ldx [%o5 + %o3], %g2 /* Load Group */ + sllx %o4, %g5, %g1 /* IEU0 */ + srlx %o2, 59, %o3 /* IEU0 Group */ + + andcc %g2, %g1, %g2 /* IEU1 Group */ + be,pt %xcc, 5b /* CTI */ + srlx %o2, 56, %g5 /* IEU0 Group */ + sub %o0, 1, %o0 /* IEU1 */ + + add %sp, STACK_SIZE+32, %sp /* IEU0 Group */ + retl /* CTI+IEU1 Group */ + sub %o0, %g4, %o0 /* IEU0 */ + + .align 16 +19: sub %o0, 2, %o0 /* IEU1 */ + add %sp, STACK_SIZE+32, %sp /* IEU0 Group */ + retl /* CTI+IEU1 Group */ + sub %o0, %g4, %o0 /* IEU0 */ + +18: sub %o0, 3, %o0 /* IEU1 */ + add %sp, STACK_SIZE+32, %sp /* IEU0 Group */ + retl /* CTI+IEU1 Group */ + sub %o0, %g4, %o0 /* IEU0 */ + +17: add %o0, 4, %o0 /* IEU1 */ + add %sp, STACK_SIZE+32, %sp /* IEU0 Group */ + retl /* CTI+IEU1 Group */ + sub %o0, %g4, %o0 /* IEU0 */ + +16: add %o0, 3, %o0 /* IEU1 */ + add %sp, STACK_SIZE+32, %sp /* IEU0 Group */ + retl /* CTI+IEU1 Group */ + sub %o0, %g4, %o0 /* IEU0 */ + +15: add %o0, 2, %o0 /* IEU1 */ + add %sp, STACK_SIZE+32, %sp /* IEU0 Group */ + retl /* CTI+IEU1 Group */ + sub %o0, %g4, %o0 /* IEU0 */ + +14: add %o0, 1, %o0 /* IEU1 */ + add %sp, STACK_SIZE+32, %sp /* IEU0 Group */ + retl /* CTI+IEU1 Group */ + sub %o0, %g4, %o0 /* IEU0 */ + +13: add %sp, STACK_SIZE+32, %sp /* IEU1 */ + retl /* CTI+IEU1 Group */ + sub %o0, %g4, %o0 /* IEU0 */ + + .align 16 +12: sub %o0, 1, %o0 /* IEU0 Group */ + add %sp, STACK_SIZE+32, 
%sp /* IEU1 */ + retl /* CTI+IEU1 Group */ + sub %o0, %g4, %o0 /* IEU0 */ +END(strcspn) +libc_hidden_builtin_def (strcspn) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/strlen.S b/REORG.TODO/sysdeps/sparc/sparc64/strlen.S new file mode 100644 index 0000000000..25a63df03a --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/strlen.S @@ -0,0 +1,85 @@ +/* Determine the length of a string. For SPARC v9. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz>, + Jakub Jelinek <jj@ultra.linux.cz>, and + David S. Miller <davem@davemloft.net>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <sysdep.h> + + .register %g2, #scratch + .register %g3, #scratch + + .text + .align 32 + /* %o1 keeps the original pointer while %o0 is rounded down to an + 8-byte boundary. The high-order 8 * (s & 7) bits of the first + word, i.e. the bytes in front of the string on big-endian SPARC, + are forced to ones so they cannot look like the terminator. + The result is %o0 - %o1 plus a negative byte-position + correction held in %g2. */ +ENTRY(strlen) + mov %o0, %o1 + andn %o0, 0x7, %o0 + + ldx [%o0], %o5 + and %o1, 0x7, %g1 + mov -1, %g5 + + sethi %hi(0x01010101), %o2 + sll %g1, 3, %g1 + + or %o2, %lo(0x01010101), %o2 + srlx %g5, %g1, %o3 + + sllx %o2, 32, %g1 + sethi %hi(0x0000ff00), %g5 + + orn %o5, %o3, %o5 + or %o2, %g1, %o2 + + sllx %o2, 7, %o3 +10: add %o0, 8, %o0 + + andn %o3, %o5, %g1 + sub %o5, %o2, %g2 + + andcc %g1, %g2, %g0 + be,a,pt %xcc, 10b + ldx [%o0], %o5 + srlx %o5, 32, %g1 + + andn %o3, %g1, %o4 + sub %g1, %o2, %g2 + + add %o0, 4, %g3 + andcc %o4, %g2, %g0 + movne %icc, %g1, %o5 + + move %icc, %g3, %o0 + or %g5, %lo(0x0000ff00), %g5 + mov 3 - 8, %g2 + + andcc %o5, %g5, %g0 + srlx %o5, 16, %g1 + move %icc, 2 - 8, %g2 + + andcc %g1, 0xff, %g0 + srl %o5, 24, %o5 + move %icc, 1 - 8, %g2 + + movrz %o5, 0 - 8, %g2 + sub %o0, %o1, %o0 + + retl + add %o0, %g2, %o0 +END(strlen) +libc_hidden_builtin_def (strlen) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/strncmp.S b/REORG.TODO/sysdeps/sparc/sparc64/strncmp.S new file mode 100644 index 0000000000..f0af16155b --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/strncmp.S @@ -0,0 +1,363 @@ +/* Compare no more than N characters of S1 and S2, returning less than, + equal to or greater than zero if S1 is lexicographically less than, + equal to or greater than S2. + For SPARC v9. + Copyright (C) 1997-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and + Jakub Jelinek <jj@ultra.linux.cz>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <asm/asi.h> +#ifndef XCC +#define XCC xcc +#define USE_BPR + .register %g2, #scratch + .register %g3, #scratch + .register %g6, #scratch +#endif + + /* Normally, this uses + ((xword - 0x0101010101010101) & 0x8080808080808080) test + to find out if any byte in xword could be zero. This is fast, but + also gives false alarm for any byte in range 0x81-0xff. It does + not matter for correctness, as if this test tells us there could + be some zero byte, we check it byte by byte, but if bytes with + high bits set are common in the strings, then this will give poor + performance. You can #define EIGHTBIT_NOT_RARE and the algorithm + will use one tick slower, but more precise test + ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080), + which does not give any false alarms (but if some bits are set, + one cannot assume from it which bytes are zero and which are not). + It has yet to be measured what the correct default for glibc is + these days for an average user. 
+ */ + + .text + .align 32 +ENTRY(strncmp) +#ifdef USE_BPR + brz,pn %o2, 4f /* CTI+IEU1 Group */ +#else + tst %o2 /* IEU1 Group */ + be,pn %XCC, 4f /* CTI */ +#endif + sethi %hi(0x1010101), %g1 /* IEU0 */ + andcc %o0, 7, %g0 /* IEU1 Group */ + bne,pn %icc, 9f /* CTI */ + + or %g1, %lo(0x1010101), %g1 /* IEU0 */ + andcc %o1, 7, %g3 /* IEU1 Group */ + bne,pn %icc, 11f /* CTI */ + sllx %g1, 32, %g2 /* IEU0 */ + + ldx [%o0], %g4 /* Load Group */ + or %g1, %g2, %g1 /* IEU0 */ +1: ldx [%o1], %o3 /* Load Group */ + sllx %g1, 7, %g2 /* IEU0 */ + + add %o0, 8, %o0 /* IEU1 */ +2: subcc %o2, 8, %o2 /* IEU1 Group */ + bcs,pn %XCC, 5f /* CTI */ + add %o1, 8, %o1 /* IEU0 */ + + sub %g4, %g1, %g3 /* IEU0 Group */ + subcc %g4, %o3, %o4 /* IEU1 */ +#ifdef EIGHTBIT_NOT_RARE + andn %g3, %g4, %g6 /* IEU0 Group */ +#endif + bne,pn %xcc, 6f /* CTI */ + ldxa [%o0] ASI_PNF, %g4 /* Load Group */ + + add %o0, 8, %o0 /* IEU0 */ +#ifdef EIGHTBIT_NOT_RARE + andcc %g6, %g2, %g0 /* IEU1 */ +#else + andcc %g3, %g2, %g0 /* IEU1 */ +#endif + be,a,pt %xcc, 2b /* CTI */ + ldxa [%o1] ASI_PNF, %o3 /* Load Group */ + + addcc %g3, %g1, %o4 /* IEU1 */ +#ifdef EIGHTBIT_NOT_RARE + srlx %g6, 32, %g6 /* IEU0 */ + andcc %g6, %g2, %g0 /* IEU1 Group */ +#else + srlx %g3, 32, %g3 /* IEU0 */ + andcc %g3, %g2, %g0 /* IEU1 Group */ +#endif + be,pt %xcc, 3f /* CTI */ + + srlx %o4, 56, %o5 /* IEU0 */ + andcc %o5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 4f /* CTI */ + srlx %o4, 48, %o5 /* IEU0 */ + + andcc %o5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 4f /* CTI */ + srlx %o4, 40, %o5 /* IEU0 */ + andcc %o5, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 4f /* CTI */ + srlx %o4, 32, %o5 /* IEU0 */ + andcc %o5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 4f /* CTI */ + +3: srlx %o4, 24, %o5 /* IEU0 */ + andcc %o5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 4f /* CTI */ + srlx %o4, 16, %o5 /* IEU0 */ + + andcc %o5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 4f /* CTI */ + srlx %o4, 8, %o5 /* IEU0 */ + andcc %o5, 0xff, %g0 /* IEU1 
Group */ + + be,pn %icc, 4f /* CTI */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + bne,a,pn %icc, 2b /* CTI */ + ldxa [%o1] ASI_PNF, %o3 /* Load */ + +4: retl /* CTI+IEU1 Group */ + clr %o0 /* IEU0 */ + + .align 16 +5: srlx %g4, 56, %o4 /* IEU0 Group */ + cmp %o2, -8 /* IEU1 */ + be,pn %XCC, 4b /* CTI */ + srlx %o3, 56, %o5 /* IEU0 Group */ + + andcc %o4, 0xff, %g0 /* IEU1 */ + be,pn %xcc, 8f /* CTI */ + subcc %o4, %o5, %o4 /* IEU1 Group */ + bne,pn %xcc, 8f /* CTI */ + + srlx %o3, 48, %o5 /* IEU0 */ + cmp %o2, -7 /* IEU1 Group */ + be,pn %XCC, 4b /* CTI */ + srlx %g4, 48, %o4 /* IEU0 */ + + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %xcc, 8f /* CTI */ + subcc %o4, %o5, %o4 /* IEU1 Group */ + bne,pn %xcc, 8f /* CTI */ + + srlx %o3, 40, %o5 /* IEU0 */ + cmp %o2, -6 /* IEU1 Group */ + be,pn %XCC, 4b /* CTI */ + srlx %g4, 40, %o4 /* IEU0 */ + + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %xcc, 8f /* CTI */ + subcc %o4, %o5, %o4 /* IEU1 Group */ + bne,pn %xcc, 8f /* CTI */ + + srlx %o3, 32, %o5 /* IEU0 */ + cmp %o2, -5 /* IEU1 Group */ + be,pn %XCC, 4b /* CTI */ + srlx %g4, 32, %o4 /* IEU0 */ + + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %xcc, 8f /* CTI */ + subcc %o4, %o5, %o4 /* IEU1 Group */ + bne,pn %xcc, 8f /* CTI */ + + srlx %o3, 24, %o5 /* IEU0 */ + cmp %o2, -4 /* IEU1 Group */ + be,pn %XCC, 4b /* CTI */ + srlx %g4, 24, %o4 /* IEU0 */ + + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %xcc, 8f /* CTI */ + subcc %o4, %o5, %o4 /* IEU1 Group */ + bne,pn %xcc, 8f /* CTI */ + + srlx %o3, 16, %o5 /* IEU0 */ + cmp %o2, -3 /* IEU1 Group */ + be,pn %XCC, 4b /* CTI */ + srlx %g4, 16, %o4 /* IEU0 */ + + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %xcc, 8f /* CTI */ + subcc %o4, %o5, %o4 /* IEU1 Group */ + bne,pn %xcc, 8f /* CTI */ + + srlx %o3, 8, %o5 /* IEU0 */ + cmp %o2, -2 /* IEU1 Group */ + be,pn %XCC, 4b /* CTI */ + srlx %g4, 8, %o4 /* IEU0 */ + + retl /* CTI+IEU1 Group */ + sub %o4, %o5, %o0 /* IEU0 */ +6: addcc %o3, %o4, %g4 /* IEU1 */ +7: srlx %o3, 56, %o5 
/* IEU0 */ + + srlx %g4, 56, %o4 /* IEU0 Group */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %xcc, 8f /* CTI */ + subcc %o4, %o5, %o4 /* IEU1 Group */ + + bne,pn %xcc, 8f /* CTI */ + srlx %o3, 48, %o5 /* IEU0 */ + srlx %g4, 48, %o4 /* IEU0 Group */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + + be,pn %xcc, 8f /* CTI */ + subcc %o4, %o5, %o4 /* IEU1 Group */ + bne,pn %xcc, 8f /* CTI */ + srlx %o3, 40, %o5 /* IEU0 */ + + srlx %g4, 40, %o4 /* IEU0 Group */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %xcc, 8f /* CTI */ + subcc %o4, %o5, %o4 /* IEU1 Group */ + + bne,pn %xcc, 8f /* CTI */ + srlx %o3, 32, %o5 /* IEU0 */ + srlx %g4, 32, %o4 /* IEU0 Group */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + + be,pn %xcc, 8f /* CTI */ + subcc %o4, %o5, %o4 /* IEU1 Group */ + bne,pn %xcc, 8f /* CTI */ + srlx %o3, 24, %o5 /* IEU0 */ + + srlx %g4, 24, %o4 /* IEU0 Group */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %xcc, 8f /* CTI */ + subcc %o4, %o5, %o4 /* IEU1 Group */ + + bne,pn %xcc, 8f /* CTI */ + srlx %o3, 16, %o5 /* IEU0 */ + srlx %g4, 16, %o4 /* IEU0 Group */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + + be,pn %xcc, 8f /* CTI */ + subcc %o4, %o5, %o4 /* IEU1 Group */ + bne,pn %xcc, 8f /* CTI */ + srlx %o3, 8, %o5 /* IEU0 */ + + srlx %g4, 8, %o4 /* IEU0 Group */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %xcc, 8f /* CTI */ + subcc %o4, %o5, %o4 /* IEU1 Group */ + + retl /* CTI+IEU1 Group */ + sub %g4, %o3, %o0 /* IEU0 */ +8: retl /* CTI+IEU1 Group */ + mov %o4, %o0 /* IEU0 */ + +9: ldub [%o0], %g4 /* Load Group */ + add %o0, 1, %o0 /* IEU0 */ + ldub [%o1], %o3 /* Load Group */ + sllx %g1, 32, %g2 /* IEU0 */ + +10: subcc %o2, 1, %o2 /* IEU1 Group */ + be,pn %XCC, 8b /* CTI */ + sub %g4, %o3, %o4 /* IEU0 */ + add %o1, 1, %o1 /* IEU0 Group */ + + cmp %g4, %o3 /* IEU1 */ + bne,pn %xcc, 8b /* CTI */ + lduba [%o0] ASI_PNF, %g4 /* Load Group */ + andcc %o3, 0xff, %g0 /* IEU1 */ + + be,pn %icc, 4b /* CTI */ + lduba [%o1] ASI_PNF, %o3 /* Load Group */ + andcc %o0, 7, %g0 /* 
IEU1 */ + bne,a,pn %icc, 10b /* CTI */ + + add %o0, 1, %o0 /* IEU0 Group */ + or %g1, %g2, %g1 /* IEU1 */ + andcc %o1, 7, %g3 /* IEU1 Group */ + be,pn %icc, 1b /* CTI */ + + ldxa [%o0] ASI_PNF, %g4 /* Load */ +11: sllx %g3, 3, %g5 /* IEU0 Group */ + mov 64, %g6 /* IEU1 */ + or %g1, %g2, %g1 /* IEU0 Group */ + sub %o1, %g3, %o1 /* IEU1 */ + + sub %g6, %g5, %g6 /* IEU0 Group */ + ldxa [%o1] ASI_PNF, %o4 /* Load */ + sllx %g1, 7, %g2 /* IEU1 */ + add %o1, 8, %o1 /* IEU0 Group */ + /* %g1 = 0101010101010101 + %g2 = 8080808080808080 + %g3 = %o1 alignment + %g5 = number of bits to shift left + %g6 = number of bits to shift right */ + +12: sllx %o4, %g5, %o3 /* IEU0 Group */ + ldxa [%o1] ASI_PNF, %o4 /* Load */ + add %o1, 8, %o1 /* IEU1 */ +13: ldxa [%o0] ASI_PNF, %g4 /* Load Group */ + + addcc %o0, 8, %o0 /* IEU1 */ + srlx %o4, %g6, %o5 /* IEU0 */ + subcc %o2, 8, %o2 /* IEU1 Group */ + bcs,pn %XCC, 5b /* CTI */ + + or %o3, %o5, %o3 /* IEU0 */ + cmp %g4, %o3 /* IEU1 Group */ + bne,pn %xcc, 7b /* CTI */ + sub %g4, %g1, %o5 /* IEU0 */ + +#ifdef EIGHTBIT_NOT_RARE + andn %o5, %g4, %o5 /* IEU0 Group */ +#endif + andcc %o5, %g2, %g0 /* IEU1 Group */ + be,pt %xcc, 12b /* CTI */ + srlx %o5, 32, %o5 /* IEU0 */ + andcc %o5, %g2, %g0 /* IEU1 Group */ + + be,pt %xcc, 14f /* CTI */ + srlx %g4, 56, %o5 /* IEU0 */ + andcc %o5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 4b /* CTI */ + + srlx %g4, 48, %o5 /* IEU0 */ + andcc %o5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 4b /* CTI */ + srlx %g4, 40, %o5 /* IEU0 */ + + andcc %o5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 4b /* CTI */ + srlx %g4, 32, %o5 /* IEU0 */ + andcc %o5, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 4b /* CTI */ +14: srlx %g4, 24, %o5 /* IEU0 */ + andcc %o5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 4b /* CTI */ + + srlx %g4, 16, %o5 /* IEU0 */ + andcc %o5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 4b /* CTI */ + srlx %g4, 8, %o5 /* IEU0 */ + + andcc %o5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 4b /* CTI */ + andcc %g4, 0xff, 
%g0 /* IEU1 Group */ + be,pn %icc, 4b /* CTI */ + + sllx %o4, %g5, %o3 /* IEU0 */ + ldxa [%o1] ASI_PNF, %o4 /* Load Group */ + ba,pt %xcc, 13b /* CTI */ + add %o1, 8, %o1 /* IEU0 */ +END(strncmp) +libc_hidden_builtin_def (strncmp) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/strncpy.S b/REORG.TODO/sysdeps/sparc/sparc64/strncpy.S new file mode 100644 index 0000000000..f8c801f552 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/strncpy.S @@ -0,0 +1,396 @@ +/* strncpy(DST, SRC, COUNT) - Copy no more than COUNT bytes of the + null-terminated string from SRC to DST. If SRC does not cover all of + COUNT, the balance is zeroed. + For SPARC v9. + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and + Jakub Jelinek <jj@ultra.linux.cz>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <asm/asi.h> +#ifndef XCC +#define XCC xcc +#define USE_BPR + .register %g2, #scratch + .register %g3, #scratch + .register %g6, #scratch +#endif + + /* Normally, this uses + ((xword - 0x0101010101010101) & 0x8080808080808080) test + to find out if any byte in xword could be zero. This is fast, but + also gives false alarm for any byte in range 0x81-0xff. 
It does + not matter for correctness, as if this test tells us there could + be some zero byte, we check it byte by byte, but if bytes with + high bits set are common in the strings, then this will give poor + performance. You can #define EIGHTBIT_NOT_RARE and the algorithm + will use one tick slower, but more precise test + ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080), + which does not give any false alarms (but if some bits are set, + one cannot assume from it which bytes are zero and which are not). + It is yet to be measured, what is the correct default for glibc + in these days for an average user. + */ + + .text + .align 32 +ENTRY(strncpy) + sethi %hi(0x01010101), %g1 /* IEU0 Group */ +#ifdef USE_BPR + brz,pn %o2, 19f /* CTI+IEU1 */ +#else + tst %o2 /* IEU1 */ + be,pn %XCC, 19f /* CTI */ +#endif + mov %o0, %g6 /* IEU0 Group */ + or %g1, %lo(0x01010101), %g1 /* IEU1 */ + + andcc %o0, 7, %g0 /* IEU1 Group */ + sllx %g1, 32, %g2 /* IEU0 */ + bne,pn %icc, 26f /* CTI */ + or %g1, %g2, %g1 /* IEU0 Group */ + + andcc %o1, 7, %g3 /* IEU1 */ + bne,pn %icc, 28f /* CTI */ + sllx %g1, 7, %g2 /* IEU0 Group */ + ldx [%o1], %o3 /* Load */ + +1: add %o1, 8, %o1 /* IEU1 */ +2: subcc %o2, 8, %o2 /* IEU1 Group */ + bl,pn %XCC, 18f /* CTI */ + sub %o3, %g1, %o4 /* IEU0 */ + + add %o0, 8, %o0 /* IEU0 Group */ +#ifdef EIGHTBIT_NOT_RARE + andn %o4, %o3, %o4 /* IEU1 */ +#endif + mov %o3, %g3 /* IEU1 */ + ldxa [%o1] ASI_PNF, %o3 /* Load */ + add %o1, 8, %o1 /* IEU0 Group */ + + andcc %o4, %g2, %g0 /* IEU1 */ + be,a,pt %xcc, 2b /* CTI */ + stx %g3, [%o0-8] /* Store Group */ + srlx %g3, 56, %g5 /* IEU0 Group */ + + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 16f /* CTI */ + srlx %g3, 48, %g4 /* IEU0 */ + andcc %g4, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 15f /* CTI */ + srlx %g3, 40, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 14f /* CTI */ + + srlx %g3, 32, %g4 /* IEU0 */ + andcc %g4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 13f /* 
CTI */ + srlx %g3, 24, %g5 /* IEU0 */ + + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 12f /* CTI */ + srlx %g3, 16, %g4 /* IEU0 */ + andcc %g4, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 11f /* CTI */ + srlx %g3, 8, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 10f /* CTI */ + + andcc %g3, 0xff, %g0 /* IEU1 Group */ + bne,pt %icc, 2b /* CTI */ +3: stx %g3, [%o0-8] /* Store */ + andncc %o2, 31, %g3 /* IEU1 Group */ + +4: be,pn %XCC, 41f /* CTI */ + and %o2, 31, %o2 /* IEU1 Group */ +40: stx %g0, [%o0] /* Store */ + stx %g0, [%o0 + 8] /* Store Group */ + + subcc %g3, 32, %g3 /* IEU1 */ + stx %g0, [%o0 + 16] /* Store Group */ + stx %g0, [%o0 + 24] /* Store Group */ + bne,pt %XCC, 40b /* CTI */ + + add %o0, 32, %o0 /* IEU0 */ +41: subcc %o2, 8, %o2 /* IEU1 Group */ + bl,a,pn %XCC, 6f /* CTI */ + andcc %o2, 4, %g0 /* IEU1 Group */ + +5: stx %g0, [%o0] /* Store */ + subcc %o2, 8, %o2 /* IEU1 Group */ + bge,pt %XCC, 5b /* CTI */ + add %o0, 8, %o0 /* IEU0 */ + + andcc %o2, 4, %g0 /* IEU1 Group */ +6: be,a,pn %icc, 7f /* CTI */ + andcc %o2, 2, %g0 /* IEU1 Group */ + stw %g0, [%o0] /* Store */ + + add %o0, 4, %o0 /* IEU0 */ + andcc %o2, 2, %g0 /* IEU1 Group */ +7: be,a,pn %icc, 8f /* CTI */ + andcc %o2, 1, %g0 /* IEU1 Group */ + + sth %g0, [%o0] /* Store */ + add %o0, 2, %o0 /* IEU0 */ + andcc %o2, 1, %g0 /* IEU1 Group */ +8: bne,a,pn %icc, 9f /* CTI */ + + stb %g0, [%o0] /* Store */ +9: retl /* CTI+IEU1 Group */ + mov %g6, %o0 /* IEU0 */ + + .align 16 +10: ba,pt %xcc, 3b /* CTI */ + sllx %g5, 8, %g3 /* IEU0 */ +11: ba,pt %xcc, 3b /* CTI Group */ + sllx %g4, 16, %g3 /* IEU0 */ + +12: ba,pt %xcc, 3b /* CTI Group */ + sllx %g5, 24, %g3 /* IEU0 */ +13: ba,pt %xcc, 3b /* CTI Group */ + sllx %g4, 32, %g3 /* IEU0 */ + +14: ba,pt %xcc, 3b /* CTI Group */ + sllx %g5, 40, %g3 /* IEU0 */ +15: ba,pt %xcc, 3b /* CTI Group */ + sllx %g4, 48, %g3 /* IEU0 */ + +16: ba,pt %xcc, 3b /* CTI */ + sllx %g5, 56, %g3 /* IEU0 */ +17: or %o3, %o4, %o3 /* IEU0 Group */ 
+ sub %o3, %g1, %o4 /* IEU1 */ + +18: addcc %o2, 8, %o2 /* IEU1 Group */ + be,pn %XCC, 19f /* CTI */ + andcc %o4, %g2, %g0 /* IEU1 Group */ + be,pt %xcc, 21f /* CTI */ + + srlx %o3, 56, %g5 /* IEU0 */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 20f /* CTI */ + stb %g5, [%o0] /* Store */ + + add %o0, 1, %o0 /* IEU0 Group */ + subcc %o2, 1, %o2 /* IEU1 */ + be,pn %XCC, 19f /* CTI */ + srlx %o3, 48, %g5 /* IEU0 Group */ + + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 20f /* CTI */ + stb %g5, [%o0] /* Store */ + add %o0, 1, %o0 /* IEU0 Group */ + + subcc %o2, 1, %o2 /* IEU1 */ + be,pn %XCC, 19f /* CTI */ + srlx %o3, 40, %g5 /* IEU0 Group */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 20f /* CTI */ + stb %g5, [%o0] /* Store */ + add %o0, 1, %o0 /* IEU0 Group */ + subcc %o2, 1, %o2 /* IEU1 */ + + be,pn %XCC, 19f /* CTI */ + srlx %o3, 32, %g5 /* IEU0 Group */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 20f /* CTI */ + + stb %g5, [%o0] /* Store */ + add %o0, 1, %o0 /* IEU0 Group */ + subcc %o2, 1, %o2 /* IEU1 */ + be,pn %XCC, 19f /* CTI */ + + srlx %o3, 24, %g5 /* IEU0 Group */ + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 20f /* CTI */ + stb %g5, [%o0] /* Store */ + + add %o0, 1, %o0 /* IEU0 Group */ + subcc %o2, 1, %o2 /* IEU1 */ + be,pn %XCC, 19f /* CTI */ + srlx %o3, 16, %g5 /* IEU0 Group */ + + andcc %g5, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 20f /* CTI */ + stb %g5, [%o0] /* Store */ + add %o0, 1, %o0 /* IEU0 Group */ + + subcc %o2, 1, %o2 /* IEU1 */ + be,pn %XCC, 19f /* CTI */ + srlx %o3, 8, %g5 /* IEU0 Group */ + stb %g5, [%o0] /* Store */ + +19: retl /* CTI+IEU1 Group */ + mov %g6, %o0 /* IEU0 */ +50: stb %g0, [%o0] /* Store Group */ +20: subcc %o2, 1, %o2 /* IEU1 Group */ + + bne,pt %XCC, 50b /* CTI */ + add %o0, 1, %o0 /* IEU0 */ + retl /* CTI+IEU1 Group */ + mov %g6, %o0 /* IEU0 */ + +21: andcc %o2, 4, %g0 /* IEU1 Group */ + be,pn %icc, 22f /* CTI */ + srlx %o3, 32, %g5 /* IEU0 */ + stw %g5, [%o0] /* Store 
Group */ + + add %o0, 4, %o0 /* IEU0 */ + mov %o3, %g5 /* IEU1 */ +22: andcc %o2, 2, %g0 /* IEU1 Group */ + be,pn %icc, 23f /* CTI */ + + srlx %g5, 16, %g4 /* IEU0 */ + sth %g4, [%o0] /* Store Group */ + add %o0, 2, %o0 /* IEU0 */ + mov %g5, %g4 /* IEU1 */ + +23: srlx %g4, 8, %g4 /* IEU0 Group */ + andcc %o2, 1, %g0 /* IEU1 */ + bne,a,pn %icc, 24f /* CTI */ + stb %g4, [%o0] /* Store Group */ + +24: retl /* CTI+IEU1 Group */ + mov %g6, %o0 /* IEU0 */ +25: andcc %o0, 7, %g0 /* IEU1 Group */ + be,a,pn %icc, 4b /* CTI */ + + andncc %o2, 31, %g3 /* IEU1 Group */ + stb %g0, [%o0] /* Store Group */ + subcc %o2, 1, %o2 /* IEU1 */ + bne,pt %XCC, 25b /* CTI */ + + add %o0, 1, %o0 /* IEU0 Group */ + retl /* CTI+IEU1 Group */ + mov %g6, %o0 /* IEU0 */ + + .align 16 +26: ldub [%o1], %o3 /* Load */ + sllx %g1, 7, %g2 /* IEU0 Group */ + stb %o3, [%o0] /* Store */ +27: subcc %o2, 1, %o2 /* IEU1 */ + + be,pn %XCC, 9b /* CTI */ + add %o1, 1, %o1 /* IEU0 Group */ + add %o0, 1, %o0 /* IEU1 */ + andcc %o3, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 25b /* CTI */ + lduba [%o1] ASI_PNF, %o3 /* Load */ + andcc %o0, 7, %g0 /* IEU1 Group */ + bne,a,pt %icc, 27b /* CTI */ + + stb %o3, [%o0] /* Store */ + andcc %o1, 7, %g3 /* IEU1 Group */ + be,a,pt %icc, 1b /* CTI */ + ldx [%o1], %o3 /* Load */ + +28: orcc %g0, 64, %g4 /* IEU1 Group */ + sllx %g3, 3, %g5 /* IEU0 */ + sub %g4, %g5, %g4 /* IEU0 Group */ + sub %o1, %g3, %o1 /* IEU1 */ + /* %g1 = 0101010101010101 + %g2 = 8080808080808080 + %g3 = source alignment + %g5 = number of bits to shift left + %g4 = number of bits to shift right */ + + ldxa [%o1] ASI_PNF, %o5 /* Load Group */ + addcc %o1, 8, %o1 /* IEU1 */ + +29: sllx %o5, %g5, %o3 /* IEU0 Group */ + ldxa [%o1] ASI_PNF, %o5 /* Load */ + subcc %o2, 8, %o2 /* IEU1 */ + bl,pn %XCC, 17b /* CTI */ + + srlx %o5, %g4, %o4 /* IEU0 Group */ + add %o1, 8, %o1 /* IEU1 */ + or %o3, %o4, %o3 /* IEU0 Group */ + add %o0, 8, %o0 /* IEU1 */ + + sub %o3, %g1, %o4 /* IEU0 Group */ +#ifdef EIGHTBIT_NOT_RARE 
+ andn %o4, %o3, %o4 /* IEU0 Group */ +#endif + andcc %o4, %g2, %g0 /* IEU1 Group */ + be,a,pt %xcc, 29b /* CTI */ + stx %o3, [%o0-8] /* Store */ + + srlx %o3, 56, %o4 /* IEU0 Group */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 36f /* CTI */ + srlx %o3, 48, %o4 /* IEU0 */ + + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 35f /* CTI */ + srlx %o3, 40, %o4 /* IEU0 */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 34f /* CTI */ + srlx %o3, 32, %o4 /* IEU0 */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 33f /* CTI */ + + srlx %o3, 24, %o4 /* IEU0 */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 32f /* CTI */ + srlx %o3, 16, %o4 /* IEU0 */ + + andcc %o4, 0xff, %g0 /* IEU1 Group */ + be,pn %icc, 31f /* CTI */ + srlx %o3, 8, %o4 /* IEU0 */ + andcc %o4, 0xff, %g0 /* IEU1 Group */ + + be,pn %icc, 30f /* CTI */ + andcc %o3, 0xff, %g0 /* IEU1 Group */ + bne,pn %icc, 29b /* CTI */ + stx %o3, [%o0-8] /* Store */ + + ba,pt %xcc, 4b /* CTI Group */ + andncc %o2, 31, %g3 /* IEU1 */ +30: srlx %o3, 8, %o4 /* IEU0 */ + ba,pt %xcc, 3b /* CTI */ + + sllx %o4, 8, %g3 /* IEU0 Group */ +31: srlx %o3, 16, %o4 /* IEU0 Group */ + ba,pt %xcc, 3b /* CTI */ + sllx %o4, 16, %g3 /* IEU0 Group */ + +32: srlx %o3, 24, %o4 /* IEU0 Group */ + ba,pt %xcc, 3b /* CTI */ + sllx %o4, 24, %g3 /* IEU0 Group */ +33: srlx %o3, 32, %o4 /* IEU0 Group */ + + ba,pt %xcc, 3b /* CTI */ + sllx %o4, 32, %g3 /* IEU0 Group */ +34: srlx %o3, 40, %o4 /* IEU0 Group */ + ba,pt %xcc, 3b /* CTI */ + + sllx %o4, 40, %g3 /* IEU0 Group */ +35: srlx %o3, 48, %o4 /* IEU0 Group */ + ba,pt %xcc, 3b /* CTI */ + sllx %o4, 48, %g3 /* IEU0 Group */ + +36: srlx %o3, 56, %o4 /* IEU0 Group */ + ba,pt %xcc, 3b /* CTI */ + sllx %o4, 56, %g3 /* IEU0 Group */ +END(strncpy) +libc_hidden_builtin_def (strncpy) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/strpbrk.S b/REORG.TODO/sysdeps/sparc/sparc64/strpbrk.S new file mode 100644 index 0000000000..08e0c2f30f --- /dev/null +++ 
b/REORG.TODO/sysdeps/sparc/sparc64/strpbrk.S @@ -0,0 +1,230 @@ +/* strpbrk (s, accept) -- Find the first occurrence in S of any character in + ACCEPT. + For SPARC v9. + Copyright (C) 1999-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jj@ultra.linux.cz> + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <asm/asi.h> +#ifndef XCC +#define XCC xcc +#define STACK_SIZE 128 +#define STACK_OFFSET 128+0x7ff + .register %g2, #scratch +#else +#define STACK_SIZE 64 +#define STACK_OFFSET 64 +#endif + + .text + .align 32 +ENTRY(strpbrk) + sub %sp, STACK_SIZE+32, %sp /* IEU0 Group */ + mov 1, %o4 /* IEU1 */ + stx %o4, [%sp + STACK_OFFSET] /* Store Group */ + stx %g0, [%sp + STACK_OFFSET + 8] /* Store Group */ + + add %sp, STACK_OFFSET, %o5 /* IEU0 */ + stx %g0, [%sp + STACK_OFFSET + 16] /* Store Group */ + stx %g0, [%sp + STACK_OFFSET + 24] /* Store Group */ +1: ldub [%o1], %o2 /* Load Group */ + + brz,pn %o2, 2f /* CTI+IEU1 Group */ + srl %o2, 3, %o3 /* IEU0 */ + and %o3, 0x18, %o3 /* IEU0 Group */ + and %o2, 0x3f, %o2 /* IEU1 */ + + ldx [%o5 + %o3], %g2 /* Load Group */ + sllx %o4, %o2, %g1 /* IEU0 */ + add %o1, 1, %o1 /* IEU1 */ + or %g2, %g1, %g2 /* IEU0 Group */ + + ba,pt %xcc, 1b /* CTI */ + stx %g2, [%o5 + %o3] /* Store */ +2: andcc %o0, 7, %g0 /* IEU1 Group */ + 
be,a,pt %xcc, 4f /* CTI */ + + ldx [%o0], %o2 /* Load */ + ldub [%o0], %o2 /* Load Group */ +3: srl %o2, 3, %o3 /* IEU0 Group */ + and %o2, 0x3f, %o2 /* IEU1 */ + + and %o3, 0x18, %o3 /* IEU0 Group */ + ldx [%o5 + %o3], %g2 /* Load Group */ + sllx %o4, %o2, %g1 /* IEU0 */ + add %o0, 1, %o0 /* IEU1 */ + + andcc %g2, %g1, %g0 /* IEU1 Group */ + bne,pn %xcc, 12f /* CTI */ + andcc %o0, 7, %g0 /* IEU1 Group */ + bne,a,pt %icc, 3b /* CTI */ + + ldub [%o0], %o2 /* Load */ + ldx [%o0], %o2 /* Load Group */ +4: srlx %o2, 59, %o3 /* IEU0 Group */ + srlx %o2, 56, %g4 /* IEU0 Group */ + +5: and %o3, 0x18, %o3 /* IEU1 */ + andcc %g4, 0x3f, %g4 /* IEU1 Group */ + ldx [%o5 + %o3], %g2 /* Load */ + srlx %o2, 51, %o3 /* IEU0 */ + + sllx %o4, %g4, %g1 /* IEU0 Group */ + srlx %o2, 48, %g4 /* IEU0 Group */ + andcc %g2, %g1, %g2 /* IEU1 */ + bne,pn %xcc, 13f /* CTI */ + + and %o3, 0x18, %o3 /* IEU0 Group */ + and %g4, 0x3f, %g4 /* IEU1 */ + ldx [%o5 + %o3], %g2 /* Load Group */ + srlx %o2, 43, %o3 /* IEU0 */ + + sllx %o4, %g4, %g1 /* IEU0 Group */ + srlx %o2, 40, %g4 /* IEU0 Group */ + andcc %g2, %g1, %g2 /* IEU1 */ + bne,pn %xcc, 14f /* CTI */ + + and %o3, 0x18, %o3 /* IEU0 Group */ + and %g4, 0x3f, %g4 /* IEU1 */ + ldx [%o5 + %o3], %g2 /* Load Group */ + srlx %o2, 35, %o3 /* IEU0 */ + + sllx %o4, %g4, %g1 /* IEU0 Group */ + srlx %o2, 32, %g4 /* IEU0 Group */ + andcc %g2, %g1, %g2 /* IEU1 */ + bne,pn %xcc, 15f /* CTI */ + + and %o3, 0x18, %o3 /* IEU0 Group */ + and %g4, 0x3f, %g4 /* IEU1 */ + ldx [%o5 + %o3], %g2 /* Load Group */ + srlx %o2, 27, %o3 /* IEU0 */ + + sllx %o4, %g4, %g1 /* IEU0 Group */ + srlx %o2, 24, %g4 /* IEU0 Group */ + andcc %g2, %g1, %g2 /* IEU1 */ + bne,pn %xcc, 16f /* CTI */ + + and %o3, 0x18, %o3 /* IEU0 Group */ + and %g4, 0x3f, %g4 /* IEU1 */ + ldx [%o5 + %o3], %g2 /* Load Group */ + srlx %o2, 19, %o3 /* IEU0 */ + + sllx %o4, %g4, %g1 /* IEU0 Group */ + srlx %o2, 16, %g4 /* IEU0 Group */ + andcc %g2, %g1, %g2 /* IEU1 */ + bne,pn %xcc, 17f /* CTI */ + + and 
%o3, 0x18, %o3 /* IEU0 Group */ + and %g4, 0x3f, %g4 /* IEU1 */ + ldx [%o5 + %o3], %g2 /* Load Group */ + srlx %o2, 11, %o3 /* IEU0 */ + + sllx %o4, %g4, %g1 /* IEU0 Group */ + add %o0, 8, %o0 /* IEU1 */ + srlx %o2, 8, %g4 /* IEU0 Group */ + andcc %g2, %g1, %g2 /* IEU1 */ + + bne,pn %xcc, 18f /* CTI */ + and %o3, 0x18, %o3 /* IEU0 Group */ + and %g4, 0x3f, %g4 /* IEU1 */ + ldx [%o5 + %o3], %g2 /* Load Group */ + + sllx %o4, %g4, %g1 /* IEU0 */ + mov %o2, %g5 /* IEU1 */ + srlx %o2, 3, %o3 /* IEU0 Group */ + ldxa [%o0] ASI_PNF, %o2 /* Load */ + + andcc %g2, %g1, %g2 /* IEU1 Group */ + bne,pn %xcc, 19f /* CTI */ + and %o3, 0x18, %o3 /* IEU0 Group */ + and %g5, 0x3f, %g4 /* IEU1 */ + + ldx [%o5 + %o3], %g2 /* Load Group */ + sllx %o4, %g4, %g1 /* IEU0 */ + srlx %o2, 59, %o3 /* IEU0 Group */ + andcc %g2, %g1, %g2 /* IEU1 Group */ + + be,pt %xcc, 5b /* CTI */ + srlx %o2, 56, %g4 /* IEU0 Group */ + sub %o0, 1, %o0 /* IEU1 */ + add %sp, STACK_SIZE+32, %sp /* IEU0 Group */ + + andcc %g5, 0xff, %g0 /* IEU1 */ + retl /* CTI+IEU1 Group */ + move %icc, 0, %o0 /* Single Group */ + + .align 16 +19: sub %o0, 2, %o0 /* IEU1 */ + srl %g5, 8, %g1 /* IEU0 Group */ + add %sp, STACK_SIZE+32, %sp /* IEU1 */ + andcc %g1, 0xff, %g0 /* IEU1 Group */ + + retl /* CTI+IEU1 Group */ + move %icc, 0, %o0 /* Single Group */ +18: sub %o0, 3, %o0 /* IEU1 */ + srl %o2, 16, %g1 /* IEU0 Group */ + + add %sp, STACK_SIZE+32, %sp /* IEU1 */ + andcc %g1, 0xff, %g0 /* IEU1 Group */ + retl /* CTI+IEU1 Group */ + move %icc, 0, %o0 /* Single Group */ + +17: add %o0, 4, %o0 /* IEU1 */ + srl %o2, 24, %g1 /* IEU0 Group */ + add %sp, STACK_SIZE+32, %sp /* IEU1 */ + retl /* CTI+IEU1 Group */ + + movrz %g1, 0, %o0 /* Single Group */ +16: add %o0, 3, %o0 /* IEU1 */ + srlx %o2, 32, %g1 /* IEU0 Group */ + add %sp, STACK_SIZE+32, %sp /* IEU1 */ + + andcc %g1, 0xff, %g0 /* IEU1 Group */ + retl /* CTI+IEU1 Group */ + move %icc, 0, %o0 /* Single Group */ + + .align 16 +15: add %o0, 2, %o0 /* IEU1 */ + srlx %o2, 40, %g1 /* 
IEU0 Group */ + add %sp, STACK_SIZE+32, %sp /* IEU1 */ + andcc %g1, 0xff, %g0 /* IEU1 Group */ + + retl /* CTI+IEU1 Group */ + move %icc, 0, %o0 /* Single Group */ +14: add %o0, 1, %o0 /* IEU1 */ + srlx %o2, 48, %g1 /* IEU0 Group */ + + add %sp, STACK_SIZE+32, %sp /* IEU1 */ + andcc %g1, 0xff, %g0 /* IEU1 Group */ + retl /* CTI+IEU1 Group */ + move %icc, 0, %o0 /* Single Group */ + +13: add %sp, STACK_SIZE+32, %sp /* IEU1 */ + srlx %o2, 56, %g1 /* IEU0 Group */ + retl /* CTI+IEU1 Group */ + movrz %g1, 0, %o0 /* Single Group */ + + .align 16 +12: sub %o0, 1, %o0 /* IEU0 Group */ + or %o3, %o2, %g1 /* IEU1 */ + add %sp, STACK_SIZE+32, %sp /* IEU0 Group */ + retl /* CTI+IEU1 Group */ + + movrz %g1, 0, %o0 /* Single Group */ +END(strpbrk) +libc_hidden_builtin_def (strpbrk) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/strrchr.c b/REORG.TODO/sysdeps/sparc/sparc64/strrchr.c new file mode 100644 index 0000000000..ec608d6ab3 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/strrchr.c @@ -0,0 +1 @@ +/* strrchr is in strchr.S */ diff --git a/REORG.TODO/sysdeps/sparc/sparc64/strspn.S b/REORG.TODO/sysdeps/sparc/sparc64/strspn.S new file mode 100644 index 0000000000..7c560b9bc2 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/strspn.S @@ -0,0 +1,212 @@ +/* strspn (s, accept) -- Return the length of the maximum initial segment + of S which contains only characters in ACCEPT. + For SPARC v9. + Copyright (C) 1999-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek <jj@ultra.linux.cz> + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> +#include <asm/asi.h> +#ifndef XCC +#define XCC xcc +#define STACK_SIZE 128 +#define STACK_OFFSET 128+0x7ff + .register %g2, #scratch +#else +#define STACK_SIZE 64 +#define STACK_OFFSET 64 +#endif + + .text + .align 32 +ENTRY(strspn) + sub %sp, STACK_SIZE+32, %sp /* IEU0 Group */ + mov 1, %o4 /* IEU1 */ + stx %g0, [%sp + STACK_OFFSET] /* Store Group */ + mov %o0, %g4 /* IEU0 */ + + stx %g0, [%sp + STACK_OFFSET + 8] /* Store Group */ + add %sp, STACK_OFFSET, %o5 /* IEU0 */ + stx %g0, [%sp + STACK_OFFSET + 16] /* Store Group */ + stx %g0, [%sp + STACK_OFFSET + 24] /* Store Group */ + +1: ldub [%o1], %o2 /* Load Group */ + brz,pn %o2, 2f /* CTI+IEU1 Group */ + srl %o2, 3, %o3 /* IEU0 */ + and %o3, 0x18, %o3 /* IEU0 Group */ + + and %o2, 0x3f, %o2 /* IEU1 */ + ldx [%o5 + %o3], %g2 /* Load Group */ + sllx %o4, %o2, %g1 /* IEU0 */ + add %o1, 1, %o1 /* IEU1 */ + + or %g2, %g1, %g2 /* IEU0 Group */ + ba,pt %xcc, 1b /* CTI */ + stx %g2, [%o5 + %o3] /* Store */ +2: andcc %o0, 7, %g0 /* IEU1 Group */ + + be,a,pt %xcc, 4f /* CTI */ + ldx [%o0], %o2 /* Load */ + ldub [%o0], %o2 /* Load Group */ +3: srl %o2, 3, %o3 /* IEU0 Group */ + + and %o2, 0x3f, %o2 /* IEU1 */ + and %o3, 0x18, %o3 /* IEU0 Group */ + ldx [%o5 + %o3], %g2 /* Load Group */ + sllx %o4, %o2, %g1 /* IEU0 */ + + add %o0, 1, %o0 /* IEU1 */ + andcc %g2, %g1, %g0 /* IEU1 Group */ + be,pn %xcc, 12f /* CTI */ + andcc %o0, 7, %g0 /* IEU1 Group */ + + bne,a,pt %icc, 3b /* CTI */ + ldub [%o0], %o2 /* Load */ + ldx [%o0], %o2 /* Load Group */ +4: srlx %o2, 59, %o3 /* IEU0 Group */ + + srlx %o2, 
56, %g5 /* IEU0 Group */ +5: and %o3, 0x18, %o3 /* IEU1 */ + andcc %g5, 0x3f, %g5 /* IEU1 Group */ + ldx [%o5 + %o3], %g2 /* Load */ + + srlx %o2, 51, %o3 /* IEU0 */ + sllx %o4, %g5, %g1 /* IEU0 Group */ + srlx %o2, 48, %g5 /* IEU0 Group */ + andcc %g2, %g1, %g2 /* IEU1 */ + + be,pn %xcc, 13f /* CTI */ + and %o3, 0x18, %o3 /* IEU0 Group */ + and %g5, 0x3f, %g5 /* IEU1 */ + ldx [%o5 + %o3], %g2 /* Load Group */ + + srlx %o2, 43, %o3 /* IEU0 */ + sllx %o4, %g5, %g1 /* IEU0 Group */ + srlx %o2, 40, %g5 /* IEU0 Group */ + andcc %g2, %g1, %g2 /* IEU1 */ + + be,pn %xcc, 14f /* CTI */ + and %o3, 0x18, %o3 /* IEU0 Group */ + and %g5, 0x3f, %g5 /* IEU1 */ + ldx [%o5 + %o3], %g2 /* Load Group */ + + srlx %o2, 35, %o3 /* IEU0 */ + sllx %o4, %g5, %g1 /* IEU0 Group */ + srlx %o2, 32, %g5 /* IEU0 Group */ + andcc %g2, %g1, %g2 /* IEU1 */ + + be,pn %xcc, 15f /* CTI */ + and %o3, 0x18, %o3 /* IEU0 Group */ + and %g5, 0x3f, %g5 /* IEU1 */ + ldx [%o5 + %o3], %g2 /* Load Group */ + + srlx %o2, 27, %o3 /* IEU0 */ + sllx %o4, %g5, %g1 /* IEU0 Group */ + srlx %o2, 24, %g5 /* IEU0 Group */ + andcc %g2, %g1, %g2 /* IEU1 */ + + be,pn %xcc, 16f /* CTI */ + and %o3, 0x18, %o3 /* IEU0 Group */ + and %g5, 0x3f, %g5 /* IEU1 */ + ldx [%o5 + %o3], %g2 /* Load Group */ + + srlx %o2, 19, %o3 /* IEU0 */ + sllx %o4, %g5, %g1 /* IEU0 Group */ + srlx %o2, 16, %g5 /* IEU0 Group */ + andcc %g2, %g1, %g2 /* IEU1 */ + + be,pn %xcc, 17f /* CTI */ + and %o3, 0x18, %o3 /* IEU0 Group */ + and %g5, 0x3f, %g5 /* IEU1 */ + ldx [%o5 + %o3], %g2 /* Load Group */ + + srlx %o2, 11, %o3 /* IEU0 */ + sllx %o4, %g5, %g1 /* IEU0 Group */ + add %o0, 8, %o0 /* IEU1 */ + srlx %o2, 8, %g5 /* IEU0 Group */ + + andcc %g2, %g1, %g2 /* IEU1 */ + be,pn %xcc, 18f /* CTI */ + and %o3, 0x18, %o3 /* IEU0 Group */ + and %g5, 0x3f, %g5 /* IEU1 */ + + ldx [%o5 + %o3], %g2 /* Load Group */ + sllx %o4, %g5, %g1 /* IEU0 */ + mov %o2, %g5 /* IEU1 */ + srlx %o2, 3, %o3 /* IEU0 Group */ + + ldxa [%o0] ASI_PNF, %o2 /* Load */ + andcc %g2, %g1, 
%g2 /* IEU1 Group */ + be,pn %xcc, 19f /* CTI */ + and %o3, 0x18, %o3 /* IEU0 Group */ + + and %g5, 0x3f, %g5 /* IEU1 */ + ldx [%o5 + %o3], %g2 /* Load Group */ + sllx %o4, %g5, %g1 /* IEU0 */ + srlx %o2, 59, %o3 /* IEU0 Group */ + + andcc %g2, %g1, %g2 /* IEU1 Group */ + bne,pt %xcc, 5b /* CTI */ + srlx %o2, 56, %g5 /* IEU0 Group */ + sub %o0, 1, %o0 /* IEU1 */ + + add %sp, STACK_SIZE+32, %sp /* IEU0 Group */ + retl /* CTI+IEU1 Group */ + sub %o0, %g4, %o0 /* IEU0 */ + + .align 16 +19: sub %o0, 2, %o0 /* IEU1 */ + add %sp, STACK_SIZE+32, %sp /* IEU0 Group */ + retl /* CTI+IEU1 Group */ + sub %o0, %g4, %o0 /* IEU0 */ + +18: sub %o0, 3, %o0 /* IEU1 */ + add %sp, STACK_SIZE+32, %sp /* IEU0 Group */ + retl /* CTI+IEU1 Group */ + sub %o0, %g4, %o0 /* IEU0 */ + +17: add %o0, 4, %o0 /* IEU1 */ + add %sp, STACK_SIZE+32, %sp /* IEU0 Group */ + retl /* CTI+IEU1 Group */ + sub %o0, %g4, %o0 /* IEU0 */ + +16: add %o0, 3, %o0 /* IEU1 */ + add %sp, STACK_SIZE+32, %sp /* IEU0 Group */ + retl /* CTI+IEU1 Group */ + sub %o0, %g4, %o0 /* IEU0 */ + +15: add %o0, 2, %o0 /* IEU1 */ + add %sp, STACK_SIZE+32, %sp /* IEU0 Group */ + retl /* CTI+IEU1 Group */ + sub %o0, %g4, %o0 /* IEU0 */ + +14: add %o0, 1, %o0 /* IEU1 */ + add %sp, STACK_SIZE+32, %sp /* IEU0 Group */ + retl /* CTI+IEU1 Group */ + sub %o0, %g4, %o0 /* IEU0 */ + +13: add %sp, STACK_SIZE+32, %sp /* IEU1 */ + retl /* CTI+IEU1 Group */ + sub %o0, %g4, %o0 /* IEU0 */ + + .align 16 +12: sub %o0, 1, %o0 /* IEU0 Group */ + add %sp, STACK_SIZE+32, %sp /* IEU1 */ + retl /* CTI+IEU1 Group */ + sub %o0, %g4, %o0 /* IEU0 */ +END(strspn) +libc_hidden_builtin_def (strspn) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/sub_n.S b/REORG.TODO/sysdeps/sparc/sparc64/sub_n.S new file mode 100644 index 0000000000..006ae809c8 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/sub_n.S @@ -0,0 +1,54 @@ +/* SPARC v9 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 + and store difference in a third limb vector. 
+ + Copyright (C) 1995-2017 Free Software Foundation, Inc. + + This file is part of the GNU MP Library. + + The GNU MP Library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or (at your + option) any later version. + + The GNU MP Library is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with the GNU MP Library; see the file COPYING.LIB. If not, + see <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + +/* INPUT PARAMETERS + res_ptr %o0 + s1_ptr %o1 + s2_ptr %o2 + size %o3 */ + +ENTRY(__mpn_sub_n) + + sub %g0,%o3,%g5 ! g5 = -size, loop counter that counts up to zero + sllx %o3,3,%g1 ! g1 = size in bytes (8 bytes per limb) + add %o1,%g1,%o1 ! make s1_ptr point at end + add %o2,%g1,%o2 ! make s2_ptr point at end + add %o0,%g1,%o0 ! make res_ptr point at end + mov 0,%o4 ! clear carry variable + sllx %g5,3,%o5 ! compute initial address index + +1: ldx [%o2+%o5],%g1 ! load s2 limb + add %g5,1,%g5 ! increment loop count + ldx [%o1+%o5],%o3 ! load s1 limb + addcc %g1,%o4,%g1 ! add s2 limb and carry variable + movcc %xcc,0,%o4 ! no carry-out: clear o4 (on carry-out o4 was 1 and stays 1) + subcc %o3,%g1,%g1 ! subtract that sum from s1 limb + stx %g1,[%o0+%o5] ! store result + add %o5,8,%o5 ! increment address index + brnz,pt %g5,1b ! loop until the counter reaches zero + movcs %xcc,1,%o4 ! 
if s1 subtract gave carry, record it + + retl + mov %o4,%o0 + +END(__mpn_sub_n) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/submul_1.S b/REORG.TODO/sysdeps/sparc/sparc64/submul_1.S new file mode 100644 index 0000000000..03e62b668b --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/submul_1.S @@ -0,0 +1,82 @@ +/* SPARC v9 __mpn_submul_1 -- Multiply a limb vector with a single limb and + subtract the product from a second limb vector. + + Copyright (C) 1996-2017 Free Software Foundation, Inc. + + This file is part of the GNU MP Library. + + The GNU MP Library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or (at your + option) any later version. + + The GNU MP Library is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with the GNU MP Library; see the file COPYING.LIB. If not, + see <http://www.gnu.org/licenses/>. */ + +#include <sysdep.h> + + +/* INPUT PARAMETERS + res_ptr o0 + s1_ptr o1 + size o2 + s2_limb o3 */ + +ENTRY(__mpn_submul_1) + save %sp,-192,%sp + + sub %g0,%i2,%o7 + mov 0,%o0 ! zero cy_limb + sllx %o7,3,%o7 + sethi %hi(0x80000000),%o2 + srl %i3,0,%o1 ! extract low 32 bits of s2_limb + sub %i1,%o7,%o3 + srlx %i3,32,%i3 ! extract high 32 bits of s2_limb + sub %i0,%o7,%o4 + add %o2,%o2,%o2 ! o2 = 0x100000000 + + ! hi ! + ! mid-1 ! + ! mid-2 ! + ! lo ! +1: + ldx [%o3+%o7],%g5 + srl %g5,0,%i0 ! zero hi bits + ldx [%o4+%o7],%l1 + srlx %g5,32,%g5 + mulx %o1,%i0,%i4 ! lo product + mulx %i3,%i0,%i1 ! mid-1 product + mulx %o1,%g5,%l2 ! mid-2 product + mulx %i3,%g5,%i5 ! hi product + srlx %i4,32,%i0 ! extract high 32 bits of lo product... 
+ add %i1,%i0,%i1 ! ...and add it to the mid-1 product + addcc %i1,%l2,%i1 ! add mid products + mov 0,%l0 ! we need the carry from that add... + movcs %xcc,%o2,%l0 ! ...compute it and... + sllx %i1,32,%i0 ! align low bits of mid product + add %i5,%l0,%i5 ! ...add to bit 32 of the hi product + srl %i4,0,%g5 ! zero high 32 bits of lo product + add %i0,%g5,%i0 ! combine into low 64 bits of result + srlx %i1,32,%i1 ! extract high bits of mid product... + addcc %i0,%o0,%i0 ! add cy_limb to low 64 bits of result + add %i5,%i1,%i1 ! ...and add them to the high result + mov 0,%g5 + movcs %xcc,1,%g5 + subcc %l1,%i0,%i0 + stx %i0,[%o4+%o7] + add %g5,1,%l1 + movcs %xcc,%l1,%g5 + addcc %o7,8,%o7 + bne,pt %xcc,1b + add %i1,%g5,%o0 ! compute new cy_limb + + jmpl %i7+8, %g0 + restore %o0,%g0,%o0 + +END(__mpn_submul_1) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/tls-macros.h b/REORG.TODO/sysdeps/sparc/sparc64/tls-macros.h new file mode 100644 index 0000000000..bb0d8035fc --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/tls-macros.h @@ -0,0 +1,65 @@ +#define TLS_LE(x) \ + ({ int *__l; \ + asm ("sethi %%tle_hix22(" #x "), %0" : "=r" (__l)); \ + asm ("xor %1, %%tle_lox10(" #x "), %0" : "=r" (__l) : "r" (__l)); \ + asm ("add %%g7, %1, %0" : "=r" (__l) : "r" (__l)); \ + __l; }) + +#ifdef __PIC__ +# define TLS_LOAD_PIC \ + ({ long pc, got; \ + asm ("sethi %%hi(_GLOBAL_OFFSET_TABLE_-4), %1\n\t" \ + "rd %%pc, %0\n\t" \ + "add %1, %%lo(_GLOBAL_OFFSET_TABLE_+4), %1\n\t" \ + "add %1, %0, %1\n\t" \ + : "=r" (pc), "=r" (got)); \ + got; }) +#else +# define TLS_LOAD_PIC \ + ({ long got; \ + asm (".hidden _GLOBAL_OFFSET_TABLE_\n\t" \ + "sethi %%hi(_GLOBAL_OFFSET_TABLE_), %0\n\t" \ + "or %0, %%lo(_GLOBAL_OFFSET_TABLE_), %0" \ + : "=r" (got)); \ + got; }) +#endif + +#define TLS_IE(x) \ + ({ int *__l; \ + asm ("sethi %%tie_hi22(" #x "), %0" : "=r" (__l)); \ + asm ("add %1, %%tie_lo10(" #x "), %0" : "=r" (__l) : "r" (__l)); \ + asm ("ldx [%1 + %2], %0, %%tie_ldx(" #x ")" \ + : "=r" (__l) : "r" 
(TLS_LOAD_PIC), "r" (__l)); \ + asm ("add %%g7, %1, %0, %%tie_add(" #x ")" : "=r" (__l) : "r" (__l)); \ + __l; }) + +#define TLS_LD(x) \ + ({ int *__l; register void *__o0 asm ("%o0"); \ + long __o; \ + asm ("sethi %%tldm_hi22(" #x "), %0" : "=r" (__l)); \ + asm ("add %1, %%tldm_lo10(" #x "), %0" : "=r" (__l) : "r" (__l)); \ + asm ("add %1, %2, %0, %%tldm_add(" #x ")" \ + : "=r" (__o0) : "r" (TLS_LOAD_PIC), "r" (__l)); \ + asm ("call __tls_get_addr, %%tgd_call(" #x ")\n\t" \ + " nop" \ + : "=r" (__o0) : "0" (__o0) \ + : "g1", "g2", "g3", "g4", "g5", "g6", "o1", "o2", "o3", "o4", \ + "o5", "o7", "cc"); \ + asm ("sethi %%tldo_hix22(" #x "), %0" : "=r" (__o)); \ + asm ("xor %1, %%tldo_lox10(" #x "), %0" : "=r" (__o) : "r" (__o)); \ + asm ("add %1, %2, %0, %%tldo_add(" #x ")" : "=r" (__l) \ + : "r" (__o0), "r" (__o)); \ + __l; }) + +#define TLS_GD(x) \ + ({ int *__l; register void *__o0 asm ("%o0"); \ + asm ("sethi %%tgd_hi22(" #x "), %0" : "=r" (__l)); \ + asm ("add %1, %%tgd_lo10(" #x "), %0" : "=r" (__l) : "r" (__l)); \ + asm ("add %1, %2, %0, %%tgd_add(" #x ")" \ + : "=r" (__o0) : "r" (TLS_LOAD_PIC), "r" (__l)); \ + asm ("call __tls_get_addr, %%tgd_call(" #x ")\n\t" \ + " nop" \ + : "=r" (__o0) : "0" (__o0) \ + : "g1", "g2", "g3", "g4", "g5", "g6", "o1", "o2", "o3", "o4", \ + "o5", "o7", "cc"); \ + __o0; }) diff --git a/REORG.TODO/sysdeps/sparc/sparc64/tst-audit.h b/REORG.TODO/sysdeps/sparc/sparc64/tst-audit.h new file mode 100644 index 0000000000..f7123e0ef1 --- /dev/null +++ b/REORG.TODO/sysdeps/sparc/sparc64/tst-audit.h @@ -0,0 +1,25 @@ +/* Definitions for testing PLT entry/exit auditing. SPARC64 version. + + Copyright (C) 2012-2017 Free Software Foundation, Inc. + + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + <http://www.gnu.org/licenses/>. */ + +#define pltenter la_sparc64_gnu_pltenter +#define pltexit la_sparc64_gnu_pltexit +#define La_regs La_sparc64_regs +#define La_retval La_sparc64_retval +#define int_retval lrv_reg[0] |